/*
 * Decompiled with CFR 0.152.
 */
package org.apache.doris.load.loadv2.etl;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections.map.MultiValueMap;
import org.apache.doris.common.SparkDppException;
import org.apache.doris.load.loadv2.dpp.GlobalDictBuilder;
import org.apache.doris.load.loadv2.dpp.SparkDpp;
import org.apache.doris.load.loadv2.etl.EtlJobConfig;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Driver for a Spark ETL job.
 *
 * <p>The job reads a JSON job config from the path given on the command line, validates the
 * column mappings of every file group, optionally builds a global dictionary for the
 * {@code bitmap_dict} columns of the Hive source table, and finally hands the config to
 * {@link SparkDpp} for processing.
 */
public class SparkEtlJob {
    private static final Logger LOG = LoggerFactory.getLogger(SparkEtlJob.class);
    private static final String BITMAP_DICT_FUNC = "bitmap_dict";
    private static final String TO_BITMAP_FUNC = "to_bitmap";
    private static final String BITMAP_HASH = "bitmap_hash";
    private static final String BINARY_BITMAP = "binary_bitmap";

    /** Path of the file whose first line holds the JSON job config. */
    private final String jobConfigFilePath;
    /** Parsed job config; populated by {@link #initConfig()}. */
    private EtlJobConfig etlJobConfig;
    /** Ids of tables that have at least one file group with a Hive source. */
    private final Set<Long> hiveSourceTables;
    /** Table id to lower-cased names of columns mapped through {@code bitmap_dict}. */
    private final Map<Long, Set<String>> tableToBitmapDictColumns;
    /** Table id to lower-cased names of columns mapped through {@code binary_bitmap}. */
    private final Map<Long, Set<String>> tableToBinaryBitmapColumns;
    /** Spark session; created by {@link #initSparkEnvironment()}. */
    private SparkSession spark;

    private SparkEtlJob(String jobConfigFilePath) {
        this.jobConfigFilePath = jobConfigFilePath;
        this.etlJobConfig = null;
        this.hiveSourceTables = Sets.newHashSet();
        this.tableToBitmapDictColumns = Maps.newHashMap();
        this.tableToBinaryBitmapColumns = Maps.newHashMap();
    }

    /** Creates (or reuses) a Hive-enabled Spark session configured to use Kryo serialization. */
    private void initSparkEnvironment() {
        SparkConf conf = new SparkConf();
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.set("spark.kryo.registrator", "org.apache.doris.load.loadv2.dpp.DorisKryoRegistrator");
        conf.set("spark.kryo.registrationRequired", "false");
        this.spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate();
    }

    /**
     * Copies the given key/value pairs into the conf of the running Spark context.
     *
     * @param configs settings to apply; {@code null} is treated as "nothing to apply"
     */
    private void initSparkConfigs(Map<String, String> configs) {
        if (configs == null) {
            return;
        }
        SparkConf contextConf = this.spark.sparkContext().conf();
        for (Map.Entry<String, String> entry : configs.entrySet()) {
            contextConf.set(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Loads the job config: reads the config file as text through Spark and parses its first
     * line as JSON.
     */
    private void initConfig() {
        LOG.info("job config file path: {}", this.jobConfigFilePath);
        Dataset<String> ds = this.spark.read().textFile(this.jobConfigFilePath);
        // Only the first line is consumed.
        String jsonConfig = ds.first();
        LOG.info("rdd read json config: {}", jsonConfig);
        this.etlJobConfig = EtlJobConfig.configFromJson(jsonConfig);
        LOG.info("etl job config: {}", this.etlJobConfig);
    }

    /**
     * Validates the column mappings of every table and collects the bitmap-related columns.
     *
     * <p>For each mapping the top-level function name of its expression decides what happens:
     * <ul>
     *   <li>{@code bitmap_hash}: rejected;</li>
     *   <li>{@code binary_bitmap} / {@code bitmap_dict}: the lower-cased column name is recorded
     *       for the table and the mapping is dropped from the file group;</li>
     *   <li>{@code to_bitmap}: the mapping is dropped from the file group;</li>
     *   <li>anything else: the mapping is kept.</li>
     * </ul>
     * Each file group's {@code columnMappings} is replaced with the filtered map.
     *
     * @throws SparkDppException if a mapping uses {@code bitmap_hash}
     * @throws Exception if more than one table has a Hive source, {@code bitmap_dict} columns,
     *     or {@code binary_bitmap} columns
     */
    private void checkConfig() throws Exception {
        for (Map.Entry<Long, EtlJobConfig.EtlTable> entry : this.etlJobConfig.tables.entrySet()) {
            boolean isHiveSource = false;
            Set<String> bitmapDictColumns = Sets.newHashSet();
            Set<String> binaryBitmapColumns = Sets.newHashSet();
            for (EtlJobConfig.EtlFileGroup fileGroup : entry.getValue().fileGroups) {
                if (fileGroup.sourceType == EtlJobConfig.SourceType.HIVE) {
                    isHiveSource = true;
                }
                HashMap<String, EtlJobConfig.EtlColumnMapping> newColumnMappings = Maps.newHashMap();
                for (Map.Entry<String, EtlJobConfig.EtlColumnMapping> mappingEntry
                        : fileGroup.columnMappings.entrySet()) {
                    String columnName = mappingEntry.getKey();
                    String exprStr = mappingEntry.getValue().toDescription();
                    String funcName = functions.expr(exprStr).expr().prettyName();
                    if (funcName.equalsIgnoreCase(BITMAP_HASH)) {
                        throw new SparkDppException("spark load not support bitmap_hash now");
                    }
                    if (funcName.equalsIgnoreCase(BINARY_BITMAP)) {
                        binaryBitmapColumns.add(columnName.toLowerCase());
                    } else if (funcName.equalsIgnoreCase(BITMAP_DICT_FUNC)) {
                        bitmapDictColumns.add(columnName.toLowerCase());
                    } else if (!funcName.equalsIgnoreCase(TO_BITMAP_FUNC)) {
                        // Not a bitmap function: keep the mapping as it is.
                        newColumnMappings.put(columnName, mappingEntry.getValue());
                    }
                }
                fileGroup.columnMappings = newColumnMappings;
            }
            if (isHiveSource) {
                this.hiveSourceTables.add(entry.getKey());
            }
            if (!bitmapDictColumns.isEmpty()) {
                this.tableToBitmapDictColumns.put(entry.getKey(), bitmapDictColumns);
            }
            if (!binaryBitmapColumns.isEmpty()) {
                this.tableToBinaryBitmapColumns.put(entry.getKey(), binaryBitmapColumns);
            }
        }
        LOG.info("init hiveSourceTables: {}, tableToBitmapDictColumns: {}, tableToBinaryBitmapColumns: {}",
                this.hiveSourceTables, this.tableToBitmapDictColumns, this.tableToBinaryBitmapColumns);
        if (this.hiveSourceTables.size() > 1
                || this.tableToBitmapDictColumns.size() > 1
                || this.tableToBinaryBitmapColumns.size() > 1) {
            throw new Exception("spark etl job must have only one hive table with bitmap type column to process");
        }
    }

    /** Runs the data pre-processing stage with the validated config and collected bitmap columns. */
    private void processDpp() throws Exception {
        SparkDpp sparkDpp = new SparkDpp(this.spark, this.etlJobConfig, this.tableToBitmapDictColumns,
                this.tableToBinaryBitmapColumns);
        sparkDpp.init();
        sparkDpp.doDpp();
    }

    /**
     * Builds the global dictionary for the {@code bitmap_dict} columns of the given table and
     * encodes the source data into an intermediate Hive table.
     *
     * <p>The source Hive table and filter are taken from the table's first file group. The
     * intermediate and distinct-key table names are derived from the table id and the last path
     * segment of the job's output path.
     *
     * @param table the table config; must have an entry in {@code tableToBitmapDictColumns}
     * @param tableId id of {@code table}
     * @return the intermediate Hive table name, qualified as {@code db.table}
     * @throws RuntimeException wrapping any exception raised while building the dictionary
     */
    private String buildGlobalDictAndEncodeSourceTable(EtlJobConfig.EtlTable table, long tableId) {
        // commons-collections 3 MultiValueMap is not generic, hence the raw type.
        MultiValueMap dictColumnMap = new MultiValueMap();
        for (String dictColumn : this.tableToBitmapDictColumns.get(tableId)) {
            dictColumnMap.put(dictColumn, null);
        }

        // Column names come from the base index only.
        ArrayList<String> dorisOlapTableColumnList = Lists.newArrayList();
        for (EtlJobConfig.EtlIndex etlIndex : table.indexes) {
            if (!etlIndex.isBaseIndex) {
                continue;
            }
            for (EtlJobConfig.EtlColumn column : etlIndex.columns) {
                dorisOlapTableColumnList.add(column.columnName);
            }
        }

        EtlJobConfig.EtlFileGroup etlFileGroup = table.fileGroups.get(0);
        String sourceHiveDBTableName = etlFileGroup.hiveDbTableName;
        // Text before the first '.' is used as the Hive database for the generated tables.
        String dorisHiveDB = sourceHiveDBTableName.split("\\.")[0];
        String outputPath = this.etlJobConfig.outputPath;
        String taskId = outputPath.substring(outputPath.lastIndexOf("/") + 1);
        String globalDictTableName = String.format("doris_global_dict_table_%d", tableId);
        String distinctKeyTableName = String.format("doris_distinct_key_table_%d_%s", tableId, taskId);
        String dorisIntermediateHiveTable = String.format("doris_intermediate_hive_table_%d_%s", tableId, taskId);
        String sourceHiveFilter = etlFileGroup.where;

        // Fixed builder settings: no map-side join columns, no high-cardinality columns,
        // concurrency and split number of 1.
        ArrayList<String> mapSideJoinColumns = Lists.newArrayList();
        int buildConcurrency = 1;
        ArrayList<String> veryHighCardinalityColumn = Lists.newArrayList();
        int veryHighCardinalityColumnSplitNum = 1;

        LOG.info("global dict builder args, dictColumnMap: {}, dorisOlapTableColumnList: {}"
                        + ", sourceHiveDBTableName: {}, sourceHiveFilter: {}, distinctKeyTableName: {}"
                        + ", globalDictTableName: {}, dorisIntermediateHiveTable: {}",
                dictColumnMap, dorisOlapTableColumnList, sourceHiveDBTableName, sourceHiveFilter,
                distinctKeyTableName, globalDictTableName, dorisIntermediateHiveTable);
        try {
            GlobalDictBuilder globalDictBuilder = new GlobalDictBuilder(dictColumnMap, dorisOlapTableColumnList,
                    mapSideJoinColumns, sourceHiveDBTableName, sourceHiveFilter, dorisHiveDB, distinctKeyTableName,
                    globalDictTableName, dorisIntermediateHiveTable, buildConcurrency, veryHighCardinalityColumn,
                    veryHighCardinalityColumnSplitNum, this.spark);
            globalDictBuilder.createHiveIntermediateTable();
            globalDictBuilder.extractDistinctColumn();
            globalDictBuilder.buildGlobalDict();
            globalDictBuilder.encodeDorisIntermediateHiveTable();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return String.format("%s.%s", dorisHiveDB, dorisIntermediateHiveTable);
    }

    /**
     * Prepares the Hive source table, if any, and then runs the pre-processing stage.
     *
     * <p>For the Hive source table the first file group's Hive table properties are applied to
     * the Spark conf and its {@code dppHiveDbTableName} is set to the source Hive table, or to
     * the encoded intermediate table when the table has {@code bitmap_dict} columns.
     */
    private void processData() throws Exception {
        if (!this.hiveSourceTables.isEmpty()) {
            // checkConfig() allows at most one Hive source table. Look it up by its recorded id
            // rather than taking whichever table happens to come first in the config, which
            // could be a non-Hive table when the job carries several tables.
            long tableId = this.hiveSourceTables.iterator().next();
            EtlJobConfig.EtlTable table = this.etlJobConfig.tables.get(tableId);

            // NOTE(review): only the first file group is prepared; presumably a Hive source
            // table carries a single file group - confirm against the config producer.
            EtlJobConfig.EtlFileGroup fileGroup = table.fileGroups.get(0);
            this.initSparkConfigs(fileGroup.hiveTableProperties);
            fileGroup.dppHiveDbTableName = fileGroup.hiveDbTableName;
            if (this.tableToBitmapDictColumns.containsKey(tableId)) {
                fileGroup.dppHiveDbTableName = this.buildGlobalDictAndEncodeSourceTable(table, tableId);
            }
        }
        this.processDpp();
    }

    /** Runs the job stages in order: Spark setup, config load, config check, data processing. */
    private void run() throws Exception {
        this.initSparkEnvironment();
        this.initConfig();
        this.checkConfig();
        this.processData();
    }

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the job config file path; the process exits with status
     *     {@code -1} when it is missing or when the job fails
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("missing job config file path arg");
            System.exit(-1);
        }
        try {
            new SparkEtlJob(args[0]).run();
        } catch (Exception e) {
            // Report on stderr and exit with a failure status.
            System.err.println("spark etl job run failed");
            e.printStackTrace();
            System.exit(-1);
        }
    }
}

