/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.job;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.CubeUpdate;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
import org.apache.kylin.engine.spark.NSparkCubingEngine;
import org.apache.kylin.engine.spark.application.SparkApplication;
import org.apache.kylin.engine.spark.builder.NBuildSourceInfo;
import org.apache.kylin.engine.spark.job.AggInfo;
import org.apache.kylin.engine.spark.job.BuildLayoutWithUpdate;
import org.apache.kylin.engine.spark.job.CuboidAggregator;
import org.apache.kylin.engine.spark.job.LogJobInfoUtils;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.job.ParentSourceChooser;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.kylin.engine.spark.metadata.cube.ManagerHub;
import org.apache.kylin.engine.spark.metadata.cube.PathManager;
import org.apache.kylin.engine.spark.metadata.cube.model.ForestSpanningTree;
import org.apache.kylin.engine.spark.metadata.cube.model.LayoutEntity;
import org.apache.kylin.engine.spark.metadata.cube.model.SpanningTree;
import org.apache.kylin.engine.spark.utils.BuildUtils;
import org.apache.kylin.engine.spark.utils.JobMetrics;
import org.apache.kylin.engine.spark.utils.JobMetricsUtils;
import org.apache.kylin.engine.spark.utils.Metrics;
import org.apache.kylin.engine.spark.utils.QueryExecutionCache;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.shaded.com.google.common.base.Joiner;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.shaded.com.google.common.collect.Sets;
import org.apache.kylin.storage.StorageFactory;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.collection.JavaConversions;

/**
 * Spark application that builds the cuboids (layouts) of one cube segment and
 * then writes the resulting segment/cube metadata back through {@link CubeManager}.
 */
public class CubeBuildJob
extends SparkApplication {
    protected static final Logger logger = LoggerFactory.getLogger(CubeBuildJob.class);
    // Suffix appended to a layout's parquet path to form the temporary path it is first saved to.
    // NOTE(review): non-final and protected, so it can be reassigned from outside this class — confirm whether that is intended.
    protected static String TEMP_DIR_SUFFIX = "_temp";
    // Both are assigned at the start of doExecute() from the job configuration and the "cubeId" parameter.
    private CubeManager cubeManager;
    private CubeInstance cubeInstance;
    // Created in doExecute(); buildLayer() submits one job per cuboid to it and then calls updateLayout().
    private BuildLayoutWithUpdate buildLayoutWithUpdate;
    // Layout id -> shard number, filled by saveAndUpdateLayout() and copied onto the segment in updateSegmentInfo().
    private Map<Long, Short> cuboidShardNum = Maps.newConcurrentMap();
    // Layout id -> row count; holds the flat table count (under FLAT_TABLE_FLAG) and counts obtained via count() fallback.
    private Map<Long, Long> cuboidsRowCount = Maps.newConcurrentMap();
    // Result of StatisticsDecisionUtil.optimizeCubingPlan(); stays empty when statistics are not collected.
    private Map<Long, Long> recommendCuboidMap = new HashMap<Long, Long>();

    /**
     * Command-line entry point: creates a job instance and hands it the raw arguments.
     *
     * @param args arguments forwarded unchanged to {@code execute}
     */
    public static void main(String[] args) {
        new CubeBuildJob().execute(args);
    }

    /**
     * Builds the single segment named by the {@code segmentIds} job parameter.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>When the segment can have its cubing plan optimized, or segment statistics are
     *       enabled, collect per-cuboid HLL counters, write them to the job temp directory,
     *       store the resulting file in the resource store and ask
     *       {@link StatisticsDecisionUtil#optimizeCubingPlan} for a recommended cuboid map.</li>
     *   <li>Choose the build sources and build the cuboids layer by layer.</li>
     *   <li>Update the segment and cube metadata.</li>
     *   <li>Always delete the persisted flat tables and log the elapsed time.</li>
     * </ol>
     *
     * @throws IllegalArgumentException if the parameter does not name exactly one segment
     * @throws Exception on any build, storage or metadata failure
     */
    @Override
    protected void doExecute() throws Exception {
        long start = System.currentTimeMillis();
        logger.info("Start building cube job for {} ...", (Object)this.getParam("segmentIds"));
        Set<String> segmentIds = Sets.newHashSet(StringUtils.split(this.getParam("segmentIds")));
        Preconditions.checkArgument(segmentIds.size() == 1, "Build one segment in one time.");
        String firstSegmentId = segmentIds.iterator().next();
        String cubeName = this.getParam("cubeId");
        SegmentInfo seg = ManagerHub.getSegmentInfo(this.config, cubeName, firstSegmentId);
        this.cubeManager = CubeManager.getInstance(this.config);
        this.cubeInstance = this.cubeManager.getCubeByUuid(cubeName);
        CubeSegment newSegment = this.cubeInstance.getSegmentById(firstSegmentId);
        boolean needStatistics = StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(newSegment) || this.config.isSegmentStatisticsEnabled();
        if (needStatistics) {
            // Collect one HLL counter per cuboid.
            long startMills = System.currentTimeMillis();
            ForestSpanningTree statsTree = new ForestSpanningTree(JavaConversions.asJavaCollection(seg.toBuildLayouts()));
            ParentSourceChooser statsChooser = new ParentSourceChooser(statsTree, seg, this.jobId, this.ss, this.config, false);
            statsChooser.setNeedStatistics();
            statsChooser.decideFlatTableSource(null);
            Map<Long, HLLCounter> hllMap = new HashMap<Long, HLLCounter>();
            for (Tuple2<Object, AggInfo> cuboidData : statsChooser.aggInfo()) {
                hllMap.put((Long)cuboidData._1, cuboidData._2.cuboid().counter());
            }
            logger.info("Cuboid statistics return {} records and cost {} ms.", (Object)hllMap.size(), (Object)(System.currentTimeMillis() - startMills));

            // Write the statistics file; the largest estimate (1 when there are no counters) is passed as the row count.
            String jobTmpDir = this.config.getJobTmpDir(this.project) + "/" + this.jobId;
            Path statisticsDir = new Path(jobTmpDir + "/" + "/cube_statistics" + "/" + cubeName + "/" + firstSegmentId + "/");
            Optional<HLLCounter> hll = hllMap.values().stream().max(Comparator.comparingLong(HLLCounter::getCountEstimate));
            long rc = hll.map(HLLCounter::getCountEstimate).orElse(1L);
            CubeStatsWriter.writeCuboidStatistics(HadoopUtil.getCurrentConfiguration(), statisticsDir, hllMap, 1, rc);

            // Copy the statistics file into the resource store. The stream is closed
            // even when putResource fails, so the file handle is never leaked.
            FileSystem statsFs = HadoopUtil.getWorkingFileSystem();
            ResourceStore rs = ResourceStore.getStore(this.config);
            String metaKey = newSegment.getStatisticsResourcePath();
            Path statisticsFile = new Path(statisticsDir, "cuboid_statistics.seq");
            try (FSDataInputStream is = statsFs.open(statisticsFile)) {
                rs.putResource(metaKey, (InputStream)is, System.currentTimeMillis());
            }
            logger.info("{}'s stats saved to resource key({}) with path({})", newSegment, metaKey, statisticsFile);

            this.recommendCuboidMap = StatisticsDecisionUtil.optimizeCubingPlan(newSegment);
            if (!this.recommendCuboidMap.isEmpty()) {
                logger.info("Triggered cube planner phase one .");
            }
        }
        this.buildLayoutWithUpdate = new BuildLayoutWithUpdate();
        List<String> persistedFlatTable = new ArrayList<String>();
        // Nothing in this method adds to this list, so its cleanup loop below is currently a no-op.
        List<String> persistedViewFactTable = new ArrayList<String>();
        Path shareDir = this.config.getJobTmpShareDir(this.project, this.jobId);
        try {
            for (String segId : segmentIds) {
                SegmentInfo segInfo = ManagerHub.getSegmentInfo(this.config, cubeName, segId);
                ForestSpanningTree spanningTree = new ForestSpanningTree(JavaConversions.asJavaCollection(segInfo.toBuildLayouts()));
                logger.info("There are {} cuboids to be built in segment {}.", (Object)segInfo.toBuildLayouts().size(), (Object)segInfo.name());
                for (LayoutEntity cuboid : JavaConversions.asJavaCollection(segInfo.toBuildLayouts())) {
                    logger.debug("Cuboid {} has row keys: {}", (Object)cuboid.getId(), (Object)Joiner.on(", ").join(cuboid.getOrderedDimensions().keySet()));
                }
                ParentSourceChooser sourceChooser = new ParentSourceChooser(spanningTree, segInfo, this.jobId, this.ss, this.config, true);
                sourceChooser.decideSources();
                NBuildSourceInfo buildFromFlatTable = sourceChooser.flatTableSource();
                Map<Long, NBuildSourceInfo> buildFromLayouts = sourceChooser.reuseSources();
                this.infos.clearCuboidsNumPerLayer(segId);
                if (buildFromFlatTable != null) {
                    this.collectPersistedTablePath(persistedFlatTable, sourceChooser);
                    this.build(Collections.singletonList(buildFromFlatTable), segInfo, spanningTree);
                }
                if (!buildFromLayouts.isEmpty()) {
                    this.build(buildFromLayouts.values(), segInfo, spanningTree);
                }
                this.infos.recordSpanningTree(segId, spanningTree);
                logger.info("Updating segment info");
                // NOTE(review): with assertions disabled a null flat table source fails on the next line with an NPE instead.
                assert (buildFromFlatTable != null);
                this.updateSegmentInfo(this.getParam("cubeId"), segInfo, buildFromFlatTable.getFlatTableDS().count());
            }
            this.updateCubeAndSegmentMeta(this.getParam("cubeId"), ResourceDetectUtils.getSegmentSourceSize(shareDir), this.recommendCuboidMap);
        }
        finally {
            FileSystem fs = HadoopUtil.getWorkingFileSystem();
            for (String viewPath : persistedViewFactTable) {
                fs.delete(new Path(viewPath), true);
                logger.info("Delete persisted view fact table: {}.", (Object)viewPath);
            }
            for (String path : persistedFlatTable) {
                fs.delete(new Path(path), true);
                logger.info("Delete persisted flat table: {}.", (Object)path);
            }
            logger.info("Building job takes {} ms", (Object)(System.currentTimeMillis() - start));
        }
    }

    /**
     * Writes per-cuboid statistics for the segment as a text file and stores the
     * build results (size, input rows, snapshots, shard numbers, build time and
     * job id) on the segment via {@link CubeManager#updateCube}.
     *
     * <p>Writing the statistics file is best effort: a failure is logged and the
     * metadata update still happens.
     *
     * @param cubeId         uuid of the cube that owns the segment
     * @param segmentInfo    build-side description of the segment and its layouts
     * @param sourceRowCount number of input records, stored on the segment
     * @throws IOException if the cube update fails
     */
    private void updateSegmentInfo(String cubeId, SegmentInfo segmentInfo, long sourceRowCount) throws IOException {
        CubeInstance cubeInstance = this.cubeManager.getCubeByUuid(cubeId);
        CubeInstance cubeCopy = cubeInstance.latestCopyForWrite();
        CubeUpdate update = new CubeUpdate(cubeCopy);
        CubeSegment segment = cubeCopy.getSegmentById(segmentInfo.id());
        segment.setSizeKB(segmentInfo.getAllLayoutSize() / 1024L);

        List<String> cuboidStatics = new ArrayList<String>();
        // One JSON-like record per layout; the comma before "deviation" keeps the fields separated.
        String template = "{\"cuboid\":%d, \"rows\": %d, \"size\": %d, \"deviation\": %7f}";
        boolean hasRecommendation = this.recommendCuboidMap != null && !this.recommendCuboidMap.isEmpty();
        for (LayoutEntity layoutEntity : segmentInfo.getAllLayoutJava()) {
            double deviation = 0.0;
            if (hasRecommendation && layoutEntity.getRows() > 0L) {
                // The map may not contain every built layout; such layouts keep a deviation of 0.
                Long estimatedRows = this.recommendCuboidMap.get(layoutEntity.getId());
                if (estimatedRows != null) {
                    long diff = layoutEntity.getRows() - estimatedRows;
                    deviation = (double)diff / (double)layoutEntity.getRows();
                }
            }
            // Locale.ROOT keeps ASCII digits and a '.' decimal separator whatever the JVM default locale is.
            cuboidStatics.add(String.format(Locale.ROOT, template, layoutEntity.getId(), layoutEntity.getRows(), layoutEntity.getByteSize(), deviation));
        }
        try {
            FileSystem fs = HadoopUtil.getWorkingFileSystem();
            JavaSparkContext jsc = JavaSparkContext.fromSparkContext((SparkContext)this.ss.sparkContext());
            JavaRDD<String> cuboidStatRdd = jsc.parallelize(cuboidStatics, 1);
            for (String cuboid : cuboidStatics) {
                logger.info("Statistics \t: {}", (Object)cuboid);
            }
            String pathDir = this.config.getHdfsWorkingDirectory() + segment.getPreciseStatisticsResourcePath();
            logger.info("Saving {} {} .", (Object)pathDir, (Object)segmentInfo);
            Path path = new Path(pathDir);
            // Remove any earlier output first so saveAsTextFile writes to a fresh directory.
            if (fs.exists(path)) {
                fs.delete(path, true);
            }
            cuboidStatRdd.saveAsTextFile(pathDir);
        }
        catch (Exception e) {
            // Deliberately best effort: statistics are auxiliary and must not fail the build.
            logger.error("Write metrics failed.", e);
        }
        segment.setLastBuildTime(System.currentTimeMillis());
        segment.setLastBuildJobID(this.getParam("jobId"));
        segment.setInputRecords(sourceRowCount);
        segment.setSnapshots(new ConcurrentHashMap<String, String>(segmentInfo.getSnapShot2JavaMap()));
        segment.setCuboidShardNums(this.cuboidShardNum);
        Map<String, String> additionalInfo = segment.getAdditionalInfo();
        additionalInfo.put("storageType", "4");
        segment.setAdditionalInfo(additionalInfo);
        update.setToUpdateSegs(new CubeSegment[] { segment });
        this.cubeManager.updateCube(update);
    }

    /**
     * Asks the source chooser to persist the flat table if it needs to, and records
     * the returned path when it is non-empty.
     *
     * @param persistedFlatTable list the path is appended to
     * @param sourceChooser      chooser that may persist the flat table
     */
    private void collectPersistedTablePath(List<String> persistedFlatTable, ParentSourceChooser sourceChooser) {
        String persistedPath = sourceChooser.persistFlatTableIfNecessary();
        if (persistedPath.isEmpty()) {
            return;
        }
        persistedFlatTable.add(persistedPath);
    }

    /**
     * Stores the detected source size on every segment that already has input records
     * and, when a recommendation exists, attaches the recommended cuboids to the same
     * cube update. The update is only submitted if at least one segment was changed.
     *
     * @param cubeId                    uuid of the cube to update
     * @param toUpdateSegmentSourceSize segment id mapped to its source size (cast to {@code Long})
     * @param recommendCuboidMap        recommended cuboids; may be {@code null} or empty
     * @throws IOException if the cube update fails
     */
    private void updateCubeAndSegmentMeta(String cubeId, Map<String, Object> toUpdateSegmentSourceSize, Map<Long, Long> recommendCuboidMap) throws IOException {
        CubeInstance writableCube = this.cubeManager.getCubeByUuid(cubeId).latestCopyForWrite();
        CubeUpdate cubeUpdate = new CubeUpdate(writableCube);
        boolean hasRecommendation = recommendCuboidMap != null && !recommendCuboidMap.isEmpty();
        if (hasRecommendation) {
            cubeUpdate.setCuboids(recommendCuboidMap);
        }

        List<CubeSegment> changedSegments = new ArrayList<CubeSegment>();
        for (Map.Entry<String, Object> sizeEntry : toUpdateSegmentSourceSize.entrySet()) {
            CubeSegment target = writableCube.getSegmentById(sizeEntry.getKey());
            // Segments without input records are left untouched.
            if (target.getInputRecords() > 0L) {
                target.setInputRecordsSize((Long)sizeEntry.getValue());
                target.setLastBuildTime(System.currentTimeMillis());
                changedSegments.add(target);
            }
        }

        if (changedSegments.isEmpty()) {
            return;
        }
        cubeUpdate.setToUpdateSegs(changedSegments.toArray(new CubeSegment[0]));
        this.cubeManager.updateCube(cubeUpdate);
    }

    /**
     * Builds the given sources and then keeps building each following layer until
     * {@code buildLayer} returns no further build infos.
     *
     * @param buildSourceInfos sources for the first layer
     * @param seg              segment being built
     * @param st               spanning tree describing the layout hierarchy
     */
    private void build(Collection<NBuildSourceInfo> buildSourceInfos, SegmentInfo seg, SpanningTree st) {
        // Each call returns the build infos for the next layer; an empty list ends the walk.
        List<NBuildSourceInfo> pendingLayer = this.buildLayer(buildSourceInfos, seg, st);
        while (!pendingLayer.isEmpty()) {
            pendingLayer = this.buildLayer(pendingLayer, seg, st);
        }
    }

    /**
     * Submits one build job per cuboid of the current layer, waits for the layout
     * update, and returns the build infos for the next layer.
     *
     * @param buildSourceInfos sources of the current layer, each with the cuboids to build from it
     * @param seg              segment being built
     * @param st               spanning tree used to decide the next layer
     * @return build infos for the next layer; empty when no built cuboid has children
     * @throws IllegalStateException if a source has no cuboids to build
     * @throws NullPointerException  if a source has no parent dataset
     */
    private List<NBuildSourceInfo> buildLayer(Collection<NBuildSourceInfo> buildSourceInfos, final SegmentInfo seg, final SpanningTree st) {
        int cuboidsNumInLayer = 0;
        List<LayoutEntity> allIndexesInCurrentLayer = new ArrayList<LayoutEntity>();
        for (final NBuildSourceInfo info : buildSourceInfos) {
            Collection<LayoutEntity> toBuildCuboids = info.getToBuildCuboids();
            this.infos.recordParent2Children(info.getLayout(), toBuildCuboids.stream().map(LayoutEntity::getId).collect(Collectors.toList()));
            cuboidsNumInLayer += toBuildCuboids.size();
            Preconditions.checkState(!toBuildCuboids.isEmpty(), "To be built cuboids is empty.");
            final Dataset<Row> parentDS = info.getParentDS();
            // Validate once, before the first dereference, so a missing parent always fails with this message.
            Preconditions.checkNotNull(parentDS, "Parent dataset is null when building.");
            if (info.getLayoutId() == ParentSourceChooser.FLAT_TABLE_FLAG()) {
                // Record the flat table row count for saveAndUpdateLayout's count() fallback.
                this.cuboidsRowCount.putIfAbsent(info.getLayoutId(), parentDS.count());
            }
            for (final LayoutEntity index : toBuildCuboids) {
                this.buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity(){

                    @Override
                    public String getName() {
                        return "build-cuboid-" + index.getId();
                    }

                    @Override
                    public LayoutEntity build() throws IOException {
                        return CubeBuildJob.this.buildCuboid(seg, index, parentDS, st, info.getLayoutId());
                    }
                }, this.config);
                allIndexesInCurrentLayer.add(index);
            }
        }
        this.infos.recordCuboidsNumPerLayer(seg.id(), cuboidsNumInLayer);
        this.buildLayoutWithUpdate.updateLayout(seg, this.config);
        st.decideTheNextLayer(allIndexesInCurrentLayer, seg);
        return this.constructTheNextLayerBuildInfos(st, seg, allIndexesInCurrentLayer);
    }

    /**
     * Creates one build info for every layout of the current layer that has children
     * in the spanning tree. Each build info uses that layout's parquet storage path as
     * the parent and its children as the cuboids to build.
     *
     * @param st                       spanning tree queried for children
     * @param seg                      segment being built
     * @param allIndexesInCurrentLayer layouts of the layer that was just built
     * @return build infos for the next layer; empty when no layout has children
     */
    private List<NBuildSourceInfo> constructTheNextLayerBuildInfos(SpanningTree st, SegmentInfo seg, Collection<LayoutEntity> allIndexesInCurrentLayer) {
        List<NBuildSourceInfo> nextLayer = new ArrayList<NBuildSourceInfo>();
        for (LayoutEntity parentLayout : allIndexesInCurrentLayer) {
            Collection<LayoutEntity> childLayouts = st.getChildrenByIndexPlan(parentLayout);
            if (!childLayouts.isEmpty()) {
                NBuildSourceInfo childSource = new NBuildSourceInfo();
                childSource.setSparkSession(this.ss);
                String parentPath = PathManager.getParquetStoragePath(
                        this.config,
                        this.getParam("cubeName"),
                        seg.name(),
                        seg.identifier(),
                        String.valueOf(parentLayout.getId()));
                childSource.setLayoutId(parentLayout.getId());
                childSource.setParentStoragePath(parentPath);
                childSource.setToBuildCuboids(childLayouts);
                nextLayer.add(childSource);
            }
        }
        return nextLayer;
    }

    /**
     * Returns the number of cores the job asks for. When the task-impact setting is
     * enabled this is the largest detected leaf task count divided (integer division)
     * by the configured core factor; otherwise it is the configured total.
     *
     * @return the required core count as a string
     * @throws Exception if the detect files cannot be read or parsed
     */
    @Override
    protected String calculateRequiredCores() throws Exception {
        boolean sizeByTasks = this.config.getSparkEngineTaskImpactInstanceEnabled().booleanValue();
        if (!sizeByTasks) {
            return this.config.getSparkEngineRequiredTotalCores();
        }

        Path detectDir = this.config.getJobTmpShareDir(this.project, this.jobId);
        String maxLeafTasks = this.maxLeafTasksNums(detectDir);
        logger.info("The maximum number of tasks required to run the job is {}", (Object)maxLeafTasks);
        int coreFactor = this.config.getSparkEngineTaskCoreFactor();
        // Truncate the parsed value to an int first, then divide.
        int leafTasks = (int)Double.parseDouble(maxLeafTasks);
        int requiredCores = leafTasks / coreFactor;
        logger.info("require cores: " + requiredCores);
        return String.valueOf(requiredCores);
    }

    /**
     * Lists the files in {@code shareDir} whose path ends with the cubing detect item
     * suffix and returns the maximum value selected from them.
     *
     * @param shareDir directory holding the detect files
     * @return the value returned by {@code ResourceDetectUtils.selectMaxValueInFiles}
     * @throws IOException if the directory cannot be listed
     */
    private String maxLeafTasksNums(Path shareDir) throws IOException {
        FileStatus[] detectFiles = HadoopUtil.getWorkingFileSystem().listStatus(
                shareDir,
                candidate -> candidate.toString().endsWith(ResourceDetectUtils.cubingDetectItemFileSuffix()));
        return ResourceDetectUtils.selectMaxValueInFiles(detectFiles);
    }

    /**
     * Builds one cuboid from its parent dataset and saves it. A table index only
     * projects its dimension columns; any other cuboid is aggregated first. In both
     * cases the result is sorted within partitions by the dimension columns.
     *
     * @param seg          segment being built
     * @param cuboid       layout to build
     * @param parent       dataset the layout is derived from
     * @param spanningTree spanning tree passed to the aggregator
     * @param parentId     id of the parent layout, or the flat table flag
     * @return the same {@code cuboid} instance after it has been saved and updated
     * @throws IOException if saving the layout fails
     */
    private LayoutEntity buildCuboid(SegmentInfo seg, LayoutEntity cuboid, Dataset<Row> parent, SpanningTree spanningTree, long parentId) throws IOException {
        String parentName = parentId == ParentSourceChooser.FLAT_TABLE_FLAG() ? "flat table" : String.valueOf(parentId);
        logger.info("Build index:{}, in segment:{}", (Object)cuboid.getId(), (Object)seg.id());
        Set<Integer> dimIndexes = cuboid.getOrderedDimensions().keySet();

        Dataset<Row> sorted;
        if (!cuboid.isTableIndex()) {
            Dataset<Row> aggregated = CuboidAggregator.agg(this.ss, parent, dimIndexes, cuboid.getOrderedMeasures(), spanningTree, false);
            logger.info("Build layout:{}, in index:{}", (Object)cuboid.getId(), (Object)cuboid.getId());
            this.ss.sparkContext().setJobDescription("build " + cuboid.getId() + " from parent " + parentName);
            Set<Integer> rowKeys = cuboid.getOrderedDimensions().keySet();
            sorted = aggregated
                    .select(NSparkCubingUtil.getColumns(rowKeys, cuboid.getOrderedMeasures().keySet()))
                    .sortWithinPartitions(NSparkCubingUtil.getColumns(rowKeys));
        } else {
            Dataset<Row> projected = parent.select(NSparkCubingUtil.getColumns(dimIndexes));
            logger.info("Build layout:{}, in index:{}", (Object)cuboid.getId(), (Object)cuboid.getId());
            this.ss.sparkContext().setJobDescription("build " + cuboid.getId() + " from parent " + parentName);
            Set<Integer> orderedDims = cuboid.getOrderedDimensions().keySet();
            sorted = projected
                    .select(NSparkCubingUtil.getColumns(orderedDims))
                    .sortWithinPartitions(NSparkCubingUtil.getColumns(orderedDims));
        }
        this.saveAndUpdateLayout(sorted, seg, cuboid, parentId);

        this.ss.sparkContext().setJobDescription(null);
        logger.info("Finished Build index :{}, in segment:{}", (Object)cuboid.getId(), (Object)seg.id());
        return cuboid;
    }

    /**
     * Saves the dataset to the layout's temporary path, records row counts and the
     * shard number on the layout, and fills the remaining cuboid info from the final
     * path.
     *
     * <p>The save runs under a fresh query execution id so its metrics can be
     * collected. That id is removed from the Spark local properties and from
     * {@link QueryExecutionCache} whether or not the save succeeds.
     *
     * @param dataset  sorted data of the layout
     * @param seg      segment being built
     * @param layout   layout to save; updated in place
     * @param parentId id of the parent the layout was built from
     * @throws IOException if saving or repartitioning fails
     */
    private void saveAndUpdateLayout(Dataset<Row> dataset, SegmentInfo seg, LayoutEntity layout, long parentId) throws IOException {
        long layoutId = layout.getId();
        String path = PathManager.getParquetStoragePath(this.config, this.getParam("cubeName"), seg.name(), seg.identifier(), String.valueOf(layoutId));
        String tempPath = path + TEMP_DIR_SUFFIX;
        String queryExecutionId = UUID.randomUUID().toString();
        this.ss.sparkContext().setLocalProperty(QueryExecutionCache.N_EXECUTION_ID_KEY(), queryExecutionId);
        try {
            NSparkCubingEngine.NSparkCubingStorage storage = StorageFactory.createEngineAdapter(layout, NSparkCubingEngine.NSparkCubingStorage.class);
            logger.info("Cuboids are saved to temp path : {}", (Object)tempPath);
            storage.saveTo(tempPath, dataset, this.ss);
            JobMetrics metrics = JobMetricsUtils.collectMetrics(queryExecutionId);
            long rowCount = metrics.getMetrics(Metrics.CUBOID_ROWS_CNT());
            if (rowCount == -1L) {
                // No metric available: count the dataset and take the source rows from the recorded parent count.
                this.infos.recordAbnormalLayouts(layoutId, "'Job metrics seems null, use count() to collect cuboid rows.'");
                logger.debug("Can not get cuboid row cnt, use count() to collect cuboid rows.");
                long cuboidRowCnt = dataset.count();
                layout.setRows(cuboidRowCnt);
                this.cuboidsRowCount.putIfAbsent(layoutId, cuboidRowCnt);
                // NOTE(review): returns null if no count was recorded for parentId (only the flat table
                // and count()-fallback layouts are recorded) — confirm how setSourceRows handles that.
                layout.setSourceRows(this.cuboidsRowCount.get(parentId));
            } else {
                layout.setRows(rowCount);
                layout.setSourceRows(metrics.getMetrics(Metrics.SOURCE_ROWS_CNT()));
            }
            int shardNum = BuildUtils.repartitionIfNeed(layout, storage, path, tempPath, this.cubeInstance.getConfig(), this.ss);
            layout.setShardNum(shardNum);
            this.cuboidShardNum.put(layoutId, (short)shardNum);
        }
        finally {
            // Runs on failure too, so a failed save does not leave the id on the thread or in the cache.
            this.ss.sparkContext().setLocalProperty(QueryExecutionCache.N_EXECUTION_ID_KEY(), null);
            QueryExecutionCache.removeQueryExecution(queryExecutionId);
        }
        BuildUtils.fillCuboidInfo(layout, path);
    }

    /**
     * Returns the job description text provided by {@code LogJobInfoUtils.dfBuildJobInfo()}.
     *
     * @return the description text for this build job
     */
    @Override
    protected String generateInfo() {
        String jobInfo = LogJobInfoUtils.dfBuildJobInfo();
        return jobInfo;
    }
}

