/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark.job;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.CubeUpdate;
import org.apache.kylin.cube.cuboid.CuboidModeEnum;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
import org.apache.kylin.engine.spark.NSparkCubingEngine;
import org.apache.kylin.engine.spark.application.SparkApplication;
import org.apache.kylin.engine.spark.builder.NBuildSourceInfo;
import org.apache.kylin.engine.spark.job.AggInfo;
import org.apache.kylin.engine.spark.job.BuildLayoutWithUpdate;
import org.apache.kylin.engine.spark.job.CuboidAggregator;
import org.apache.kylin.engine.spark.job.CuboidStatisticsJob;
import org.apache.kylin.engine.spark.job.LogJobInfoUtils;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.job.ParentSourceChooser;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.kylin.engine.spark.metadata.cube.ManagerHub;
import org.apache.kylin.engine.spark.metadata.cube.PathManager;
import org.apache.kylin.engine.spark.metadata.cube.model.ForestSpanningTree;
import org.apache.kylin.engine.spark.metadata.cube.model.LayoutEntity;
import org.apache.kylin.engine.spark.metadata.cube.model.SpanningTree;
import org.apache.kylin.engine.spark.utils.BuildUtils;
import org.apache.kylin.engine.spark.utils.JobMetrics;
import org.apache.kylin.engine.spark.utils.JobMetricsUtils;
import org.apache.kylin.engine.spark.utils.Metrics;
import org.apache.kylin.engine.spark.utils.QueryExecutionCache;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.shaded.com.google.common.base.Joiner;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.storage.StorageFactory;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.utils.ResourceDetectUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.collection.JavaConversions;

/**
 * Spark application that builds the cuboids of an optimized segment.
 *
 * <p>The job runs in two phases (see {@link #doExecute()}):
 * <ol>
 *   <li>cuboid statistics for the newly recommended cuboids are computed from the base cuboid
 *       and written to the job's temporary directory;</li>
 *   <li>the recommended cuboids are built layer by layer from their parents, and the optimized
 *       segment's metadata is updated afterwards.</li>
 * </ol>
 */
public class OptimizeBuildJob
extends SparkApplication {
    private static final Logger logger = LoggerFactory.getLogger(OptimizeBuildJob.class);
    // NOTE(review): this map is never read or written inside this class.
    private Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    /** Suffix appended to a layout's storage path to form the path it is first saved to. */
    protected static String TEMP_DIR_SUFFIX = "_temp";
    private BuildLayoutWithUpdate buildLayoutWithUpdate;
    /** Shard number per cuboid id; written by build tasks and stored on the optimized segment. */
    private Map<Long, Short> cuboidShardNum = Maps.newConcurrentMap();
    /** Row counts per layout id (or the flat-table flag) that were obtained through count(). */
    private Map<Long, Long> cuboidsRowCount = Maps.newConcurrentMap();
    private Configuration conf = HadoopUtil.getCurrentConfiguration();
    private CubeManager cubeManager;
    private CubeInstance cubeInstance;
    private SegmentInfo optSegInfo;
    private SegmentInfo originalSegInfo;
    private CubeSegment optSeg;
    private CubeSegment originalSeg;
    private long baseCuboidId;

    /**
     * Command line entry point; delegates to {@code execute(args)}.
     *
     * @param args job arguments, passed through unchanged
     */
    public static void main(String[] args) {
        OptimizeBuildJob optimizeBuildJob = new OptimizeBuildJob();
        optimizeBuildJob.execute(args);
    }

    /**
     * Resolves the cube and segments from the "cubeId" and "segmentId" parameters, then computes
     * the cuboid statistics and builds the recommended cuboids.
     *
     * @throws Exception if either phase fails
     */
    @Override
    protected void doExecute() throws Exception {
        String segmentId = this.getParam("segmentId");
        String cubeId = this.getParam("cubeId");
        this.cubeManager = CubeManager.getInstance(this.config);
        this.cubeInstance = this.cubeManager.getCubeByUuid(cubeId);
        this.optSeg = this.cubeInstance.getSegmentById(segmentId);
        this.originalSeg = this.cubeInstance.getOriginalSegmentToOptimize(this.optSeg);
        this.originalSegInfo = ManagerHub.getSegmentInfo(this.config, cubeId, this.originalSeg.getUuid());
        this.calculateCuboidFromBaseCuboid();
        this.buildCuboidFromParent(cubeId);
    }

    /**
     * Computes HLL statistics of the recommended-but-missing cuboids from the base cuboid data and
     * writes them below the job's temporary directory.
     *
     * @throws IOException if the statistics cannot be written
     * @throws IllegalStateException if the original segment has no layout with the base cuboid id
     */
    private void calculateCuboidFromBaseCuboid() throws IOException {
        logger.info("Start to calculate cuboid statistics for optimized segment");
        long start = System.currentTimeMillis();
        this.baseCuboidId = this.cubeInstance.getCuboidScheduler().getBaseCuboidId();
        // Fail with an explicit message instead of a bare NoSuchElementException.
        LayoutEntity baseCuboid = this.originalSegInfo.getAllLayoutJava().stream()
                .filter(layoutEntity -> layoutEntity.getId() == this.baseCuboidId)
                .findFirst()
                .orElseThrow(() -> new IllegalStateException("Base cuboid " + this.baseCuboidId
                        + " is not found in the layouts of original segment " + this.originalSeg.getName()));
        String baseCuboidPath = PathManager.getParquetStoragePath(this.config, this.cubeInstance.getName(), this.optSeg.getName(), this.optSeg.getStorageLocationIdentifier(), String.valueOf(baseCuboid.getId()));
        Dataset<Row> baseCuboidDS = StorageFactory.createEngineAdapter(baseCuboid, NSparkCubingEngine.NSparkCubingStorage.class).getFrom(baseCuboidPath, this.ss);
        Map<Long, HLLCounter> hllMap = new HashMap<Long, HLLCounter>();
        for (Tuple2<Object, AggInfo> cuboidData : CuboidStatisticsJob.statistics(baseCuboidDS, this.originalSegInfo, this.getNewCuboidIds())) {
            hllMap.put((Long)cuboidData._1, ((AggInfo)cuboidData._2).cuboid().counter());
        }
        String jobTmpDir = this.config.getJobTmpDir(this.project) + "/" + this.jobId;
        Path statisticsDir = new Path(jobTmpDir + "/" + "/cube_statistics" + "/" + this.cubeInstance.getUuid() + "/" + this.optSeg.getUuid() + "/");
        CubeStatsWriter.writeCuboidStatistics(this.conf, statisticsDir, hllMap, 1, -1L);
        logger.info("Calculate cuboid statistics from base cuboid job takes {} ms", System.currentTimeMillis() - start);
    }

    /**
     * Builds the layouts of the optimized segment (RECOMMEND mode) from their parents and then
     * updates the segment metadata. The elapsed time is logged whether or not the build succeeds.
     *
     * @param cubeId uuid of the cube the optimized segment belongs to
     * @throws IOException if updating the segment metadata fails
     */
    private void buildCuboidFromParent(String cubeId) throws IOException {
        logger.info("Start to build recommend cuboid for optimized segment");
        long start = System.currentTimeMillis();
        this.optSegInfo = ManagerHub.getSegmentInfo(this.config, cubeId, this.optSeg.getUuid(), CuboidModeEnum.RECOMMEND);
        this.buildLayoutWithUpdate = new BuildLayoutWithUpdate(this.config);
        this.infos.clearAddCuboids();
        try {
            ForestSpanningTree spanningTree = new ForestSpanningTree(JavaConversions.asJavaCollection(this.optSegInfo.toBuildLayouts()));
            logger.info("There are {} cuboids to be built in segment {}.", this.optSegInfo.toBuildLayouts().size(), this.optSegInfo.name());
            for (LayoutEntity cuboid : JavaConversions.asJavaCollection(this.optSegInfo.toBuildLayouts())) {
                logger.debug("Cuboid {} has row keys: {}", cuboid.getId(), Joiner.on(", ").join(cuboid.getOrderedDimensions().keySet()));
            }
            // The base cuboid is taken out of the to-build set after the spanning tree was created.
            this.optSegInfo.removeLayout(this.baseCuboidId);
            ParentSourceChooser sourceChooser = new ParentSourceChooser(spanningTree, this.optSegInfo, this.optSeg, this.jobId, this.ss, this.config, false);
            sourceChooser.decideSources();
            Map<Long, NBuildSourceInfo> buildFromLayouts = sourceChooser.reuseSources();
            this.infos.clearCuboidsNumPerLayer(this.optSegInfo.id());
            if (!buildFromLayouts.isEmpty()) {
                this.build(buildFromLayouts.values(), this.optSegInfo, spanningTree);
            }
            this.infos.recordSpanningTree(this.optSegInfo.id(), spanningTree);
            logger.info("Updating segment info");
            this.updateOptimizeSegmentInfo();
        }
        finally {
            logger.info("Building job takes {} ms", System.currentTimeMillis() - start);
        }
    }

    /**
     * Returns the ids of the cuboids in RECOMMEND_MISSING mode as a primitive array.
     *
     * @return the cuboid ids, in the iteration order of the underlying set
     * @throws NullPointerException if the cube reports no recommend cuboid set
     */
    private long[] getNewCuboidIds() {
        Set<Long> recommendCuboidsSet = this.cubeInstance.getCuboidsByMode(CuboidModeEnum.RECOMMEND_MISSING);
        Preconditions.checkNotNull(recommendCuboidsSet, "The recommend cuboid map could not be null");
        return recommendCuboidsSet.stream().mapToLong(Long::longValue).toArray();
    }

    /**
     * Copies size, build and input information onto the optimized segment, merges the shard numbers
     * of the already existing cuboids with those produced by this job, and persists the segment.
     *
     * @throws IOException if the cube update fails
     */
    protected void updateOptimizeSegmentInfo() throws IOException {
        CubeInstance cubeCopy = this.optSeg.getCubeInstance().latestCopyForWrite();
        CubeUpdate update = new CubeUpdate(cubeCopy);
        this.optSeg.setSizeKB(this.optSegInfo.getAllLayoutSize() / 1024L);
        this.optSeg.setLastBuildTime(System.currentTimeMillis());
        this.optSeg.setLastBuildJobID(this.jobId);
        this.optSeg.setInputRecords(this.originalSeg.getInputRecords());
        Map<Long, Short> existingShardNums = this.originalSeg.getCuboidShardNums();
        for (Long cuboidId : cubeCopy.getCuboidsByMode(CuboidModeEnum.RECOMMEND_EXISTING)) {
            Short existingShardNum = existingShardNums.get(cuboidId);
            if (existingShardNum == null) {
                // A concurrent map rejects null values; skip instead of failing the whole update.
                logger.warn("No shard number found in original segment for existing cuboid {}", cuboidId);
                continue;
            }
            this.cuboidShardNum.putIfAbsent(cuboidId, existingShardNum);
        }
        this.optSeg.setCuboidShardNums(this.cuboidShardNum);
        this.optSeg.setInputRecordsSize(this.originalSeg.getInputRecordsSize());
        Map<String, String> additionalInfo = this.optSeg.getAdditionalInfo();
        additionalInfo.put("storageType", "4");
        this.optSeg.setAdditionalInfo(additionalInfo);
        update.setToUpdateSegs(new CubeSegment[]{this.optSeg});
        this.cubeManager.updateCube(update, true);
    }

    /**
     * Builds all layers of the spanning tree, starting from the given sources. Each call to
     * {@link #buildLayer} returns the sources of the next layer; building stops when none are left.
     */
    private void build(Collection<NBuildSourceInfo> buildSourceInfos, SegmentInfo seg, SpanningTree st) {
        List<NBuildSourceInfo> layer = this.buildLayer(buildSourceInfos, seg, st);
        while (!layer.isEmpty()) {
            layer = this.buildLayer(layer, seg, st);
        }
    }

    /**
     * Builds one layer: cuboids that do not exist yet are submitted as build jobs, cuboids that
     * already exist (RECOMMEND_EXISTING) only get their layout information refreshed.
     *
     * @return the build sources of the next layer, empty when the current layer has no children
     */
    private List<NBuildSourceInfo> buildLayer(Collection<NBuildSourceInfo> buildSourceInfos, final SegmentInfo seg, final SpanningTree st) {
        int cuboidsNumInLayer = 0;
        List<LayoutEntity> allIndexesInCurrentLayer = new ArrayList<LayoutEntity>();
        // Loop-invariant: look the set of already existing cuboids up once per layer.
        Set<Long> existingCuboidIds = this.cubeInstance.getCuboidsByMode(CuboidModeEnum.RECOMMEND_EXISTING);
        for (final NBuildSourceInfo info : buildSourceInfos) {
            Collection<LayoutEntity> toBuildCuboids = info.getToBuildCuboids();
            this.infos.recordParent2Children(info.getLayout(), toBuildCuboids.stream().map(LayoutEntity::getId).collect(Collectors.toList()));
            cuboidsNumInLayer += toBuildCuboids.size();
            Preconditions.checkState(!toBuildCuboids.isEmpty(), "To be built cuboids is empty.");
            final Dataset<Row> parentDS = info.getParentDS();
            // Validate before the first use of the dataset (the count() below).
            Preconditions.checkNotNull(parentDS, "Parent dataset is null when building.");
            if (info.getLayoutId() == ParentSourceChooser.FLAT_TABLE_FLAG()) {
                this.cuboidsRowCount.putIfAbsent(info.getLayoutId(), parentDS.count());
            }
            for (final LayoutEntity index : toBuildCuboids) {
                if (!existingCuboidIds.contains(index.getId())) {
                    this.infos.recordAddCuboids(index.getId());
                    this.buildLayoutWithUpdate.submit(new BuildLayoutWithUpdate.JobEntity(){

                        @Override
                        public String getName() {
                            return "build-cuboid-" + index.getId();
                        }

                        @Override
                        public LayoutEntity build() throws IOException {
                            return OptimizeBuildJob.this.buildCuboid(seg, index, parentDS, st, info.getLayoutId());
                        }

                        @Override
                        public NBuildSourceInfo getBuildSourceInfo() {
                            return null;
                        }
                    }, this.config);
                } else {
                    try {
                        this.updateExistingLayout(index, info.getLayoutId());
                    }
                    catch (IOException e) {
                        // Best effort: keep going, but keep the cause in the log.
                        logger.error("Failed to update existing cuboid info: {}", index.getId(), e);
                    }
                }
                allIndexesInCurrentLayer.add(index);
            }
        }
        this.infos.recordCuboidsNumPerLayer(seg.id(), cuboidsNumInLayer);
        this.buildLayoutWithUpdate.updateLayout(seg, this.config);
        st.decideTheNextLayer(allIndexesInCurrentLayer, seg);
        return this.constructTheNextLayerBuildInfos(st, seg, allIndexesInCurrentLayer);
    }

    /**
     * Creates one build source per layout of the current layer that has children in the spanning
     * tree. The source points at the layout's parquet storage path and lists its children as the
     * cuboids to build.
     */
    private List<NBuildSourceInfo> constructTheNextLayerBuildInfos(SpanningTree st, SegmentInfo seg, Collection<LayoutEntity> allIndexesInCurrentLayer) {
        List<NBuildSourceInfo> childrenBuildSourceInfos = new ArrayList<NBuildSourceInfo>();
        for (LayoutEntity index : allIndexesInCurrentLayer) {
            Collection<LayoutEntity> children = st.getChildrenByIndexPlan(index);
            if (children.isEmpty()) {
                continue;
            }
            NBuildSourceInfo theRootLevelBuildInfos = new NBuildSourceInfo();
            theRootLevelBuildInfos.setSparkSession(this.ss);
            String path = PathManager.getParquetStoragePath(this.config, this.getParam("cubeName"), seg.name(), seg.identifier(), String.valueOf(index.getId()));
            theRootLevelBuildInfos.setLayoutId(index.getId());
            theRootLevelBuildInfos.setParentStoragePath(path);
            theRootLevelBuildInfos.setToBuildCuboids(children);
            childrenBuildSourceInfos.add(theRootLevelBuildInfos);
        }
        return childrenBuildSourceInfos;
    }

    /**
     * Returns the number of cores to request. When the task-impact-instance switch is enabled the
     * value is the maximum leaf task number found in the job's share directory divided by the
     * configured core factor; otherwise the configured total is returned.
     *
     * @throws Exception if the share directory cannot be read
     */
    @Override
    protected String calculateRequiredCores() throws Exception {
        if (this.config.getSparkEngineTaskImpactInstanceEnabled().booleanValue()) {
            Path shareDir = this.config.getJobTmpShareDir(this.project, this.jobId);
            String maxLeafTasksNums = this.maxLeafTasksNums(shareDir);
            logger.info("The maximum number of tasks required to run the job is {}", maxLeafTasksNums);
            int factor = this.config.getSparkEngineTaskCoreFactor();
            int i = Double.valueOf(maxLeafTasksNums).intValue() / factor;
            logger.info("require cores: {}", i);
            return String.valueOf(i);
        }
        return this.config.getSparkEngineRequiredTotalCores();
    }

    /**
     * Selects the maximum value stored in the cubing detect-item files of the given directory.
     *
     * @throws IOException if the directory cannot be listed
     */
    private String maxLeafTasksNums(Path shareDir) throws IOException {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        FileStatus[] fileStatuses = fs.listStatus(shareDir, path -> path.toString().endsWith(ResourceDetectUtils.cubingDetectItemFileSuffix()));
        return ResourceDetectUtils.selectMaxValueInFiles(fileStatuses);
    }

    /**
     * Builds a single cuboid from its parent dataset and saves it. Table indexes are a projection
     * of the parent, all other cuboids are aggregated first; in both cases rows are sorted within
     * partitions by the cuboid's dimensions.
     *
     * @param parentId layout id of the parent, or the flat table flag
     * @return the given cuboid, updated by {@link #saveAndUpdateLayout}
     * @throws IOException if saving the layout fails
     */
    private LayoutEntity buildCuboid(SegmentInfo seg, LayoutEntity cuboid, Dataset<Row> parent, SpanningTree spanningTree, long parentId) throws IOException {
        String parentName = parentId == ParentSourceChooser.FLAT_TABLE_FLAG() ? "flat table" : String.valueOf(parentId);
        logger.info("Build index:{}, in segment:{}", cuboid.getId(), seg.id());
        Set<Integer> dimIndexes = cuboid.getOrderedDimensions().keySet();
        try {
            Dataset<Row> sorted;
            if (cuboid.isTableIndex()) {
                Dataset<Row> projected = parent.select(NSparkCubingUtil.getColumns(dimIndexes));
                sorted = projected.select(NSparkCubingUtil.getColumns(dimIndexes)).sortWithinPartitions(NSparkCubingUtil.getColumns(dimIndexes));
            } else {
                Dataset<Row> aggregated = CuboidAggregator.agg(this.ss, parent, dimIndexes, cuboid.getOrderedMeasures(), spanningTree, false);
                sorted = aggregated.select(NSparkCubingUtil.getColumns(dimIndexes, cuboid.getOrderedMeasures().keySet())).sortWithinPartitions(NSparkCubingUtil.getColumns(dimIndexes));
            }
            logger.info("Build layout:{}, in index:{}", cuboid.getId(), cuboid.getId());
            this.ss.sparkContext().setJobDescription("build " + cuboid.getId() + " from parent " + parentName);
            this.saveAndUpdateLayout(sorted, seg, cuboid, parentId);
        }
        finally {
            // Always reset the description so a failed build does not leave it on this thread.
            this.ss.sparkContext().setJobDescription(null);
        }
        logger.info("Finished Build index :{}, in segment:{}", cuboid.getId(), seg.id());
        return cuboid;
    }

    /**
     * Saves the dataset to the layout's temp path, records row counts and the shard number on the
     * layout, and finally fills in the cuboid info for the layout's storage path.
     *
     * <p>Row counts come from the job metrics; when the metrics report -1 the dataset is counted
     * instead and the layout is recorded as abnormal.
     *
     * @throws IOException if saving or repartitioning fails
     */
    private void saveAndUpdateLayout(Dataset<Row> dataset, SegmentInfo seg, LayoutEntity layout, long parentId) throws IOException {
        long layoutId = layout.getId();
        String queryExecutionId = UUID.randomUUID().toString();
        NSparkCubingEngine.NSparkCubingStorage storage = StorageFactory.createEngineAdapter(layout, NSparkCubingEngine.NSparkCubingStorage.class);
        String path = PathManager.getParquetStoragePath(this.config, this.getParam("cubeName"), seg.name(), seg.identifier(), String.valueOf(layoutId));
        String tempPath = path + TEMP_DIR_SUFFIX;
        this.ss.sparkContext().setLocalProperty(QueryExecutionCache.N_EXECUTION_ID_KEY(), queryExecutionId);
        try {
            logger.info("Cuboids are saved to temp path : {}", tempPath);
            storage.saveTo(tempPath, dataset, this.ss);
            JobMetrics metrics = JobMetricsUtils.collectMetrics(queryExecutionId);
            long rowCount = metrics.getMetrics(Metrics.CUBOID_ROWS_CNT());
            if (rowCount == -1L) {
                this.infos.recordAbnormalLayouts(layoutId, "'Job metrics seems null, use count() to collect cuboid rows.'");
                logger.debug("Can not get cuboid row cnt, use count() to collect cuboid rows.");
                long cuboidRowCnt = dataset.count();
                layout.setRows(cuboidRowCnt);
                this.cuboidsRowCount.putIfAbsent(layoutId, cuboidRowCnt);
                this.setSourceRowsFromParent(layout, parentId);
            } else {
                layout.setRows(rowCount);
                layout.setSourceRows(metrics.getMetrics(Metrics.SOURCE_ROWS_CNT()));
            }
            int shardNum = BuildUtils.repartitionIfNeed(layout, storage, path, tempPath, this.cubeInstance.getConfig(), this.ss);
            layout.setShardNum(shardNum);
            this.cuboidShardNum.put(layoutId, (short)shardNum);
        }
        finally {
            // Clean up even on failure so the execution id does not stay on this thread or in the cache.
            this.ss.sparkContext().setLocalProperty(QueryExecutionCache.N_EXECUTION_ID_KEY(), null);
            QueryExecutionCache.removeQueryExecution(queryExecutionId);
        }
        BuildUtils.fillCuboidInfo(layout, path);
    }

    /**
     * Refreshes row count, file count, byte size, source rows and shard number of a cuboid that
     * already exists in storage, then stores the layout on the optimized segment info.
     *
     * @throws IOException if the storage path cannot be inspected
     */
    private void updateExistingLayout(LayoutEntity layout, long parentId) throws IOException {
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        long layoutId = layout.getId();
        String path = PathManager.getParquetStoragePath(this.config, this.cubeInstance.getName(), this.optSegInfo.name(), this.optSegInfo.identifier(), String.valueOf(layoutId));
        Dataset<Row> dataset = StorageFactory.createEngineAdapter(layout, NSparkCubingEngine.NSparkCubingStorage.class).getFrom(path, this.ss);
        logger.debug("Existing cuboid, use count() to collect cuboid rows.");
        long cuboidRowCnt = dataset.count();
        ContentSummary cs = HadoopUtil.getContentSummary(fs, new Path(path));
        layout.setRows(cuboidRowCnt);
        layout.setFileCount(cs.getFileCount());
        layout.setByteSize(cs.getLength());
        this.cuboidsRowCount.putIfAbsent(layoutId, cuboidRowCnt);
        this.setSourceRowsFromParent(layout, parentId);
        Short existingShardNum = this.originalSeg.getCuboidShardNums().get(layoutId);
        if (existingShardNum != null) {
            layout.setShardNum(existingShardNum.shortValue());
        } else {
            // Unboxing a missing entry would throw a NullPointerException the caller does not catch.
            logger.warn("No shard number found in original segment for existing cuboid {}", layoutId);
        }
        this.optSegInfo.updateLayout(layout);
    }

    /**
     * Sets the layout's source rows to the recorded row count of its parent. The parent's count is
     * only recorded when it was obtained through count(), so it may be missing; in that case the
     * layout is left unchanged and a warning is logged.
     */
    private void setSourceRowsFromParent(LayoutEntity layout, long parentId) {
        Long parentRows = this.cuboidsRowCount.get(parentId);
        if (parentRows == null) {
            logger.warn("Row count of parent {} is unknown, source rows of cuboid {} are not updated", parentId, layout.getId());
            return;
        }
        layout.setSourceRows(parentRows);
    }

    /**
     * Returns the job info text produced by {@code LogJobInfoUtils.dfOptimizeJobInfo()}.
     */
    @Override
    protected String generateInfo() {
        return LogJobInfoUtils.dfOptimizeJobInfo();
    }
}

