/*
 * Decompiled with CFR 0.152.
 */
package org.apache.pinot.plugin.ingestion.batch.spark3;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.common.segment.generation.SegmentGenerationUtils;
import org.apache.pinot.common.utils.TarGzCompressionUtils;
import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationJobUtils;
import org.apache.pinot.plugin.ingestion.batch.common.SegmentGenerationTaskRunner;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.filesystem.PinotFS;
import org.apache.pinot.spi.filesystem.PinotFSFactory;
import org.apache.pinot.spi.ingestion.batch.runner.IngestionJobRunner;
import org.apache.pinot.spi.ingestion.batch.spec.PinotClusterSpec;
import org.apache.pinot.spi.ingestion.batch.spec.PinotFSSpec;
import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationJobSpec;
import org.apache.pinot.spi.ingestion.batch.spec.SegmentGenerationTaskSpec;
import org.apache.pinot.spi.plugin.PluginManager;
import org.apache.pinot.spi.utils.DataSizeUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SparkSegmentGenerationJobRunner
implements IngestionJobRunner,
Serializable {
    private static final Logger LOGGER = LoggerFactory.getLogger(SparkSegmentGenerationJobRunner.class);
    private static final String DEPS_JAR_DIR = "dependencyJarDir";
    private static final String STAGING_DIR = "stagingDir";
    private SegmentGenerationJobSpec _spec;

    public SparkSegmentGenerationJobRunner() {
    }

    public SparkSegmentGenerationJobRunner(SegmentGenerationJobSpec spec) {
        this.init(spec);
    }

    public void init(SegmentGenerationJobSpec spec) {
        PinotClusterSpec pinotClusterSpec;
        this._spec = spec;
        if (this._spec.getInputDirURI() == null) {
            throw new RuntimeException("Missing property 'inputDirURI' in 'jobSpec' file");
        }
        if (this._spec.getOutputDirURI() == null) {
            throw new RuntimeException("Missing property 'outputDirURI' in 'jobSpec' file");
        }
        if (this._spec.getRecordReaderSpec() == null) {
            throw new RuntimeException("Missing property 'recordReaderSpec' in 'jobSpec' file");
        }
        if (this._spec.getTableSpec() == null) {
            throw new RuntimeException("Missing property 'tableSpec' in 'jobSpec' file");
        }
        if (this._spec.getTableSpec().getTableName() == null) {
            throw new RuntimeException("Missing property 'tableName' in 'tableSpec'");
        }
        if (this._spec.getTableSpec().getSchemaURI() == null) {
            if (this._spec.getPinotClusterSpecs() == null || this._spec.getPinotClusterSpecs().length == 0) {
                throw new RuntimeException("Missing property 'schemaURI' in 'tableSpec'");
            }
            pinotClusterSpec = this._spec.getPinotClusterSpecs()[0];
            String schemaURI = SegmentGenerationUtils.generateSchemaURI((String)pinotClusterSpec.getControllerURI(), (String)this._spec.getTableSpec().getTableName());
            this._spec.getTableSpec().setSchemaURI(schemaURI);
        }
        if (this._spec.getTableSpec().getTableConfigURI() == null) {
            if (this._spec.getPinotClusterSpecs() == null || this._spec.getPinotClusterSpecs().length == 0) {
                throw new RuntimeException("Missing property 'tableConfigURI' in 'tableSpec'");
            }
            pinotClusterSpec = this._spec.getPinotClusterSpecs()[0];
            String tableConfigURI = SegmentGenerationUtils.generateTableConfigURI((String)pinotClusterSpec.getControllerURI(), (String)this._spec.getTableSpec().getTableName());
            this._spec.getTableSpec().setTableConfigURI(tableConfigURI);
        }
        if (this._spec.getExecutionFrameworkSpec().getExtraConfigs() == null) {
            this._spec.getExecutionFrameworkSpec().setExtraConfigs(new HashMap());
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    public void run() throws Exception {
        List pinotFSSpecs = this._spec.getPinotFSSpecs();
        for (PinotFSSpec pinotFSSpec : pinotFSSpecs) {
            PinotFSFactory.register((String)pinotFSSpec.getScheme(), (String)pinotFSSpec.getClassName(), (PinotConfiguration)new PinotConfiguration(pinotFSSpec));
        }
        URI inputDirURI = new URI(this._spec.getInputDirURI());
        if (inputDirURI.getScheme() == null) {
            inputDirURI = new File(this._spec.getInputDirURI()).toURI();
        }
        PinotFS inputDirFS = PinotFSFactory.create((String)inputDirURI.getScheme());
        List filteredFiles = SegmentGenerationUtils.listMatchedFilesWithRecursiveOption((PinotFS)inputDirFS, (URI)inputDirURI, (String)this._spec.getIncludeFileNamePattern(), (String)this._spec.getExcludeFileNamePattern(), (boolean)this._spec.isSearchRecursively());
        LOGGER.info("Found {} files to create Pinot segments!", (Object)filteredFiles.size());
        URI outputDirURI = new URI(this._spec.getOutputDirURI());
        if (outputDirURI.getScheme() == null) {
            outputDirURI = new File(this._spec.getOutputDirURI()).toURI();
        }
        PinotFS outputDirFS = PinotFSFactory.create((String)outputDirURI.getScheme());
        outputDirFS.mkdir(outputDirURI);
        String stagingDir = (String)this._spec.getExecutionFrameworkSpec().getExtraConfigs().get(STAGING_DIR);
        URI stagingDirURI = null;
        if (stagingDir != null) {
            stagingDirURI = URI.create(stagingDir);
            if (stagingDirURI.getScheme() == null) {
                stagingDirURI = new File(stagingDir).toURI();
            }
            if (!outputDirURI.getScheme().equals(stagingDirURI.getScheme())) {
                throw new RuntimeException(String.format("The scheme of staging directory URI [%s] and output directory URI [%s] has to be same.", stagingDirURI, outputDirURI));
            }
            outputDirFS.mkdir(stagingDirURI);
        }
        try {
            JavaSparkContext sparkContext = JavaSparkContext.fromSparkContext((SparkContext)SparkContext.getOrCreate());
            this.packPluginsToDistributedCache(sparkContext);
            if (this._spec.getExecutionFrameworkSpec().getExtraConfigs().containsKey(DEPS_JAR_DIR)) {
                this.addDepsJarToDistributedCache(sparkContext, (String)this._spec.getExecutionFrameworkSpec().getExtraConfigs().get(DEPS_JAR_DIR));
            }
            ArrayList<String> pathAndIdxList = new ArrayList<String>();
            if (!SegmentGenerationJobUtils.useGlobalDirectorySequenceId(this._spec.getSegmentNameGeneratorSpec())) {
                HashMap localDirIndex = new HashMap();
                for (String filteredFile : filteredFiles) {
                    Path filteredParentPath = Paths.get(filteredFile, new String[0]).getParent();
                    if (!localDirIndex.containsKey(filteredParentPath.toString())) {
                        localDirIndex.put(filteredParentPath.toString(), new ArrayList());
                    }
                    ((List)localDirIndex.get(filteredParentPath.toString())).add(filteredFile);
                }
                for (String parentPath : localDirIndex.keySet()) {
                    List siblingFiles = (List)localDirIndex.get(parentPath);
                    Collections.sort(siblingFiles);
                    for (int i = 0; i < siblingFiles.size(); ++i) {
                        pathAndIdxList.add(String.format("%s %d", siblingFiles.get(i), i));
                    }
                }
            } else {
                for (int i = 0; i < filteredFiles.size(); ++i) {
                    pathAndIdxList.add(String.format("%s %d", filteredFiles.get(i), i));
                }
            }
            int numDataFiles = pathAndIdxList.size();
            int jobParallelism = this._spec.getSegmentCreationJobParallelism();
            if (jobParallelism <= 0 || jobParallelism > numDataFiles) {
                jobParallelism = numDataFiles;
            }
            JavaRDD pathRDD = sparkContext.parallelize(pathAndIdxList, jobParallelism);
            final String pluginsInclude = sparkContext.getConf().contains("plugins.include") ? sparkContext.getConf().get("plugins.include") : null;
            final URI finalInputDirURI = inputDirURI;
            final URI finalOutputDirURI = stagingDirURI == null ? outputDirURI : stagingDirURI;
            pathRDD.foreach((VoidFunction)new VoidFunction<String>(){

                public void call(String pathAndIdx) throws Exception {
                    PluginManager.get().init();
                    for (PinotFSSpec pinotFSSpec : SparkSegmentGenerationJobRunner.this._spec.getPinotFSSpecs()) {
                        PinotFSFactory.register((String)pinotFSSpec.getScheme(), (String)pinotFSSpec.getClassName(), (PinotConfiguration)new PinotConfiguration(pinotFSSpec));
                    }
                    PinotFS finalOutputDirFS = PinotFSFactory.create((String)finalOutputDirURI.getScheme());
                    String[] splits = pathAndIdx.split(" ");
                    String path = splits[0];
                    int idx = Integer.valueOf(splits[1]);
                    File localPluginsTarFile = new File("pinot-plugins.tar.gz");
                    if (localPluginsTarFile.exists()) {
                        File pluginsDirFile = new File("pinot-plugins-dir-" + idx);
                        try {
                            TarGzCompressionUtils.untar((File)localPluginsTarFile, (File)pluginsDirFile);
                        }
                        catch (Exception e) {
                            LOGGER.error("Failed to untar local Pinot plugins tarball file [{}]", (Object)localPluginsTarFile, (Object)e);
                            throw new RuntimeException(e);
                        }
                        LOGGER.info("Trying to set System Property: [{}={}]", (Object)"plugins.dir", (Object)pluginsDirFile.getAbsolutePath());
                        System.setProperty("plugins.dir", pluginsDirFile.getAbsolutePath());
                        if (pluginsInclude != null) {
                            LOGGER.info("Trying to set System Property: [{}={}]", (Object)"plugins.include", (Object)pluginsInclude);
                            System.setProperty("plugins.include", pluginsInclude);
                        }
                        LOGGER.info("Pinot plugins System Properties are set at [{}], plugins includes [{}]", (Object)System.getProperty("plugins.dir"), (Object)System.getProperty("plugins.include"));
                    } else {
                        LOGGER.warn("Cannot find local Pinot plugins tar file at [{}]", (Object)localPluginsTarFile.getAbsolutePath());
                    }
                    URI inputFileURI = URI.create(path);
                    if (inputFileURI.getScheme() == null) {
                        inputFileURI = new URI(finalInputDirURI.getScheme(), inputFileURI.getSchemeSpecificPart(), inputFileURI.getFragment());
                    }
                    File localTempDir = new File(FileUtils.getTempDirectory(), "pinot-" + UUID.randomUUID());
                    File localInputTempDir = new File(localTempDir, "input");
                    FileUtils.forceMkdir((File)localInputTempDir);
                    File localOutputTempDir = new File(localTempDir, "output");
                    FileUtils.forceMkdir((File)localOutputTempDir);
                    File localInputDataFile = new File(localInputTempDir, SegmentGenerationUtils.getFileName((URI)inputFileURI));
                    LOGGER.info("Trying to copy input file from {} to {}", (Object)inputFileURI, (Object)localInputDataFile);
                    PinotFSFactory.create((String)inputFileURI.getScheme()).copyToLocalFile(inputFileURI, localInputDataFile);
                    SegmentGenerationTaskSpec taskSpec = new SegmentGenerationTaskSpec();
                    taskSpec.setInputFilePath(localInputDataFile.getAbsolutePath());
                    taskSpec.setOutputDirectoryPath(localOutputTempDir.getAbsolutePath());
                    taskSpec.setRecordReaderSpec(SparkSegmentGenerationJobRunner.this._spec.getRecordReaderSpec());
                    taskSpec.setSchema(SegmentGenerationUtils.getSchema((String)SparkSegmentGenerationJobRunner.this._spec.getTableSpec().getSchemaURI(), (String)SparkSegmentGenerationJobRunner.this._spec.getAuthToken()));
                    taskSpec.setTableConfig(SegmentGenerationUtils.getTableConfig((String)SparkSegmentGenerationJobRunner.this._spec.getTableSpec().getTableConfigURI(), (String)SparkSegmentGenerationJobRunner.this._spec.getAuthToken()));
                    taskSpec.setSequenceId(idx);
                    taskSpec.setSegmentNameGeneratorSpec(SparkSegmentGenerationJobRunner.this._spec.getSegmentNameGeneratorSpec());
                    taskSpec.setFailOnEmptySegment(SparkSegmentGenerationJobRunner.this._spec.isFailOnEmptySegment());
                    taskSpec.setCustomProperty("input.data.file.uri", inputFileURI.toString());
                    SegmentGenerationTaskRunner taskRunner = new SegmentGenerationTaskRunner(taskSpec);
                    String segmentName = taskRunner.run();
                    File localSegmentDir = new File(localOutputTempDir, segmentName);
                    String segmentTarFileName = URLEncoder.encode(segmentName + ".tar.gz", "UTF-8");
                    File localSegmentTarFile = new File(localOutputTempDir, segmentTarFileName);
                    LOGGER.info("Tarring segment from: {} to: {}", (Object)localSegmentDir, (Object)localSegmentTarFile);
                    TarGzCompressionUtils.createTarGzFile((File)localSegmentDir, (File)localSegmentTarFile);
                    long uncompressedSegmentSize = FileUtils.sizeOf((File)localSegmentDir);
                    long compressedSegmentSize = FileUtils.sizeOf((File)localSegmentTarFile);
                    LOGGER.info("Size for segment: {}, uncompressed: {}, compressed: {}", segmentName, DataSizeUtils.fromBytes((long)uncompressedSegmentSize), DataSizeUtils.fromBytes((long)compressedSegmentSize));
                    URI outputSegmentTarURI = SegmentGenerationUtils.getRelativeOutputPath((URI)finalInputDirURI, (URI)inputFileURI, (URI)finalOutputDirURI).resolve(segmentTarFileName);
                    LOGGER.info("Trying to move segment tar file from: [{}] to [{}]", (Object)localSegmentTarFile, (Object)outputSegmentTarURI);
                    if (!SparkSegmentGenerationJobRunner.this._spec.isOverwriteOutput() && PinotFSFactory.create((String)outputSegmentTarURI.getScheme()).exists(outputSegmentTarURI)) {
                        LOGGER.warn("Not overwrite existing output segment tar file: {}", (Object)finalOutputDirFS.exists(outputSegmentTarURI));
                    } else {
                        finalOutputDirFS.copyFromLocalFile(localSegmentTarFile, outputSegmentTarURI);
                    }
                    FileUtils.deleteQuietly((File)localSegmentDir);
                    FileUtils.deleteQuietly((File)localSegmentTarFile);
                    FileUtils.deleteQuietly((File)localInputDataFile);
                }
            });
            if (stagingDirURI != null) {
                LOGGER.info("Trying to copy segment tars from staging directory: [{}] to output directory [{}]", (Object)stagingDirURI, (Object)outputDirURI);
                outputDirFS.copyDir(stagingDirURI, outputDirURI);
            }
        }
        finally {
            if (stagingDirURI != null) {
                LOGGER.info("Trying to clean up staging directory: [{}]", (Object)stagingDirURI);
                outputDirFS.delete(stagingDirURI, true);
            }
        }
    }

    protected void addDepsJarToDistributedCache(JavaSparkContext sparkContext, String depsJarDir) throws IOException {
        if (depsJarDir != null) {
            String[] files;
            URI depsJarDirURI = URI.create(depsJarDir);
            if (depsJarDirURI.getScheme() == null) {
                depsJarDirURI = new File(depsJarDir).toURI();
            }
            PinotFS pinotFS = PinotFSFactory.create((String)depsJarDirURI.getScheme());
            for (String file : files = pinotFS.listFiles(depsJarDirURI, true)) {
                if (pinotFS.isDirectory(URI.create(file)) || !file.endsWith(".jar")) continue;
                LOGGER.info("Adding deps jar: {} to distributed cache", (Object)file);
                sparkContext.addJar(file);
            }
        }
    }

    protected void packPluginsToDistributedCache(JavaSparkContext sparkContext) {
        String[] pluginDirectories = PluginManager.get().getPluginsDirectories();
        if (pluginDirectories == null) {
            LOGGER.warn("Plugin directories is null, skipping packaging...");
            return;
        }
        ArrayList<File> validPluginDirectories = new ArrayList<File>();
        for (String pluginsDirPath : pluginDirectories) {
            File pluginsDir = new File(pluginsDirPath);
            if (!pluginsDir.exists()) {
                LOGGER.warn("Cannot find Pinot plugins directory at [{}]", (Object)pluginsDirPath);
                return;
            }
            validPluginDirectories.add(pluginsDir);
        }
        File pluginsTarGzFile = new File("pinot-plugins.tar.gz");
        try {
            File[] files = validPluginDirectories.toArray(new File[0]);
            TarGzCompressionUtils.createTarGzFile((File[])files, (File)pluginsTarGzFile);
        }
        catch (IOException e) {
            LOGGER.error("Failed to tar plugins directories", e);
        }
        sparkContext.addFile(pluginsTarGzFile.getAbsolutePath());
        String pluginsIncludes = System.getProperty("plugins.include");
        if (pluginsIncludes != null) {
            sparkContext.getConf().set("plugins.include", pluginsIncludes);
        }
    }
}

