/*
 * Decompiled with CFR 0.152.
 */
package org.apache.accumulo.core.file.rfile;

import com.beust.jcommander.Parameter;
import com.google.auto.service.AutoService;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.accumulo.core.cli.ConfigOpts;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.SiteConfiguration;
import org.apache.accumulo.core.crypto.CryptoFactoryLoader;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.rfile.PrintInfo;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iteratorsImpl.system.MultiIterator;
import org.apache.accumulo.core.spi.crypto.CryptoEnvironment;
import org.apache.accumulo.core.spi.crypto.CryptoService;
import org.apache.accumulo.core.util.TextUtil;
import org.apache.accumulo.start.spi.KeywordExecutable;
import org.apache.datasketches.quantiles.ItemsSketch;
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
import org.apache.datasketches.quantilescommon.QuantilesUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line utility, registered under the keyword {@code generate-splits}, that derives split
 * points from one or more rfiles.
 *
 * <p>Two mutually exclusive modes are supported (see {@link Opts}):
 * <ul>
 * <li>{@code -n}: produce a requested number of split points. The index keys of the files are
 * tried first; when they yield fewer candidates than requested, all files are fully scanned.</li>
 * <li>{@code -ss}: scan all files and emit a split point each time the accumulated key+value size
 * exceeds the given number of bytes.</li>
 * </ul>
 *
 * <p>Split points are printed to standard out, or written to the file given with {@code -sf}.
 */
@AutoService(KeywordExecutable.class)
@SuppressFBWarnings(value = "PATH_TRAVERSAL_OUT", justification = "app is run in same security context as user providing the filename")
public class GenerateSplits implements KeywordExecutable {
    private static final Logger log = LoggerFactory.getLogger(GenerateSplits.class);

    @Override
    public String keyword() {
        return "generate-splits";
    }

    @Override
    public String description() {
        return "Generate split points from a set of 1 or more rfiles";
    }

    public static void main(String[] args) throws Exception {
        new GenerateSplits().execute(args);
    }

    /**
     * Parses the command line, collects the rfiles to inspect, computes the split points and
     * writes them out.
     *
     * @param args command line arguments, see {@link Opts}
     * @throws IllegalArgumentException if no files are given or found, if a file does not end
     *         with {@code .rf}, if either numeric option is negative, or if not exactly one of
     *         the number of splits and the split size is requested
     * @throws IOException if reading the rfiles or writing the splits file fails
     */
    @Override
    public void execute(String[] args) throws Exception {
        Opts opts = new Opts();
        opts.parseArgs(GenerateSplits.class.getName(), args);
        if (opts.files.isEmpty()) {
            throw new IllegalArgumentException("No files were given");
        }

        Configuration hadoopConf = new Configuration();
        SiteConfiguration siteConf = opts.getSiteConfiguration();
        CryptoService cryptoService = CryptoFactoryLoader.getServiceForClient(CryptoEnvironment.Scope.TABLE, siteConf.getAllCryptoProperties());
        boolean encode = opts.base64encode;

        // Negative values would otherwise slip past the two checks below and either fail deep
        // inside the quantile computation or turn every key into a split point.
        if (opts.numSplits < 0 || opts.splitSize < 0L) {
            throw new IllegalArgumentException("Number of splits and split size must not be negative.");
        }
        if (opts.numSplits > 0 && opts.splitSize > 0L) {
            throw new IllegalArgumentException("Requested number of splits and split size.");
        }
        if (opts.numSplits == 0 && opts.splitSize == 0L) {
            throw new IllegalArgumentException("Required number of splits or split size.");
        }
        int requestedNumSplits = opts.numSplits;
        long splitSize = opts.splitSize;
        // After validation exactly one of the two options is positive.
        boolean splitBySize = splitSize > 0L;

        // NOTE: the file system resolved for the last argument is the one used to read every
        // collected file below.
        FileSystem fs = FileSystem.get(hadoopConf);
        List<Path> filePaths = new ArrayList<>();
        for (String file : opts.files) {
            Path path = new Path(file);
            fs = PrintInfo.resolveFS(log, hadoopConf, path);
            filePaths.addAll(getFiles(fs, path));
        }
        if (filePaths.isEmpty()) {
            throw new IllegalArgumentException("No files were found in " + opts.files);
        }
        log.trace("Found the following files: {}", filePaths);

        TreeSet<String> splits;
        if (splitBySize) {
            splits = getSplitsBySize(siteConf, hadoopConf, filePaths, fs, splitSize, encode, cryptoService);
        } else {
            splits = getIndexKeys(siteConf, hadoopConf, fs, filePaths, requestedNumSplits, encode, cryptoService);
            if (splits.size() < requestedNumSplits) {
                log.info("Only found {} indexed keys but need {}. Doing a full scan on files {}", splits.size(), requestedNumSplits, filePaths);
                splits = getSplitsFromFullScan(siteConf, hadoopConf, filePaths, fs, requestedNumSplits, encode, cryptoService);
            }
        }

        int numFound = splits.size();
        TreeSet<String> desiredSplits;
        if (!splitBySize && numFound > requestedNumSplits) {
            // More candidates than requested: thin them out evenly.
            desiredSplits = getEvenlySpacedSplits(numFound, requestedNumSplits, splits.iterator());
        } else {
            if (numFound < requestedNumSplits) {
                log.warn("Only found {} splits", numFound);
            }
            desiredSplits = splits;
        }
        log.info("Generated {} splits", desiredSplits.size());

        if (opts.outputFile != null) {
            log.info("Writing splits to file {} ", opts.outputFile);
            try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(opts.outputFile), StandardCharsets.UTF_8)))) {
                desiredSplits.forEach(writer::println);
                // PrintWriter never throws on write failures; it only records them.
                if (writer.checkError()) {
                    throw new IOException("Failed to write splits to file " + opts.outputFile);
                }
            }
        } else {
            desiredSplits.forEach(System.out::println);
        }
    }

    /**
     * Collects the rfiles denoted by {@code path}: the path itself when it is a file, or every
     * file found recursively below it when it is a directory.
     *
     * @throws IllegalArgumentException if any collected file does not end with {@code .rf}
     */
    private List<Path> getFiles(FileSystem fs, Path path) throws IOException {
        List<Path> filePaths = new ArrayList<>();
        if (fs.getFileStatus(path).isDirectory()) {
            // The recursive listing only yields files, so no further directory check is needed.
            RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, true);
            while (iter.hasNext()) {
                filePaths.add(requireRFile(iter.next().getPath()));
            }
        } else {
            filePaths.add(requireRFile(path));
        }
        return filePaths;
    }

    /** Returns {@code path} unchanged, rejecting anything whose name does not end with '.rf'. */
    private static Path requireRFile(Path path) {
        if (!path.toString().endsWith(".rf")) {
            throw new IllegalArgumentException("Provided file (" + path + ") does not end with '.rf'");
        }
        return path;
    }

    /**
     * Consumes the iterator from its current position and returns the rows found at
     * {@code numSplits + 1} equally weighted ranks, without the first and the last returned
     * quantile (presumably the minimum and maximum row).
     *
     * @return the quantile rows, or an empty array when the iterator had no entries
     */
    private Text[] getQuantiles(SortedKeyValueIterator<Key, Value> iterator, int numSplits) throws IOException {
        ItemsSketch<Text> itemsSketch = ItemsSketch.getInstance(Text.class, BinaryComparable::compareTo);
        while (iterator.hasTop()) {
            Text row = iterator.getTopKey().getRow();
            itemsSketch.update(row);
            iterator.next();
        }
        // Quantiles cannot be requested from an empty sketch; no data simply means no splits.
        if (itemsSketch.isEmpty()) {
            return new Text[0];
        }
        double[] ranks = QuantilesUtil.equallyWeightedRanks(numSplits + 1);
        Text[] items = itemsSketch.getQuantiles(ranks, QuantileSearchCriteria.EXCLUSIVE);
        return Arrays.copyOfRange(items, 1, items.length - 1);
    }

    /**
     * Picks at most {@code requestedNumSplits} entries, spread evenly, out of the first
     * {@code numFound} entries supplied by {@code splitsIter}.
     *
     * <p>Every visited entry advances a progress counter by
     * {@code (requestedNumSplits + 1) / numFound}; an entry is selected whenever the counter
     * exceeds one, after which the counter is reduced by one so partial progress carries over.
     *
     * @param numFound number of entries to consume from {@code splitsIter}; the iterator must be
     *        able to supply that many
     * @param requestedNumSplits upper bound on the number of selected entries
     * @param splitsIter candidate split points
     * @return the selected split points
     */
    static TreeSet<String> getEvenlySpacedSplits(int numFound, long requestedNumSplits, Iterator<String> splitsIter) {
        TreeSet<String> desiredSplits = new TreeSet<>();
        double increment = (requestedNumSplits + 1.0) / numFound;
        log.debug("Found {} splits but requested {} so picking incrementally by {}", numFound, requestedNumSplits, increment);

        double progressToNextSplit = 0.0;
        for (int i = 0; i < numFound; i++) {
            progressToNextSplit += increment;
            String next = splitsIter.next();
            if (progressToNextSplit > 1.0 && desiredSplits.size() < requestedNumSplits) {
                desiredSplits.add(next);
                progressToNextSplit -= 1.0;
            }
        }
        return desiredSplits;
    }

    /**
     * Renders a row as a string.
     *
     * <p>With {@code encode} set, the bytes are Base64 encoded. Otherwise a backslash is emitted
     * as two backslashes, bytes in the printable ASCII range (space through tilde) are emitted
     * as-is, and every other byte is dropped (logged at debug level).
     *
     * @return the rendered row, or {@code null} when {@code text} is {@code null}
     */
    private static String encode(boolean encode, Text text) {
        if (text == null) {
            return null;
        }
        byte[] bytes = TextUtil.getBytes(text);
        if (encode) {
            return Base64.getEncoder().encodeToString(bytes);
        }
        StringBuilder sb = new StringBuilder();
        for (byte aByte : bytes) {
            int c = 0xFF & aByte;
            if (c == '\\') {
                sb.append("\\\\");
            } else if (c >= ' ' && c <= '~') {
                sb.append((char) c);
            } else {
                log.debug("Dropping non printable char: \\x{}", Integer.toHexString(c));
            }
        }
        return sb.toString();
    }

    /** Encodes every row with {@link #encode(boolean, Text)} and collects the results in sorted order. */
    private static TreeSet<String> toEncodedSet(Text[] rows, boolean base64encode) {
        return Arrays.stream(rows).map(row -> encode(base64encode, row)).collect(Collectors.toCollection(TreeSet::new));
    }

    /**
     * Closes every reader, even when closing one of them fails.
     *
     * @throws IOException the first failure encountered; later failures are attached as suppressed
     */
    private static void closeAll(List<FileSKVIterator> fileReaders) throws IOException {
        IOException failure = null;
        for (FileSKVIterator reader : fileReaders) {
            try {
                reader.close();
            } catch (IOException e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Computes candidate split points from the index readers of the given files.
     *
     * @return the encoded quantile rows of the merged index keys
     */
    private TreeSet<String> getIndexKeys(AccumuloConfiguration accumuloConf, Configuration hadoopConf, FileSystem fs, List<Path> files, int requestedNumSplits, boolean base64encode, CryptoService cs) throws IOException {
        Text[] splitArray;
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
        List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
        try {
            for (Path file : files) {
                FileSKVIterator reader = FileOperations.getInstance().newIndexReaderBuilder().forFile(file.toString(), fs, hadoopConf, cs).withTableConfiguration(accumuloConf).build();
                readers.add(reader);
                fileReaders.add(reader);
            }
            SortedKeyValueIterator<Key, Value> iterator = new MultiIterator(readers, true);
            splitArray = getQuantiles(iterator, requestedNumSplits);
        } finally {
            closeAll(fileReaders);
        }
        log.debug("Got {} splits from indices of {}", splitArray.length, files);
        return toEncodedSet(splitArray, base64encode);
    }

    /**
     * Computes candidate split points by scanning every entry of the given files.
     *
     * @return the encoded quantile rows of the merged file contents
     */
    private TreeSet<String> getSplitsFromFullScan(SiteConfiguration accumuloConf, Configuration hadoopConf, List<Path> files, FileSystem fs, int numSplits, boolean base64encode, CryptoService cs) throws IOException {
        Text[] splitArray;
        List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
        try {
            for (Path file : files) {
                FileSKVIterator reader = FileOperations.getInstance().newScanReaderBuilder().forFile(file.toString(), fs, hadoopConf, cs).withTableConfiguration(accumuloConf).overRange(new Range(), Set.of(), false).build();
                readers.add(reader);
                fileReaders.add(reader);
            }
            SortedKeyValueIterator<Key, Value> iterator = new MultiIterator(readers, false);
            iterator.seek(new Range(), Collections.emptySet(), false);
            splitArray = getQuantiles(iterator, numSplits);
        } finally {
            closeAll(fileReaders);
        }
        log.debug("Got {} splits from quantiles across {} files", splitArray.length, files.size());
        return toEncodedSet(splitArray, base64encode);
    }

    /**
     * Scans every entry of the given files and records the row of the entry at which the
     * accumulated key+value size first exceeds {@code splitSize}; the accumulator is then reset.
     *
     * @return the encoded rows chosen as split points
     */
    private TreeSet<String> getSplitsBySize(AccumuloConfiguration accumuloConf, Configuration hadoopConf, List<Path> files, FileSystem fs, long splitSize, boolean base64encode, CryptoService cs) throws IOException {
        long currentSplitSize = 0L;
        long totalSize = 0L;
        TreeSet<String> splits = new TreeSet<>();
        List<FileSKVIterator> fileReaders = new ArrayList<>(files.size());
        List<SortedKeyValueIterator<Key, Value>> readers = new ArrayList<>(files.size());
        try {
            for (Path file : files) {
                FileSKVIterator reader = FileOperations.getInstance().newScanReaderBuilder().forFile(file.toString(), fs, hadoopConf, cs).withTableConfiguration(accumuloConf).overRange(new Range(), Set.of(), false).build();
                readers.add(reader);
                fileReaders.add(reader);
            }
            SortedKeyValueIterator<Key, Value> iterator = new MultiIterator(readers, false);
            iterator.seek(new Range(), Collections.emptySet(), false);
            while (iterator.hasTop()) {
                Key key = iterator.getTopKey();
                Value val = iterator.getTopValue();
                int size = key.getSize() + val.getSize();
                totalSize += size;
                currentSplitSize += size;
                if (currentSplitSize > splitSize) {
                    splits.add(encode(base64encode, key.getRow()));
                    currentSplitSize = 0L;
                }
                iterator.next();
            }
        } finally {
            closeAll(fileReaders);
        }
        log.debug("Got {} splits with split size {} out of {} total bytes read across {} files", splits.size(), splitSize, totalSize, files.size());
        return splits;
    }

    /** Command line options of the {@code generate-splits} utility. */
    static class Opts extends ConfigOpts {
        @Parameter(names = {"-n", "--num"}, description = "The number of split points to generate. Can be used to create n+1 tablets. Cannot use with the split size option.")
        public int numSplits = 0;

        @Parameter(names = {"-ss", "--split-size"}, description = "The minimum split size in uncompressed bytes. Cannot use with num splits option.")
        public long splitSize = 0L;

        @Parameter(names = {"-b64", "--base64encoded"}, description = "Base 64 encode the split points")
        public boolean base64encode = false;

        @Parameter(names = {"-sf", "--splits-file"}, description = "Output the splits to a file")
        public String outputFile;

        @Parameter(description = "<file|directory>[ <file|directory>...] -n <num> | -ss <split_size>")
        public List<String> files = new ArrayList<>();
    }
}

