/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.transform;

import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.Serializable;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.matrix.CSVReblockMR;
import org.apache.sysml.runtime.matrix.mapred.MRConfigurationNames;
import org.apache.sysml.runtime.transform.BinAgent;
import org.apache.sysml.runtime.transform.DataTransform;
import org.apache.sysml.runtime.transform.DummycodeAgent;
import org.apache.sysml.runtime.transform.MVImputeAgent;
import org.apache.sysml.runtime.transform.OmitAgent;
import org.apache.sysml.runtime.transform.RecodeAgent;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/**
 * Shared state and helper routines for the data transformation agents
 * (omit, missing-value impute, recode, bin, dummycode).
 *
 * <p>An instance carries the parsed header, the field delimiter, the NA
 * strings, record counters and one agent per transformation method. The
 * class is {@link Serializable}; this file does not show where instances
 * are serialized.
 */
public class TfUtils
implements Serializable {
    private static final long serialVersionUID = 526252850872633125L;

    // Transformation method identifiers.
    public static final String TXMETHOD_IMPUTE = "impute";
    public static final String TXMETHOD_RECODE = "recode";
    public static final String TXMETHOD_BIN = "bin";
    public static final String TXMETHOD_DUMMYCODE = "dummycode";
    public static final String TXMETHOD_SCALE = "scale";
    public static final String TXMETHOD_OMIT = "omit";
    public static final String TXMETHOD_MVRCD = "mvrcd";

    // Prefixes, separators and file names/suffixes used for transformation metadata.
    public static final String TXMTD_MVPREFIX = "#Meta\u00b7MV";
    public static final String TXMTD_NDPREFIX = "#Meta\u00b7ND";
    public static final String TXMTD_SEP = ",";
    public static final String TXMTD_COLTYPES = "coltypes.csv";
    public static final String TXMTD_COLNAMES = "column.names";
    public static final String TXMTD_DC_COLNAMES = "dummycoded.column.names";
    public static final String TXMTD_RCD_MAP_SUFFIX = ".map";
    public static final String TXMTD_RCD_DISTINCT_SUFFIX = ".ndistinct";
    public static final String TXMTD_BIN_FILE_SUFFIX = ".bin";
    public static final String TXMTD_MV_FILE_SUFFIX = ".impute";

    // Keys of the JSON transformation specification.
    public static final String JSON_ATTRS = "attributes";
    public static final String JSON_MTHD = "methods";
    public static final String JSON_CONSTS = "constants";
    public static final String JSON_NBINS = "numbins";

    protected static final String MODE_FILE_SUFFIX = ".mode";
    protected static final String SCALE_FILE_SUFFIX = ".scale";
    protected static final String DCD_FILE_NAME = "dummyCodeMaps.csv";
    protected static final String DCD_NAME_SEP = "_";

    /** Character that separates the individual NA strings inside one configuration value. */
    private static final String NA_SEPARATOR = "\u00b7";

    /** Pre-compiled literal pattern for {@link #NA_SEPARATOR}; compiled once instead of per call. */
    private static final Pattern NA_SEPARATOR_PATTERN = Pattern.compile(Pattern.quote(NA_SEPARATOR));

    /** Minimum number of characters in the identifier returned by {@link #getPartFileID(JobConf, long)}. */
    private static final int PART_FILE_ID_WIDTH = 5;

    /** Error text (followed by the column ID) raised when an empty field survives the transformation. */
    private static final String UNHANDLED_EMPTY_STRING_MSG = "When na.strings are provided, empty string \"\" is considered as a missing value, and it must be imputed appropriately. Encountered an unhandled empty string in column ID: ";

    private OmitAgent _oa = null;
    private MVImputeAgent _mia = null;
    private RecodeAgent _ra = null;
    private BinAgent _ba = null;
    private DummycodeAgent _da = null;
    private long _numRecordsInPartFile;
    private long _numValidRecords;
    private long _numTransformedRows;
    private long _numTransformedColumns;
    private String _headerLine = null;
    private boolean _hasHeader;
    private Pattern _delim = null;
    private String _delimString = null;
    private String[] _NAstrings = null;
    private String[] _outputColumnNames = null;
    private int _numInputCols = -1;
    private String _tfMtdDir = null;
    private String _spec = null;
    private String _offsetFile = null;
    private String _tmpDir = null;
    private String _outputPath = null;

    /**
     * Partially initializing constructor: only the NA strings, the raw
     * specification string and the omit agent are set. All other fields keep
     * their defaults, so e.g. {@link #getDelim()} and the remaining agent
     * getters return {@code null}.
     *
     * @param job job configuration providing "transform.na.strings" and "transform.specification"
     * @param minimal not read; it only distinguishes this overload from {@link #TfUtils(JobConf)}
     * @throws IOException declared for parity with the other constructors
     * @throws JSONException if the specification cannot be parsed as JSON
     */
    public TfUtils(JobConf job, boolean minimal) throws IOException, JSONException {
        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            ConfigurationManager.setCachedJobConf(job);
        }
        this._NAstrings = TfUtils.parseNAStrings(job);
        this._spec = job.get("transform.specification");
        this._oa = new OmitAgent(new JSONObject(this._spec), null, -1);
    }

    /**
     * Fully initializes the instance from the "transform.*" entries of the job
     * configuration and from the job's output path.
     *
     * <p>Note: this constructor does not populate the value returned by
     * {@link #getSpec()}; only {@link #TfUtils(JobConf, boolean)} does.
     *
     * @param job job configuration
     * @throws IOException propagated from agent construction
     * @throws JSONException if the specification cannot be parsed as JSON
     */
    public TfUtils(JobConf job) throws IOException, JSONException {
        if (!InfrastructureAnalyzer.isLocalMode(job)) {
            ConfigurationManager.setCachedJobConf(job);
        }
        boolean hasHeader = Boolean.parseBoolean(job.get("transform.has.header"));
        String[] naStrings = TfUtils.parseNAStrings(job);
        long numCols = UtilFunctions.parseToLong(job.get("transform.num.columns"));
        String spec = job.get("transform.specification");
        String offsetFile = job.get("transform.offsets.file");
        String tmpPath = job.get("transform.temp.location");
        String outputPath = FileOutputFormat.getOutputPath(job).toString();
        JSONObject jspec = new JSONObject(spec);
        this.init(job.get("transform.header.line"), hasHeader, job.get("transform.field.delimiter"), naStrings, jspec, numCols, offsetFile, tmpPath, outputPath);
    }

    /**
     * Same as {@link #TfUtils(JobConf)} and additionally records the
     * transformation metadata directory.
     *
     * @param job job configuration
     * @param tfMtdDir transformation metadata directory
     * @throws IOException propagated from {@link #TfUtils(JobConf)}
     * @throws JSONException propagated from {@link #TfUtils(JobConf)}
     */
    public TfUtils(JobConf job, String tfMtdDir) throws IOException, JSONException {
        this(job);
        this._tfMtdDir = tfMtdDir;
    }

    /**
     * Fully initializes the instance from explicit arguments instead of a job
     * configuration. The output path is left {@code null}.
     *
     * @param headerLine header line that is split into column names
     * @param hasHeader value later returned by {@link #hasHeader()}
     * @param delim field delimiter, treated as a literal (not as a regular expression)
     * @param naStrings strings denoting missing values; may be {@code null}
     * @param spec parsed transformation specification
     * @param ncol number of input columns (narrowed to {@code int})
     * @param tfMtdDir transformation metadata directory
     * @param offsetFile offsets file
     * @param tmpPath temporary directory
     * @throws IOException propagated from agent construction
     * @throws JSONException propagated from agent construction
     */
    public TfUtils(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long ncol, String tfMtdDir, String offsetFile, String tmpPath) throws IOException, JSONException {
        this.init(headerLine, hasHeader, delim, naStrings, spec, ncol, offsetFile, tmpPath, null);
        this._tfMtdDir = tfMtdDir;
    }

    /**
     * Checks that {@code path} exists and is not empty.
     *
     * @param fs file system used for the lookup
     * @param path file to check
     * @param err if {@code true}, failures are reported as exceptions instead of {@code false}
     * @return {@code true} if the file exists and is non-empty; {@code false} otherwise (only when {@code err} is {@code false})
     * @throws IOException if the file is missing and {@code err} is set
     * @throws EOFException if the file is empty and {@code err} is set
     */
    protected static boolean checkValidInputFile(FileSystem fs, Path path, boolean err) throws IOException {
        if (!fs.exists(path)) {
            if (err) {
                throw new IOException("File " + path.toString() + " does not exist on HDFS/LFS.");
            }
            return false;
        }
        if (MapReduceTool.isFileEmpty(fs, path.toString())) {
            if (err) {
                throw new EOFException("Empty input file " + path.toString() + ".");
            }
            return false;
        }
        return true;
    }

    /**
     * Returns the qualified path of the current map input file, as configured
     * under {@code MRConfigurationNames.MR_MAP_INPUT_FILE}.
     *
     * @param job job configuration
     * @return qualified path as a string
     * @throws IOException if the file system cannot be obtained
     */
    public static String getPartFileName(JobConf job) throws IOException {
        FileSystem fs = FileSystem.get(job);
        Path inputFile = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        return inputFile.makeQualified(fs).toString();
    }

    /**
     * Tells whether the current map input file is the one configured under
     * "transform.smallest.file" (compared by qualified path).
     *
     * @param job job configuration
     * @return {@code true} if both qualified paths are equal
     * @throws IOException if the file system cannot be obtained
     */
    public static boolean isPartFileWithHeader(JobConf job) throws IOException {
        FileSystem fs = FileSystem.get(job);
        String currentFile = TfUtils.getPartFileName(job);
        Path smallestFile = new Path(job.get("transform.smallest.file")).makeQualified(fs);
        return currentFile.equals(smallestFile.toString());
    }

    /**
     * Appends the NA separator and the literal {@code "dummy"} to the given
     * NA-strings value.
     *
     * @param na separator-joined NA strings
     * @return {@code na} followed by the separator and {@code "dummy"}
     */
    public static String prepNAStrings(String na) {
        return na + NA_SEPARATOR + "dummy";
    }

    /**
     * Splits a separator-joined NA-strings value into its parts. Trailing
     * empty parts are kept.
     *
     * @param na separator-joined NA strings; may be {@code null}
     * @return the individual strings, or {@code null} if {@code na} is {@code null}
     */
    public static String[] parseNAStrings(String na) {
        if (na == null) {
            return null;
        }
        return NA_SEPARATOR_PATTERN.split(na, -1);
    }

    /**
     * Parses the NA strings stored under "transform.na.strings".
     *
     * @param job job configuration
     * @return the individual strings, or {@code null} if the entry is absent
     */
    public static String[] parseNAStrings(JobConf job) {
        return TfUtils.parseNAStrings(job.get("transform.na.strings"));
    }

    /** Instantiates one agent per transformation method from the given specification. */
    private void createAgents(JSONObject spec, String[] naStrings) throws IOException, JSONException {
        this._oa = new OmitAgent(spec, this._outputColumnNames, this._numInputCols);
        // Unlike the other agents, the impute agent receives no column names but the NA strings.
        this._mia = new MVImputeAgent(spec, null, naStrings, this._numInputCols);
        this._ra = new RecodeAgent(spec, this._outputColumnNames, this._numInputCols);
        this._ba = new BinAgent(spec, this._outputColumnNames, this._numInputCols);
        this._da = new DummycodeAgent(spec, this._outputColumnNames, this._numInputCols);
    }

    /** Splits the header line by the delimiter and unquotes every resulting column name. */
    private void parseColumnNames() {
        String[] names = this._delim.split(this._headerLine, -1);
        for (int col = 0; col < names.length; ++col) {
            names[col] = UtilFunctions.unquote(names[col]);
        }
        this._outputColumnNames = names;
    }

    /** Common initialization: resets counters, stores settings, parses the header and creates the agents. */
    private void init(String headerLine, boolean hasHeader, String delim, String[] naStrings, JSONObject spec, long numCols, String offsetFile, String tmpPath, String outputPath) throws IOException, JSONException {
        this._numRecordsInPartFile = 0L;
        this._numValidRecords = 0L;
        this._numTransformedRows = 0L;
        this._numTransformedColumns = 0L;
        this._headerLine = headerLine;
        this._hasHeader = hasHeader;
        this._delimString = delim;
        // The delimiter is quoted so that it is matched literally.
        this._delim = Pattern.compile(Pattern.quote(delim));
        this._NAstrings = naStrings;
        this._numInputCols = (int)numCols;
        this._offsetFile = offsetFile;
        this._tmpDir = tmpPath;
        this._outputPath = outputPath;
        // Column names must be available before the agents are created.
        this.parseColumnNames();
        this.createAgents(spec, naStrings);
    }

    public void incrValid() {
        ++this._numValidRecords;
    }

    public long getValid() {
        return this._numValidRecords;
    }

    public long getTotal() {
        return this._numRecordsInPartFile;
    }

    public long getNumTransformedRows() {
        return this._numTransformedRows;
    }

    public long getNumTransformedColumns() {
        return this._numTransformedColumns;
    }

    public String getHeader() {
        return this._headerLine;
    }

    public boolean hasHeader() {
        return this._hasHeader;
    }

    public String getDelimString() {
        return this._delimString;
    }

    public Pattern getDelim() {
        return this._delim;
    }

    public String[] getNAStrings() {
        return this._NAstrings;
    }

    public long getNumCols() {
        return this._numInputCols;
    }

    /**
     * @return the raw specification string; only set by {@link #TfUtils(JobConf, boolean)}, otherwise {@code null}
     */
    public String getSpec() {
        return this._spec;
    }

    public String getTfMtdDir() {
        return this._tfMtdDir;
    }

    public String getOffsetFile() {
        return this._offsetFile;
    }

    public String getTmpDir() {
        return this._tmpDir;
    }

    public String getOutputPath() {
        return this._outputPath;
    }

    /**
     * Returns the name of a column.
     *
     * @param colID 1-based column ID
     * @return the unquoted column name taken from the header line
     */
    public String getName(int colID) {
        return this._outputColumnNames[colID - 1];
    }

    public void setValid(long n) {
        this._numValidRecords = n;
    }

    public void incrTotal() {
        ++this._numRecordsInPartFile;
    }

    public void setTotal(long n) {
        this._numRecordsInPartFile = n;
    }

    public OmitAgent getOmitAgent() {
        return this._oa;
    }

    public MVImputeAgent getMVImputeAgent() {
        return this._mia;
    }

    public RecodeAgent getRecodeAgent() {
        return this._ra;
    }

    public BinAgent getBinAgent() {
        return this._ba;
    }

    public DummycodeAgent getDummycodeAgent() {
        return this._da;
    }

    /**
     * Tells whether {@code w} equals one of the given NA strings.
     *
     * @param NAstrings strings denoting missing values; may be {@code null}
     * @param w word to test; a {@code null} word never matches
     * @return {@code true} if {@code w} is equal to any element of {@code NAstrings}
     */
    public static boolean isNA(String[] NAstrings, String w) {
        // Null words occur in transformed rows (see check()), so they must not raise an NPE here.
        if (NAstrings == null || w == null) {
            return false;
        }
        for (String na : NAstrings) {
            if (w.equals(na)) {
                return true;
            }
        }
        return false;
    }

    public String[] getWords(Text line) {
        return this.getWords(line.toString());
    }

    /**
     * Trims the line and splits it by the field delimiter, keeping trailing
     * empty fields.
     *
     * @param line input line
     * @return the fields of the line
     */
    public String[] getWords(String line) {
        return this.getDelim().split(line.trim(), -1);
    }

    /**
     * Splits the line into fields and, unless the omit agent rejects the row,
     * feeds the fields to the impute, recode and bin agents and counts the row
     * as valid. The total record counter is incremented in either case.
     *
     * @param line input line
     * @return the fields of the line
     * @throws IOException propagated from the agents
     */
    public String[] prepareTfMtd(String line) throws IOException {
        String[] words = this.getWords(line);
        if (!this.getOmitAgent().omit(words, this)) {
            this.getMVImputeAgent().prepare(words);
            this.getRecodeAgent().prepare(words, this);
            this.getBinAgent().prepare(words, this);
            this.incrValid();
        }
        this.incrTotal();
        return words;
    }

    /**
     * Loads the transformation metadata through the shared file system, using
     * the cached job configuration.
     *
     * @throws IOException propagated from {@link #loadTfMetadata(JobConf, boolean)}
     */
    public void loadTfMetadata() throws IOException {
        JobConf job = ConfigurationManager.getCachedJobConf();
        this.loadTfMetadata(job, false);
    }

    /**
     * Lets every agent load its transformation metadata.
     *
     * @param job job configuration
     * @param fromLocalFS if {@code true}, read from the first local distributed-cache file on the
     *        local file system; otherwise read from {@link #getTfMtdDir()} on the job's file system
     * @throws IOException propagated from the file system or the agents
     */
    public void loadTfMetadata(JobConf job, boolean fromLocalFS) throws IOException {
        Path tfMtdDir;
        FileSystem fs;
        if (fromLocalFS) {
            tfMtdDir = DistributedCache.getLocalCacheFiles(job)[0];
            fs = FileSystem.getLocal(job);
        } else {
            fs = FileSystem.get(job);
            tfMtdDir = new Path(this.getTfMtdDir());
        }
        this.getMVImputeAgent().loadTxMtd(job, fs, tfMtdDir, this);
        this.getRecodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
        this.getBinAgent().loadTxMtd(job, fs, tfMtdDir, this);
        // The dummycode agent is handed the recode maps and bin counts before it loads its own metadata.
        this.getDummycodeAgent().setRecodeMaps(this.getRecodeAgent().getRecodeMaps());
        this.getDummycodeAgent().setNumBins(this.getBinAgent().getColList(), this.getBinAgent().getNumBins());
        this.getDummycodeAgent().loadTxMtd(job, fs, tfMtdDir, this);
    }

    /**
     * Builds the dummycoded header and triggers generation of the dummycode
     * maps, column types and header files under the temporary directory.
     *
     * @return the dummycoded header line
     * @throws IOException propagated from the file system or the helpers
     */
    public String processHeaderLine() throws IOException {
        FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
        String dcdHeader = this.getDummycodeAgent().constructDummycodedHeader(this.getHeader(), this.getDelim());
        this.getDummycodeAgent().genDcdMapsAndColTypes(fs, this.getTmpDir(), (int)this.getNumCols(), this);
        DataTransform.generateHeaderFiles(fs, this.getTmpDir(), this.getHeader(), dcdHeader);
        return dcdHeader;
    }

    /**
     * Asks the omit agent whether the row must be dropped.
     *
     * @param words fields of the row
     * @return the omit agent's decision, or {@code false} if there is no omit agent
     */
    public boolean omit(String[] words) {
        OmitAgent agent = this.getOmitAgent();
        return agent != null && agent.omit(words, this);
    }

    /**
     * Applies impute, recode, bin and dummycode (in this order) to the row and
     * increments the transformed-rows counter.
     *
     * @param words fields of the row
     * @return the transformed fields
     */
    public String[] apply(String[] words) {
        words = this.getMVImputeAgent().apply(words);
        words = this.getRecodeAgent().apply(words);
        words = this.getBinAgent().apply(words);
        words = this.getDummycodeAgent().apply(words);
        ++this._numTransformedRows;
        return words;
    }

    /**
     * Verifies that no field is an empty string when NA strings are
     * configured. Without NA strings this is a no-op; {@code null} fields are
     * always accepted.
     *
     * @param words fields of the (transformed) row
     * @throws DMLRuntimeException if an empty string is found; the message names the column ID
     *         obtained from the dummycode agent's {@code mapDcdColumnID}
     */
    public void check(String[] words) throws DMLRuntimeException {
        if (this.getNAStrings() == null) {
            return;
        }
        for (int i = 0; i < words.length; ++i) {
            if ("".equals(words[i])) {
                throw new DMLRuntimeException(UNHANDLED_EMPTY_STRING_MSG + this.getDummycodeAgent().mapDcdColumnID(i + 1));
            }
        }
    }

    /**
     * Convenience overload of {@link #checkAndPrepOutputString(String[], StringBuilder)}
     * that uses a fresh buffer.
     *
     * @param words fields of the (transformed) row
     * @return the fields joined by the delimiter string
     * @throws DMLRuntimeException see {@link #checkAndPrepOutputString(String[], StringBuilder)}
     */
    public String checkAndPrepOutputString(String[] words) throws DMLRuntimeException {
        return this.checkAndPrepOutputString(words, new StringBuilder());
    }

    /**
     * Joins the fields with the delimiter string, writing {@code "0"} for
     * {@code null} fields. When NA strings are configured, an empty-string
     * field is rejected. An empty row yields an empty string.
     *
     * @param words fields of the (transformed) row
     * @param sb buffer to build the result in; it is cleared first
     * @return the joined output line
     * @throws DMLRuntimeException if NA strings are configured and a field is the empty string
     */
    public String checkAndPrepOutputString(String[] words, StringBuilder sb) throws DMLRuntimeException {
        boolean rejectEmpty = this.getNAStrings() != null;
        sb.setLength(0);
        for (int i = 0; i < words.length; ++i) {
            if (i > 0) {
                sb.append(this._delimString);
            }
            String word = words[i];
            if (word == null) {
                sb.append("0");
                continue;
            }
            if (rejectEmpty && word.isEmpty()) {
                throw new DMLRuntimeException(UNHANDLED_EMPTY_STRING_MSG + this.getDummycodeAgent().mapDcdColumnID(i + 1));
            }
            sb.append(word);
        }
        return sb.toString();
    }

    /** Opens the single sequence file found under the path configured as "rowid.file.name". */
    private SequenceFile.Reader initOffsetsReader(JobConf job) throws IOException {
        Path path = new Path(job.get("rowid.file.name"));
        FileSystem fs = FileSystem.get(job);
        Path[] files = MatrixReader.getSequenceFilePaths(fs, path);
        if (files.length != 1) {
            throw new IOException("Expecting a single file under counters file: " + path.toString());
        }
        return new SequenceFile.Reader(fs, files[0], job);
    }

    /**
     * Determines the identifier of the current part file by scanning the
     * offsets file: the identifier is the zero-based position of the first
     * entry whose file name equals the current input file and whose file
     * offset equals {@code offset}. If no entry matches, the position equals
     * the number of entries read.
     *
     * @param job job configuration
     * @param offset file offset to look for
     * @return the position as a decimal string, left-padded with zeros to at least five characters
     * @throws IOException if the offsets file cannot be opened or read
     */
    public String getPartFileID(JobConf job, long offset) throws IOException {
        SequenceFile.Reader reader = null;
        int id = 0;
        try {
            reader = this.initOffsetsReader(job);
            ByteWritable key = new ByteWritable();
            CSVReblockMR.OffsetCount value = new CSVReblockMR.OffsetCount();
            String thisFile = TfUtils.getPartFileName(job);
            while (reader.next(key, value)) {
                if (thisFile.equals(value.filename) && value.fileOffset == offset) {
                    break;
                }
                ++id;
            }
        }
        finally {
            IOUtilFunctions.closeSilently(reader);
        }
        String sid = Integer.toString(id);
        // Clamp at zero: an id with more than five digits must not yield a negative array size.
        char[] padding = new char[Math.max(0, PART_FILE_ID_WIDTH - sid.length())];
        Arrays.fill(padding, '0');
        return new String(padding).concat(sid);
    }

    /** Column types together with their numeric IDs. */
    protected static enum ColumnTypes {
        SCALE,
        NOMINAL,
        ORDINAL,
        DUMMYCODED;

        /**
         * Maps the column type to its numeric ID: SCALE and DUMMYCODED map to
         * 1, NOMINAL to 2 and ORDINAL to 3.
         *
         * @return the numeric ID of this column type
         */
        protected byte toID() {
            switch (this) {
                case SCALE: {
                    return 1;
                }
                case NOMINAL: {
                    return 2;
                }
                case ORDINAL: {
                    return 3;
                }
                case DUMMYCODED: {
                    // NOTE(review): shares ID 1 with SCALE; kept as-is, confirm this is intended.
                    return 1;
                }
            }
            throw new RuntimeException("Invalid Column Type: " + this);
        }
    }
}

