/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysml.runtime.instructions.spark.utils;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.ml.feature.LabeledPoint;
import org.apache.spark.ml.linalg.DenseVector;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.linalg.Vectors;
import org.apache.spark.mllib.linalg.SparseVector;
import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.util.LongAccumulator;
import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.hops.OptimizerUtils;
import org.apache.sysml.parser.Expression;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.spark.data.SerLongWritable;
import org.apache.sysml.runtime.instructions.spark.data.SerText;
import org.apache.sysml.runtime.instructions.spark.functions.ConvertMatrixBlockToIJVLines;
import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils;
import org.apache.sysml.runtime.instructions.spark.utils.SparkUtils;
import org.apache.sysml.runtime.io.IOUtilFunctions;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import org.apache.sysml.runtime.matrix.data.OutputInfo;
import org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue;
import org.apache.sysml.runtime.matrix.mapred.ReblockBuffer;
import org.apache.sysml.runtime.util.DataConverter;
import org.apache.sysml.runtime.util.FastStringTokenizer;
import org.apache.sysml.runtime.util.MapReduceTool;
import org.apache.sysml.runtime.util.UtilFunctions;
import scala.Tuple2;

public class RDDConverterUtils {
    /**
     * Name of the row ID column of data frames handled by this class: it is looked up in the
     * input schema when a data frame contains IDs and is the first field of produced data frames.
     */
    public static final String DF_ID_COLUMN = "__INDEX";

    /**
     * Converts a text-cell RDD into a binary-block RDD.
     *
     * <p>Only the values of {@code input} are consumed; each partition is passed through a
     * {@code TextToBinaryBlockFunction} and the resulting blocks are merged by key.
     *
     * @param sc spark context, used only to create the empty-block RDD
     * @param input text-cell input (keys are ignored)
     * @param mcOut matrix characteristics of the output
     * @param outputEmptyBlocks if true and {@code mcOut.mightHaveEmptyBlocks()}, the empty-block
     *        RDD for {@code mcOut} is unioned in before the merge
     * @return merged binary-block RDD
     * @throws DMLRuntimeException declared for callers; propagated from the invoked utilities
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> textCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, MatrixCharacteristics mcOut, boolean outputEmptyBlocks) throws DMLRuntimeException {
        JavaRDD<Text> cells = input.values();
        JavaPairRDD blocks = cells.mapPartitionsToPair((PairFlatMapFunction)new TextToBinaryBlockFunction(mcOut));
        boolean addEmptyBlocks = outputEmptyBlocks && mcOut.mightHaveEmptyBlocks();
        if (addEmptyBlocks) {
            blocks = blocks.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
        }
        return RDDAggregateUtils.mergeByKey((JavaPairRDD<MatrixIndexes, MatrixBlock>)blocks, false);
    }

    /**
     * Converts a binary-cell RDD into a binary-block RDD.
     *
     * <p>Each partition is passed through a {@code BinaryCellToBinaryBlockFunction} and the
     * resulting blocks are merged by key.
     *
     * @param sc spark context, used only to create the empty-block RDD
     * @param input binary-cell input
     * @param mcOut matrix characteristics of the output
     * @param outputEmptyBlocks if true and {@code mcOut.mightHaveEmptyBlocks()}, the empty-block
     *        RDD for {@code mcOut} is unioned in before the merge
     * @return merged binary-block RDD
     * @throws DMLRuntimeException declared for callers; propagated from the invoked utilities
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> binaryCellToBinaryBlock(JavaSparkContext sc, JavaPairRDD<MatrixIndexes, MatrixCell> input, MatrixCharacteristics mcOut, boolean outputEmptyBlocks) throws DMLRuntimeException {
        JavaPairRDD blocks = input.mapPartitionsToPair((PairFlatMapFunction)new BinaryCellToBinaryBlockFunction(mcOut));
        boolean addEmptyBlocks = outputEmptyBlocks && mcOut.mightHaveEmptyBlocks();
        if (addEmptyBlocks) {
            blocks = blocks.union(SparkUtils.getEmptyBlockRDD(sc, mcOut));
        }
        return RDDAggregateUtils.mergeByKey((JavaPairRDD<MatrixIndexes, MatrixBlock>)blocks, false);
    }

    /**
     * Converts the blocks of a binary-block RDD into labeled points by flat-mapping the block
     * values (block indexes are ignored) through a {@code PrepareBinaryBlockFunction}.
     *
     * @param in binary-block input
     * @return RDD of labeled points
     */
    public static JavaRDD<LabeledPoint> binaryBlockToLabeledPoints(JavaPairRDD<MatrixIndexes, MatrixBlock> in) {
        JavaRDD<MatrixBlock> blocks = in.values();
        return blocks.flatMap((FlatMapFunction)new PrepareBinaryBlockFunction());
    }

    /**
     * Converts a binary-block RDD into text lines by flat-mapping every block through
     * {@code ConvertMatrixBlockToIJVLines}, configured with the block sizes of {@code mc}.
     *
     * @param in binary-block input
     * @param mc matrix characteristics providing rows/columns per block
     * @return RDD of text lines
     */
    public static JavaRDD<String> binaryBlockToTextCell(JavaPairRDD<MatrixIndexes, MatrixBlock> in, MatrixCharacteristics mc) {
        int brlen = mc.getRowsPerBlock();
        int bclen = mc.getColsPerBlock();
        ConvertMatrixBlockToIJVLines toLines = new ConvertMatrixBlockToIJVLines(brlen, bclen);
        return in.flatMap((FlatMapFunction)toLines);
    }

    /**
     * Converts a binary-block RDD into CSV lines.
     *
     * <p>If the matrix has more columns than fit into one block, the blocks are first sliced
     * into single rows, grouped by row, and concatenated into one full-width block per row.
     *
     * @param in binary-block input
     * @param mcIn matrix characteristics of the input
     * @param props CSV format properties handed to the line formatter
     * @param strict if true, the (row) blocks are sorted by key in ascending order before formatting
     * @return RDD of CSV lines
     */
    public static JavaRDD<String> binaryBlockToCsv(JavaPairRDD<MatrixIndexes, MatrixBlock> in, MatrixCharacteristics mcIn, CSVFileFormatProperties props, boolean strict) {
        JavaPairRDD rowBlocks = in;
        boolean spansMultipleColumnBlocks = mcIn.getCols() > (long)mcIn.getColsPerBlock();
        if (spansMultipleColumnBlocks) {
            JavaPairRDD slicedRows = rowBlocks.flatMapToPair((PairFlatMapFunction)new SliceBinaryBlockToRowsFunction(mcIn.getRowsPerBlock()));
            rowBlocks = slicedRows.groupByKey().mapToPair((PairFunction)new ConcatenateBlocksFunction(mcIn.getCols(), mcIn.getColsPerBlock()));
        }
        if (strict) {
            rowBlocks = rowBlocks.sortByKey(true);
        }
        return rowBlocks.flatMap((FlatMapFunction)new BinaryBlockToCSVFunction(props));
    }

    /**
     * Converts CSV text lines into a binary-block matrix RDD.
     *
     * <p>If the dimensions of {@code mc} (including the number of non-zeros) are not known, an
     * analysis pass over the input determines them and updates {@code mc} in place: the row
     * count is the line count minus the header line (if any), the column count is the number
     * of cells in the first line, and the non-zero count is read from an accumulator filled
     * by the analysis function.
     *
     * @param sc spark context, used to create the nnz accumulator
     * @param input CSV lines as values (keys are ignored)
     * @param mc matrix characteristics; updated in place if dimensions are unknown
     * @param hasHeader whether the first line is a header
     * @param delim cell delimiter, interpreted literally
     * @param fill passed through to the conversion function
     * @param fillValue passed through to the conversion function
     * @return merged binary-block RDD
     * @throws DMLRuntimeException declared for callers; propagated from the invoked utilities
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaPairRDD<LongWritable, Text> input, MatrixCharacteristics mc, boolean hasHeader, String delim, boolean fill, double fillValue) throws DMLRuntimeException {
        if (!mc.dimsKnown(true)) {
            LongAccumulator aNnz = sc.sc().longAccumulator("nnz");
            JavaRDD tmp = input.values().map((Function)new CSVAnalysisFunction(aNnz, delim));
            long rlen = tmp.count() - (long)(hasHeader ? 1 : 0);
            // Count cells with a literal delimiter and keep trailing empty cells:
            // String.split(delim) would interpret delimiters such as "|" or "." as a
            // regular expression and silently drop empty cells at the end of the line,
            // both of which yield a wrong number of columns.
            String firstLine = (String)tmp.first();
            long clen = firstLine.split(java.util.regex.Pattern.quote(delim), -1).length;
            long nnz = UtilFunctions.toLong(aNnz.value().longValue());
            mc.set(rlen, clen, mc.getRowsPerBlock(), mc.getColsPerBlock(), nnz);
        }
        // attach a 0-based line index to every line
        JavaPairRDD prepinput = input.values().zipWithIndex();
        boolean sparse = RDDConverterUtils.requiresSparseAllocation(prepinput, mc);
        JavaPairRDD out = prepinput.mapPartitionsToPair((PairFlatMapFunction)new CSVToBinaryBlockFunction(mc, sparse, hasHeader, delim, fill, fillValue));
        int parts = SparkUtils.getNumPreferredPartitions(mc, out);
        return RDDAggregateUtils.mergeByKey((JavaPairRDD<MatrixIndexes, MatrixBlock>)out, parts, false);
    }

    /**
     * Converts an RDD of CSV lines into a binary-block matrix RDD by wrapping the strings via
     * {@code StringToSerTextFunction} and delegating to the pair-RDD variant of this method.
     *
     * @param sc spark context
     * @param input CSV lines
     * @param mcOut matrix characteristics of the output
     * @param hasHeader whether the first line is a header
     * @param delim cell delimiter
     * @param fill passed through to the delegate
     * @param fillValue passed through to the delegate
     * @return merged binary-block RDD
     * @throws DMLRuntimeException propagated from the delegate
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> csvToBinaryBlock(JavaSparkContext sc, JavaRDD<String> input, MatrixCharacteristics mcOut, boolean hasHeader, String delim, boolean fill, double fillValue) throws DMLRuntimeException {
        StringToSerTextFunction wrapLine = new StringToSerTextFunction();
        JavaPairRDD textPairs = input.mapToPair((PairFunction)wrapLine);
        JavaPairRDD<LongWritable, Text> typedPairs = (JavaPairRDD<LongWritable, Text>)textPairs;
        return RDDConverterUtils.csvToBinaryBlock(sc, typedPairs, mcOut, hasHeader, delim, fill, fillValue);
    }

    /**
     * Converts a data frame into a binary-block matrix RDD.
     *
     * <p>If the dimensions of {@code mc} (including the number of non-zeros) are not known, an
     * analysis pass determines them and updates {@code mc} in place. Block sizes of at most 1
     * are replaced by the configured default block size. Row positions are taken from the ID
     * column if {@code containsID} is set, otherwise from {@code zipWithIndex}.
     *
     * @param sc spark context, used to create the nnz accumulator
     * @param df input data frame
     * @param mc matrix characteristics; may be updated in place
     * @param containsID whether the data frame contains the {@code DF_ID_COLUMN} column
     * @param isVector whether the data is stored as a single vector column
     * @return merged binary-block RDD
     */
    public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlock(JavaSparkContext sc, Dataset<Row> df, MatrixCharacteristics mc, boolean containsID, boolean isVector) {
        // determine unknown dimensions and sparsity
        if (!mc.dimsKnown(true)) {
            LongAccumulator nnzAcc = sc.sc().longAccumulator("nnz");
            JavaRDD analyzed = df.javaRDD().map((Function)new DataFrameAnalysisFunction(nnzAcc, containsID, isVector));
            long numRows = analyzed.count();
            long numCols;
            if (isVector) {
                // width of the vector stored behind the optional ID column
                Row firstRow = (Row)analyzed.first();
                numCols = ((Vector)firstRow.get(containsID ? 1 : 0)).size();
            } else {
                numCols = df.columns().length - (containsID ? 1 : 0);
            }
            long numNnz = UtilFunctions.toLong(nnzAcc.value().longValue());
            mc.set(numRows, numCols, mc.getRowsPerBlock(), mc.getColsPerBlock(), numNnz);
        }
        // ensure valid block sizes
        boolean invalidBlockSize = mc.getRowsPerBlock() <= 1 || mc.getColsPerBlock() <= 1;
        if (invalidBlockSize) {
            mc.setBlockSize(ConfigurationManager.getBlocksize());
        }
        // pair every row with its 0-based position
        JavaPairRDD indexedRows;
        if (containsID) {
            indexedRows = df.javaRDD().mapToPair((PairFunction)new DataFrameExtractIDFunction(df.schema().fieldIndex(DF_ID_COLUMN)));
        } else {
            indexedRows = df.javaRDD().zipWithIndex();
        }
        boolean sparse = RDDConverterUtils.requiresSparseAllocation(indexedRows, mc);
        JavaPairRDD blocks = indexedRows.mapPartitionsToPair((PairFlatMapFunction)new DataFrameToBinaryBlockFunction(mc, sparse, containsID, isVector));
        int numParts = SparkUtils.getNumPreferredPartitions(mc, blocks);
        return RDDAggregateUtils.mergeByKey((JavaPairRDD<MatrixIndexes, MatrixBlock>)blocks, numParts, false);
    }

    /**
     * Converts a binary-block matrix RDD into a data frame.
     *
     * <p>The blocks are sliced into single rows, grouped by row, and converted into data frame
     * rows. The schema consists of a non-nullable double ID column named {@code DF_ID_COLUMN},
     * followed by either one vector column {@code C1} or one double column {@code C1..Cn} per
     * matrix column.
     *
     * @param sparkSession session used to create the data frame
     * @param in binary-block input
     * @param mc matrix characteristics; the number of columns must be known
     * @param toVector whether to emit a single vector column instead of scalar columns
     * @return the resulting data frame
     * @throws RuntimeException if the number of columns is unknown
     */
    public static Dataset<Row> binaryBlockToDataFrame(SparkSession sparkSession, JavaPairRDD<MatrixIndexes, MatrixBlock> in, MatrixCharacteristics mc, boolean toVector) {
        if (!mc.colsKnown()) {
            throw new RuntimeException("Number of columns needed to convert binary block to data frame.");
        }
        // slice blocks into rows, align and convert into data frame rows
        SliceBinaryBlockToRowsFunction slicer = new SliceBinaryBlockToRowsFunction(mc.getRowsPerBlock());
        ConvertRowBlocksToRows rowBuilder = new ConvertRowBlocksToRows((int)mc.getCols(), mc.getColsPerBlock(), toVector);
        JavaRDD rowsRDD = in.flatMapToPair((PairFlatMapFunction)slicer).groupByKey().map((Function)rowBuilder);
        // create data frame schema
        ArrayList<StructField> schemaFields = new ArrayList<StructField>();
        schemaFields.add(DataTypes.createStructField(DF_ID_COLUMN, (DataType)DataTypes.DoubleType, false));
        if (toVector) {
            schemaFields.add(DataTypes.createStructField("C1", (DataType)new VectorUDT(), false));
        } else {
            long numCols = mc.getCols();
            for (int col = 1; (long)col <= numCols; ++col) {
                schemaFields.add(DataTypes.createStructField("C" + col, (DataType)DataTypes.DoubleType, false));
            }
        }
        return sparkSession.createDataFrame(rowsRDD.rdd(), DataTypes.createStructType(schemaFields));
    }

    /**
     * Converts a binary-block matrix RDD into a data frame, using the spark session obtained
     * from the given SQL context.
     *
     * @param sqlContext SQL context whose spark session is used
     * @param in binary-block input
     * @param mc matrix characteristics; the number of columns must be known
     * @param toVector whether to emit a single vector column instead of scalar columns
     * @return the resulting data frame
     * @deprecated delegates to the variant of this method that takes a {@code SparkSession}
     */
    @Deprecated
    public static Dataset<Row> binaryBlockToDataFrame(SQLContext sqlContext, JavaPairRDD<MatrixIndexes, MatrixBlock> in, MatrixCharacteristics mc, boolean toVector) {
        return RDDConverterUtils.binaryBlockToDataFrame(sqlContext.sparkSession(), in, mc, toVector);
    }

    /**
     * Converts a libsvm file into two binary-block matrices written as sequence files: the
     * labels (a single-column matrix) at {@code pathY} and the features at {@code pathX}.
     * For each output a metadata file with suffix {@code .mtd} is written as well.
     *
     * <p>Existing files at both output paths are deleted first. The loaded, indexed points are
     * persisted while both outputs are produced; the non-zero counts are read from accumulators
     * only after the respective save has been triggered.
     *
     * @param sc spark context
     * @param pathIn path of the libsvm input file
     * @param pathX output path of the feature matrix
     * @param pathY output path of the label matrix
     * @param mcOutX matrix characteristics of the feature matrix; dimensions must be known
     * @throws DMLRuntimeException if the dimensions are unknown or an I/O error occurs
     */
    public static void libsvmToBinaryBlock(JavaSparkContext sc, String pathIn, String pathX, String pathY, MatrixCharacteristics mcOutX) throws DMLRuntimeException {
        if (!mcOutX.dimsKnown()) {
            throw new DMLRuntimeException("Matrix characteristics required to convert sparse input representation.");
        }
        try {
            // cleanup existing output files
            MapReduceTool.deleteFileIfExistOnHDFS(pathX);
            MapReduceTool.deleteFileIfExistOnHDFS(pathY);

            // read labeled points and cache them together with their row positions
            int featureCount = (int)mcOutX.getCols();
            int featureParts = SparkUtils.getNumPreferredPartitions(mcOutX, null);
            JavaRDD labeledPoints = MLUtils.loadLibSVMFile((SparkContext)sc.sc(), pathIn, featureCount, featureParts).toJavaRDD();
            JavaPairRDD indexedPoints = labeledPoints.zipWithIndex().persist(StorageLevel.MEMORY_AND_DISK());

            // labels -> single-column binary block matrix
            MatrixCharacteristics labelMc = new MatrixCharacteristics(mcOutX.getRows(), 1L, mcOutX.getRowsPerBlock(), mcOutX.getColsPerBlock(), -1L);
            LongAccumulator labelNnz = sc.sc().longAccumulator("nnz");
            JavaPairRDD<MatrixIndexes, MatrixBlock> labelBlocks = indexedPoints.mapPartitionsToPair((PairFlatMapFunction)new LabeledPointToBinaryBlockFunction(labelMc, true, labelNnz));
            int labelParts = SparkUtils.getNumPreferredPartitions(labelMc, null);
            labelBlocks = RDDAggregateUtils.mergeByKey(labelBlocks, labelParts, false);
            labelBlocks.saveAsHadoopFile(pathY, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
            // accumulator is only populated after the save has run
            labelMc.setNonZeros(labelNnz.value());
            MapReduceTool.writeMetaDataFile(pathY + ".mtd", Expression.ValueType.DOUBLE, labelMc, OutputInfo.BinaryBlockOutputInfo);

            // features -> binary block matrix
            MatrixCharacteristics featureMc = new MatrixCharacteristics(mcOutX.getRows(), mcOutX.getCols(), mcOutX.getRowsPerBlock(), mcOutX.getColsPerBlock(), -1L);
            LongAccumulator featureNnz = sc.sc().longAccumulator("nnz");
            JavaPairRDD<MatrixIndexes, MatrixBlock> featureBlocks = indexedPoints.mapPartitionsToPair((PairFlatMapFunction)new LabeledPointToBinaryBlockFunction(featureMc, false, featureNnz));
            featureBlocks = RDDAggregateUtils.mergeByKey(featureBlocks, featureParts, false);
            featureBlocks.saveAsHadoopFile(pathX, MatrixIndexes.class, MatrixBlock.class, SequenceFileOutputFormat.class);
            // accumulator is only populated after the save has run
            featureMc.setNonZeros(featureNnz.value());
            MapReduceTool.writeMetaDataFile(pathX + ".mtd", Expression.ValueType.DOUBLE, featureMc, OutputInfo.BinaryBlockOutputInfo);

            // non-blocking unpersist of the cached points
            indexedPoints.unpersist(false);
        }
        catch (IOException ioe) {
            throw new DMLRuntimeException(ioe);
        }
    }

    /**
     * Maps every (Long, String) pair of the input through a {@code TextToSerTextFunction} to
     * obtain a (LongWritable, Text) pair RDD.
     *
     * @param in input pairs
     * @return converted pair RDD
     */
    public static JavaPairRDD<LongWritable, Text> stringToSerializableText(JavaPairRDD<Long, String> in) {
        TextToSerTextFunction converter = new TextToSerTextFunction();
        return in.mapToPair((PairFunction)converter);
    }

    /**
     * Decides whether output blocks should be allocated in sparse format.
     *
     * <p>Sparse is chosen if the number of non-zeros is unknown or the matrix evaluates to
     * sparse in memory. Otherwise the estimated size per partition is related to the estimated
     * size of a single row and the rows per block; sparse is chosen if that ratio is below 0.4.
     *
     * @param in input RDD, only used for its number of partitions
     * @param mc matrix characteristics
     * @return true if sparse allocation should be used
     */
    private static boolean requiresSparseAllocation(JavaPairRDD<?, ?> in, MatrixCharacteristics mc) {
        // unknown nnz: pick the sparse representation
        if (!mc.nnzKnown()) {
            return true;
        }
        // known nnz and sparse in memory
        if (MatrixBlock.evalSparseFormatInMemory(mc.getRows(), mc.getCols(), mc.getNonZeros())) {
            return true;
        }
        // otherwise evaluate the expected fraction of a row block held by one partition
        double totalSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(mc);
        double singleRowSize = OptimizerUtils.estimatePartitionedSizeExactSparsity(1L, mc.getCols(), mc.getNumRowBlocks(), (long)mc.getColsPerBlock(), Math.ceil((double)mc.getNonZeros() / (double)mc.getRows()));
        double sizePerPartition = Math.ceil(totalSize / (double)in.getNumPartitions());
        double rowsPerBlock = (double)Math.min(mc.getRows(), (long)mc.getRowsPerBlock());
        return sizePerPartition / singleRowSize / rowsPerBlock < 0.4;
    }

    /**
     * Counts the non-zeros of an entire row.
     *
     * @param vect a {@code Vector} if {@code isVector} is true, otherwise a {@code Row}
     * @param isVector whether {@code vect} is a vector
     * @param off position of the first data column in a row (unused for vectors)
     * @return number of non-zero values
     */
    private static int countNnz(Object vect, boolean isVector, int off) {
        if (!isVector) {
            // scan the row from the first data column up to its end
            int rowLength = ((Row)vect).length();
            return RDDConverterUtils.countNnz(vect, isVector, off, rowLength);
        }
        return ((Vector)vect).numNonzeros();
    }

    /**
     * Counts the non-zeros in a sub-range of a row or vector.
     *
     * <p>For rows and dense vectors the positions {@code pos} (inclusive) to {@code cu}
     * (exclusive) are scanned. For sparse vectors {@code pos} is a position in the array of
     * stored entries and scanning stops at the first stored index that is not below {@code cu}.
     * Vectors of any other type yield 0.
     *
     * @param vect a {@code Vector} if {@code isVector} is true, otherwise a {@code Row}
     * @param isVector whether {@code vect} is a vector
     * @param pos start position (see above)
     * @param cu exclusive upper bound (see above)
     * @return number of non-zero values in the range
     */
    private static int countNnz(Object vect, boolean isVector, int pos, int cu) {
        int count = 0;
        if (!isVector) {
            Row row = (Row)vect;
            for (int k = pos; k < cu; ++k) {
                if (UtilFunctions.isNonZero(row.get(k))) {
                    ++count;
                }
            }
            return count;
        }
        if (vect instanceof DenseVector) {
            DenseVector dense = (DenseVector)vect;
            for (int k = pos; k < cu; ++k) {
                if (dense.apply(k) != 0.0) {
                    ++count;
                }
            }
        } else if (vect instanceof org.apache.spark.ml.linalg.SparseVector) {
            org.apache.spark.ml.linalg.SparseVector sparse = (org.apache.spark.ml.linalg.SparseVector)vect;
            int numStored = sparse.numActives();
            int[] storedIx = sparse.indices();
            double[] storedVals = sparse.values();
            for (int k = pos; k < numStored && storedIx[k] < cu; ++k) {
                if (storedVals[k] != 0.0) {
                    ++count;
                }
            }
        }
        return count;
    }

    /**
     * Creates a vector from a single-row matrix block: an empty sparse vector for an empty
     * block, a sparse vector backed by the index/value arrays of row 0 for a sparse block, and
     * a dense vector over the dense block values otherwise.
     *
     * @param row matrix block to convert
     * @return vector with {@code row.getNumColumns()} entries in the sparse cases, or a dense
     *         vector over the block's dense values
     */
    private static Vector createVector(MatrixBlock row) {
        boolean empty = row.isEmptyBlock(false);
        if (!empty && !row.isInSparseFormat()) {
            // dense row
            return Vectors.dense(row.getDenseBlockValues());
        }
        if (empty) {
            // empty row
            return Vectors.sparse(row.getNumColumns(), new int[0], new double[0]);
        }
        // sparse row
        int[] indexes = row.getSparseBlock().indexes(0);
        double[] values = row.getSparseBlock().values(0);
        return Vectors.sparse(row.getNumColumns(), indexes, values);
    }

    /**
     * Converts all single-row blocks of one matrix row (keyed by column block index) into a
     * data frame row. The first cell holds the row key as double; the data follows either as
     * one vector cell or as one double cell per matrix column.
     */
    private static class ConvertRowBlocksToRows
    implements Function<Tuple2<Long, Iterable<Tuple2<Long, MatrixBlock>>>, Row> {
        private static final long serialVersionUID = 4441184411670316972L;
        private int _clen;
        private int _bclen;
        private boolean _toVector;

        /**
         * @param clen number of matrix columns
         * @param bclen number of columns per block
         * @param toVector whether to emit a single vector cell instead of scalar cells
         */
        public ConvertRowBlocksToRows(int clen, int bclen, boolean toVector) {
            this._clen = clen;
            this._bclen = bclen;
            this._toVector = toVector;
        }

        /**
         * @param arg0 row key and the row's blocks, each paired with its 1-based column block index
         * @return the assembled data frame row
         */
        public Row call(Tuple2<Long, Iterable<Tuple2<Long, MatrixBlock>>> arg0) throws Exception {
            Iterable<Tuple2<Long, MatrixBlock>> rowBlocks = arg0._2();
            Object[] cells = new Object[this._toVector ? 2 : this._clen + 1];
            cells[0] = (double)arg0._1().longValue();

            if (!this._toVector) {
                // scalar cells: copy every block to its column offset (shifted by the ID cell)
                for (Tuple2<Long, MatrixBlock> colBlock : rowBlocks) {
                    int colOffset = (colBlock._1().intValue() - 1) * this._bclen;
                    MatrixBlock blk = colBlock._2();
                    for (int j = 0; j < blk.getNumColumns(); ++j) {
                        cells[colOffset + j + 1] = blk.quickGetValue(0, j);
                    }
                }
            } else if (this._clen <= this._bclen) {
                // single column block: convert it directly
                cells[1] = RDDConverterUtils.createVector(rowBlocks.iterator().next()._2());
            } else {
                // multiple column blocks: assemble a dense vector
                double[] values = new double[this._clen];
                for (Tuple2<Long, MatrixBlock> colBlock : rowBlocks) {
                    int colOffset = (colBlock._1().intValue() - 1) * this._bclen;
                    DataConverter.copyToDoubleVector(colBlock._2(), values, colOffset);
                }
                cells[1] = Vectors.dense(values);
            }
            return RowFactory.create(cells);
        }
    }

    /**
     * Pairs every data frame row with a 0-based position derived from its 1-based ID column.
     */
    public static class DataFrameExtractIDFunction
    implements PairFunction<Row, Row, Long> {
        private static final long serialVersionUID = 7438855241666363433L;
        private int _index = -1;

        /**
         * @param index position of the ID column within a row
         */
        public DataFrameExtractIDFunction(int index) {
            this._index = index;
        }

        /**
         * @param arg0 input row
         * @return the unchanged row paired with its ID minus one
         * @throws DMLRuntimeException if the ID is not positive
         */
        public Tuple2<Row, Long> call(Row arg0) throws Exception {
            double rawId = UtilFunctions.getDouble(arg0.get(this._index));
            long id = UtilFunctions.toLong(rawId);
            if (id > 0L) {
                return new Tuple2<Row, Long>(arg0, id - 1L);
            }
            throw new DMLRuntimeException("ID Column '__INDEX' expected to be 1-based, but found value: " + id);
        }
    }

    private static class DataFrameAnalysisFunction
    implements Function<Row, Row> {
        private static final long serialVersionUID = 5705371332119770215L;
        private LongAccumulator _aNnz = null;
        private boolean _containsID;
        private boolean _isVector;

        public DataFrameAnalysisFunction(LongAccumulator aNnz, boolean containsID, boolean isVector) {
            this._aNnz = aNnz;
            this._containsID = containsID;
            this._isVector = isVector;
        }

        public Row call(Row arg0) throws Exception {
            int off = this._containsID ? 1 : 0;
            Row vect = this._isVector ? arg0.get(off) : arg0;
            int lnnz = RDDConverterUtils.countNnz(vect, this._isVector, off);
            this._aNnz.add((long)lnnz);
            return arg0;
        }
    }

    /**
     * Partition-wise conversion of (row, 0-based position) pairs into matrix blocks.
     *
     * <p>For the row block currently being filled, one matrix block per column block is kept;
     * the blocks are emitted whenever a row of a different row block is encountered and once
     * more at the end of the partition.
     * NOTE(review): this presumably relies on the rows of a row block arriving contiguously
     * within a partition; otherwise several partial blocks are emitted for the same index.
     */
    private static class DataFrameToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<Row, Long>>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = 653447740362447236L;
        // matrix dimensions and block sizes, copied from the matrix characteristics
        private long _rlen = -1L;
        private long _clen = -1L;
        private int _brlen = -1;
        private int _bclen = -1;
        // sparsity as reported by OptimizerUtils.getSparsity, used for the estimated nnz of new blocks
        private double _sparsity = 1.0;
        // whether new blocks are created in sparse format
        private boolean _sparse = false;
        // whether position 0 of a row holds the ID column
        private boolean _containsID;
        // whether the data is stored as a single vector cell
        private boolean _isVector;

        /**
         * @param mc matrix characteristics providing dimensions, block sizes and sparsity
         * @param sparse whether blocks are created in sparse format
         * @param containsID whether position 0 of a row holds the ID column
         * @param isVector whether the data is stored as a single vector cell
         */
        public DataFrameToBinaryBlockFunction(MatrixCharacteristics mc, boolean sparse, boolean containsID, boolean isVector) {
            this._rlen = mc.getRows();
            this._clen = mc.getCols();
            this._brlen = mc.getRowsPerBlock();
            this._bclen = mc.getColsPerBlock();
            this._sparsity = OptimizerUtils.getSparsity(mc);
            this._sparse = sparse;
            this._containsID = containsID;
            this._isVector = isVector;
        }

        /**
         * Converts all rows of one partition.
         *
         * @param arg0 iterator over (row, 0-based row position) pairs
         * @return iterator over the created (block index, block) pairs
         */
        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Row, Long>> arg0) throws Exception {
            ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
            // number of column blocks; ix/mb hold the blocks of the current row block
            int ncblks = (int)Math.ceil((double)this._clen / (double)this._bclen);
            MatrixIndexes[] ix = new MatrixIndexes[ncblks];
            MatrixBlock[] mb = new MatrixBlock[ncblks];
            while (arg0.hasNext()) {
                int pix;
                Tuple2<Row, Long> tmp = arg0.next();
                // 1-based global row index
                long rowix = (Long)tmp._2() + 1L;
                // row block index and row position within that block
                long rix = UtilFunctions.computeBlockIndex(rowix, this._brlen);
                int pos = UtilFunctions.computeCellInBlock(rowix, this._brlen);
                // first row or a new row block: flush the current blocks and create new ones
                if (ix[0] == null || ix[0].getRowIndex() != rix) {
                    if (ix[0] != null) {
                        DataFrameToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
                    }
                    long len = UtilFunctions.computeBlockSize(this._rlen, rix, this._brlen);
                    this.createBlocks(rowix, (int)len, ix, mb);
                }
                // off skips the ID cell; obj is the vector cell or the row itself
                int off = this._containsID ? 1 : 0;
                Object obj = this._isVector ? ((Row)tmp._1()).get(off) : tmp._1();
                // pix is a read cursor that is carried across all column blocks of this row:
                // a position in the row / dense vector, or a position in the stored entries
                // of a sparse vector (n itself is never read)
                int n = pix = this._isVector ? 0 : off;
                for (int cix = 1; cix <= ncblks; ++cix) {
                    int lclen = UtilFunctions.computeBlockSize(this._clen, cix, this._bclen);
                    // exclusive upper bound of this column block (shifted by off for rows)
                    int cu = (int)Math.min(this._clen, (long)(cix * this._bclen)) + (this._isVector ? 0 : off);
                    // sparse target: allocate the row once with the exact number of non-zeros
                    if (mb[cix - 1].isInSparseFormat()) {
                        int lnnz = RDDConverterUtils.countNnz(obj, this._isVector, pix, cu);
                        mb[cix - 1].getSparseBlock().allocate(pos, lnnz);
                    }
                    if (this._isVector) {
                        Vector vect = (Vector)obj;
                        if (vect instanceof org.apache.spark.ml.linalg.SparseVector) {
                            // sparse vector: consume stored entries whose index is below cu
                            org.apache.spark.ml.linalg.SparseVector svect = (org.apache.spark.ml.linalg.SparseVector)vect;
                            int[] svectIx = svect.indices();
                            while (pix < svectIx.length && svectIx[pix] < cu) {
                                int j = UtilFunctions.computeCellInBlock(svectIx[pix] + 1, this._bclen);
                                mb[cix - 1].appendValue(pos, j, svect.values()[pix++]);
                            }
                            continue;
                        }
                        // other vectors: append the next lclen values
                        for (int j = 0; j < lclen; ++j) {
                            mb[cix - 1].appendValue(pos, j, vect.apply(pix++));
                        }
                        continue;
                    }
                    // row of basic types: append the next lclen cells as doubles
                    Row row = (Row)obj;
                    for (int j = 0; j < lclen; ++j) {
                        mb[cix - 1].appendValue(pos, j, UtilFunctions.getDouble(row.get(pix++)));
                    }
                }
            }
            // flush the blocks of the last row block
            DataFrameToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
            return ret.iterator();
        }

        /**
         * Creates and allocates one block per column block for the row block of the given row.
         *
         * @param rowix 1-based global row index
         * @param lrlen number of rows of the new blocks
         * @param ix target array for the block indexes
         * @param mb target array for the blocks
         */
        private void createBlocks(long rowix, int lrlen, MatrixIndexes[] ix, MatrixBlock[] mb) {
            long rix = UtilFunctions.computeBlockIndex(rowix, this._brlen);
            int ncblks = (int)Math.ceil((double)this._clen / (double)this._bclen);
            for (int cix = 1; cix <= ncblks; ++cix) {
                int lclen = UtilFunctions.computeBlockSize(this._clen, cix, this._bclen);
                ix[cix - 1] = new MatrixIndexes(rix, cix);
                // last argument: estimated number of non-zeros (cells times sparsity)
                mb[cix - 1] = new MatrixBlock(lrlen, lclen, this._sparse, (int)((double)(lrlen * lclen) * this._sparsity));
                mb[cix - 1].allocateBlock();
            }
        }

        /**
         * Appends all non-null blocks with their indexes to the result list and calls
         * {@code examSparsity()} on each of them.
         *
         * @param ix block indexes
         * @param mb blocks (null entries are skipped)
         * @param ret result list
         */
        private static void flushBlocksToList(MatrixIndexes[] ix, MatrixBlock[] mb, ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret) throws DMLRuntimeException {
            int len = ix.length;
            for (int i = 0; i < len; ++i) {
                if (mb[i] == null) continue;
                ret.add((Tuple2<MatrixIndexes, MatrixBlock>)new Tuple2((Object)ix[i], (Object)mb[i]));
                mb[i].examSparsity();
            }
        }
    }

    /**
     * Concatenates all single-row blocks of one matrix row (keyed by 1-based column block
     * index) into one full-width single-row block with column block index 1.
     */
    private static class ConcatenateBlocksFunction
    implements PairFunction<Tuple2<Long, Iterable<Tuple2<Long, MatrixBlock>>>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = -7879603125149650097L;
        private long _clen = -1L;
        private int _bclen = -1;
        private int _ncblks = -1;

        /**
         * @param clen number of matrix columns
         * @param bclen number of columns per block
         */
        public ConcatenateBlocksFunction(long clen, int bclen) {
            this._clen = clen;
            this._bclen = bclen;
            this._ncblks = (int)Math.ceil((double)clen / (double)bclen);
        }

        /**
         * @param arg0 row key and the row's blocks, each paired with its column block index
         * @return the concatenated row block, indexed by (row key, 1)
         */
        public Tuple2<MatrixIndexes, MatrixBlock> call(Tuple2<Long, Iterable<Tuple2<Long, MatrixBlock>>> arg0) throws Exception {
            long rowKey = arg0._1();

            // order the input blocks by column block index
            MatrixBlock[] ordered = new MatrixBlock[this._ncblks];
            Iterator<Tuple2<Long, MatrixBlock>> it = arg0._2().iterator();
            while (it.hasNext()) {
                Tuple2<Long, MatrixBlock> part = it.next();
                ordered[part._1().intValue() - 1] = part._2();
            }

            // copy every block to its column range; the format follows the first block
            MatrixBlock merged = new MatrixBlock(1, (int)this._clen, ordered[0].isInSparseFormat());
            for (int b = 0; b < this._ncblks; ++b) {
                int colLower = b * this._bclen;
                int colUpper = (int)Math.min((long)((b + 1) * this._bclen), this._clen) - 1;
                merged.copy(0, 0, colLower, colUpper, ordered[b], false);
            }
            merged.recomputeNonZeros();

            return new Tuple2<MatrixIndexes, MatrixBlock>(new MatrixIndexes(rowKey, 1L), merged);
        }
    }

    /**
     * Slices a matrix block into single-row blocks. Every row is emitted under the key computed
     * by {@code UtilFunctions.computeCellIndex} from the block's row index, together with the
     * column index of the source block.
     */
    private static class SliceBinaryBlockToRowsFunction
    implements PairFlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, Long, Tuple2<Long, MatrixBlock>> {
        private static final long serialVersionUID = 7192024840710093114L;
        private int _brlen = -1;

        /**
         * @param brlen number of rows per block
         */
        public SliceBinaryBlockToRowsFunction(int brlen) {
            this._brlen = brlen;
        }

        /**
         * @param arg0 block index and block
         * @return one (row key, (column block index, single-row block)) pair per block row
         */
        public Iterator<Tuple2<Long, Tuple2<Long, MatrixBlock>>> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
            MatrixIndexes blockIx = arg0._1();
            MatrixBlock block = arg0._2();
            ArrayList<Tuple2<Long, Tuple2<Long, MatrixBlock>>> rows = new ArrayList<Tuple2<Long, Tuple2<Long, MatrixBlock>>>();
            int r = 0;
            while (r < block.getNumRows()) {
                MatrixBlock rowBlock = block.slice(r, r, 0, block.getNumColumns() - 1, new MatrixBlock());
                long rowKey = UtilFunctions.computeCellIndex(blockIx.getRowIndex(), this._brlen, r);
                Tuple2<Long, MatrixBlock> payload = new Tuple2<Long, MatrixBlock>(blockIx.getColumnIndex(), rowBlock);
                rows.add(new Tuple2<Long, Tuple2<Long, MatrixBlock>>(rowKey, payload));
                ++r;
            }
            return rows.iterator();
        }
    }

    private static class BinaryBlockToCSVFunction
    implements FlatMapFunction<Tuple2<MatrixIndexes, MatrixBlock>, String> {
        private static final long serialVersionUID = 1891768410987528573L;
        private CSVFileFormatProperties _props = null;

        public BinaryBlockToCSVFunction(CSVFileFormatProperties props) {
            this._props = props;
        }

        public Iterator<String> call(Tuple2<MatrixIndexes, MatrixBlock> arg0) throws Exception {
            StringBuilder sb;
            MatrixIndexes ix = (MatrixIndexes)arg0._1();
            MatrixBlock blk = (MatrixBlock)arg0._2();
            ArrayList<String> ret = new ArrayList<String>();
            if (this._props.hasHeader() && ix.getRowIndex() == 1L) {
                sb = new StringBuilder();
                for (int j = 1; j < blk.getNumColumns(); ++j) {
                    if (j != 1) {
                        sb.append(this._props.getDelim());
                    }
                    sb.append("C" + j);
                }
                ret.add(sb.toString());
            }
            sb = new StringBuilder();
            for (int i = 0; i < blk.getNumRows(); ++i) {
                for (int j = 0; j < blk.getNumColumns(); ++j) {
                    if (j != 0) {
                        sb.append(this._props.getDelim());
                    }
                    double val = blk.quickGetValue(i, j);
                    if (this._props.isSparse() && val == 0.0) continue;
                    sb.append(val);
                }
                ret.add(sb.toString());
                sb.setLength(0);
            }
            return ret.iterator();
        }
    }

    private static class LabeledPointToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint, Long>>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = 2290124693964816276L;
        private final long _rlen;
        private final long _clen;
        private final int _brlen;
        private final int _bclen;
        private final boolean _sparseX;
        private final boolean _labels;
        private final LongAccumulator _aNnz;

        public LabeledPointToBinaryBlockFunction(MatrixCharacteristics mc, boolean labels, LongAccumulator aNnz) {
            this._rlen = mc.getRows();
            this._clen = mc.getCols();
            this._brlen = mc.getRowsPerBlock();
            this._bclen = mc.getColsPerBlock();
            this._sparseX = MatrixBlock.evalSparseFormatInMemory(mc.getRows(), mc.getCols(), mc.getNonZeros());
            this._labels = labels;
            this._aNnz = aNnz;
        }

        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<org.apache.spark.mllib.regression.LabeledPoint, Long>> arg0) throws Exception {
            ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
            int ncblks = (int)Math.ceil((double)this._clen / (double)this._bclen);
            MatrixIndexes[] ix = new MatrixIndexes[ncblks];
            MatrixBlock[] mb = new MatrixBlock[ncblks];
            while (arg0.hasNext()) {
                Tuple2<org.apache.spark.mllib.regression.LabeledPoint, Long> tmp = arg0.next();
                org.apache.spark.mllib.regression.LabeledPoint row = (org.apache.spark.mllib.regression.LabeledPoint)tmp._1();
                boolean lsparse = this._sparseX || !this._labels && row.features() instanceof SparseVector;
                long rowix = (Long)tmp._2() + 1L;
                long rix = UtilFunctions.computeBlockIndex(rowix, this._brlen);
                int pos = UtilFunctions.computeCellInBlock(rowix, this._brlen);
                if (ix[0] == null || ix[0].getRowIndex() != rix) {
                    if (ix[0] != null) {
                        LabeledPointToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
                    }
                    long len = UtilFunctions.computeBlockSize(this._rlen, rix, this._brlen);
                    this.createBlocks(rowix, (int)len, ix, mb, lsparse);
                }
                if (this._labels) {
                    double val = row.label();
                    mb[0].appendValue(pos, 0, val);
                    this._aNnz.add(val != 0.0 ? 1L : 0L);
                    continue;
                }
                int lnnz = row.features().numNonzeros();
                if (row.features() instanceof SparseVector) {
                    SparseVector srow = (SparseVector)row.features();
                    for (int k = 0; k < lnnz; ++k) {
                        int gix = srow.indices()[k] + 1;
                        int cix = (int)UtilFunctions.computeBlockIndex(gix, this._bclen);
                        int j = UtilFunctions.computeCellInBlock(gix, this._bclen);
                        mb[cix - 1].appendValue(pos, j, srow.values()[k]);
                    }
                } else {
                    int pix = 0;
                    for (int cix = 1; cix <= ncblks; ++cix) {
                        int lclen = UtilFunctions.computeBlockSize(this._clen, cix, this._bclen);
                        for (int j = 0; j < lclen; ++j) {
                            mb[cix - 1].appendValue(pos, j, row.features().apply(pix++));
                        }
                    }
                }
                this._aNnz.add((long)lnnz);
            }
            LabeledPointToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
            return ret.iterator();
        }

        /**
         * Replaces the contents of {@code ix} and {@code mb} with a fresh set of
         * blocks, one per column block, for the row block that contains
         * {@code rowix}. Every new block is allocated immediately.
         *
         * @param rowix   row index used to derive the row-block index
         * @param lrlen   number of rows of each created block
         * @param ix      output array receiving the block indexes (slot = column block - 1)
         * @param mb      output array receiving the allocated blocks (same slots as {@code ix})
         * @param lsparse sparse flag passed to each new block
         */
        private void createBlocks(long rowix, int lrlen, MatrixIndexes[] ix, MatrixBlock[] mb, boolean lsparse) {
            final long blockRow = UtilFunctions.computeBlockIndex(rowix, _brlen);
            final int numColBlocks = (int) Math.ceil((double) _clen / (double) _bclen);
            for (int slot = 0; slot < numColBlocks; slot++) {
                // column-block indexes are 1-based, array slots are 0-based
                final int blockCol = slot + 1;
                final int blockCols = UtilFunctions.computeBlockSize(_clen, blockCol, _bclen);
                ix[slot] = new MatrixIndexes(blockRow, blockCol);
                MatrixBlock block = new MatrixBlock(lrlen, blockCols, lsparse);
                mb[slot] = block;
                block.allocateBlock();
            }
        }

        /**
         * Appends every non-null block of {@code mb}, paired with the index in the
         * same slot of {@code ix}, to {@code ret} and then calls
         * {@code examSparsity()} on that block.
         *
         * @param ix  block indexes, parallel to {@code mb}
         * @param mb  blocks to flush; null slots are skipped
         * @param ret list receiving the (index, block) pairs
         * @throws DMLRuntimeException declared by this method; presumably raised by {@code examSparsity()}
         */
        private static void flushBlocksToList(MatrixIndexes[] ix, MatrixBlock[] mb, ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret) throws DMLRuntimeException {
            for (int slot = 0; slot < ix.length; slot++) {
                MatrixBlock block = mb[slot];
                if (block != null) {
                    ret.add(new Tuple2<MatrixIndexes, MatrixBlock>(ix[slot], block));
                    block.examSparsity();
                }
            }
        }
    }

    /**
     * Partition-level function that converts CSV text rows, each paired with a
     * row offset, into matrix blocks keyed by their block indexes.
     *
     * <p>Rows are expected to arrive grouped by row block: whenever the row-block
     * index of the current row differs from the one being built, the blocks built
     * so far are flushed to the output and a new set of blocks is created.
     */
    private static class CSVToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<Text, Long>>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = -4948430402942717043L;
        private long _rlen = -1L;
        private long _clen = -1L;
        private int _brlen = -1;
        private int _bclen = -1;
        private double _sparsity = 1.0;
        private boolean _sparse = false;
        private boolean _header = false;
        private String _delim = null;
        private boolean _fill = false;
        private double _fillValue = 0.0;

        /**
         * @param mc        matrix characteristics providing dimensions, block sizes and the sparsity estimate
         * @param sparse    whether sparse blocks are requested
         * @param hasHeader whether the input contains a header row (the row at offset 0 is skipped)
         * @param delim     field delimiter
         * @param fill      whether empty fields are replaced by {@code fillValue}
         * @param fillValue replacement value for empty fields
         */
        public CSVToBinaryBlockFunction(MatrixCharacteristics mc, boolean sparse, boolean hasHeader, String delim, boolean fill, double fillValue) {
            this._rlen = mc.getRows();
            this._clen = mc.getCols();
            this._brlen = mc.getRowsPerBlock();
            this._bclen = mc.getColsPerBlock();
            this._sparsity = OptimizerUtils.getSparsity(mc);
            // sparse blocks are only used when filling cannot introduce non-zero values
            this._sparse = sparse && (!fill || fillValue == 0.0);
            this._header = hasHeader;
            this._delim = delim;
            this._fill = fill;
            this._fillValue = fillValue;
        }

        /**
         * Parses all rows of one partition into matrix blocks.
         *
         * @param arg0 iterator over (line, row offset) pairs
         * @return iterator over the (block index, block) pairs built from the partition
         * @throws IOException if a row has fewer fields than the matrix has columns
         * @throws Exception   if a field cannot be parsed as a double, or an empty
         *                     field is found while filling is disabled
         */
        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<Text, Long>> arg0) throws Exception {
            ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
            int ncblks = (int)Math.ceil((double)this._clen / (double)this._bclen);
            MatrixIndexes[] ix = new MatrixIndexes[ncblks];
            MatrixBlock[] mb = new MatrixBlock[ncblks];
            while (arg0.hasNext()) {
                Tuple2<Text, Long> tmp = arg0.next();
                String row = ((Text)tmp._1()).toString();
                // without a header the offset is shifted by one; with a header, offset 0 is the header itself
                long rowix = (Long)tmp._2() + (long)(this._header ? 0 : 1);
                if (this._header && rowix == 0L) continue;
                long rix = UtilFunctions.computeBlockIndex(rowix, this._brlen);
                int pos = UtilFunctions.computeCellInBlock(rowix, this._brlen);
                if (ix[0] == null || ix[0].getRowIndex() != rix) {
                    // entering a new row block: emit the finished blocks and start new ones
                    if (ix[0] != null) {
                        CSVToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
                    }
                    long len = UtilFunctions.computeBlockSize(this._rlen, rix, this._brlen);
                    this.createBlocks(rowix, (int)len, ix, mb);
                }
                String[] parts = IOUtilFunctions.split(row, this._delim);
                // fail with a descriptive error instead of an ArrayIndexOutOfBoundsException further down
                if ((long)parts.length < this._clen) {
                    throw new IOException("CSV row has " + parts.length + " fields but " + this._clen + " columns are expected: " + row);
                }
                int pix = 0;
                for (int cix = 1; cix <= ncblks; ++cix) {
                    int lclen = UtilFunctions.computeBlockSize(this._clen, cix, this._bclen);
                    if (mb[cix - 1].isInSparseFormat()) {
                        // presumably reserves the sparse row once, sized by the non-zero count of this column range
                        int lnnz = IOUtilFunctions.countNnz(parts, pix, lclen);
                        mb[cix - 1].getSparseBlock().allocate(pos, lnnz);
                    }
                    for (int j = 0; j < lclen; ++j) {
                        String part = parts[pix++];
                        boolean empty = part.isEmpty();
                        if (empty && !this._fill) {
                            // Run the empty-field check BEFORE parsing: parseDouble("") always throws a
                            // NumberFormatException, which previously pre-empted this check every time.
                            // NOTE(review): assumes the helper raises for (fill=false, emptyFound=true);
                            // if it does not, parseDouble below still fails exactly as before.
                            IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(row, this._fill, true);
                        }
                        double val = empty && this._fill ? this._fillValue : Double.parseDouble(part);
                        mb[cix - 1].appendValue(pos, j, val);
                    }
                }
            }
            // emit the blocks of the last row block (null slots are skipped for an empty partition)
            CSVToBinaryBlockFunction.flushBlocksToList(ix, mb, ret);
            return ret.iterator();
        }

        /**
         * Replaces the contents of {@code ix} and {@code mb} with newly allocated
         * blocks, one per column block, for the row block containing {@code rowix}.
         *
         * @param rowix row index used to derive the row-block index
         * @param lrlen number of rows of each created block
         * @param ix    output array receiving the block indexes
         * @param mb    output array receiving the allocated blocks
         */
        private void createBlocks(long rowix, int lrlen, MatrixIndexes[] ix, MatrixBlock[] mb) {
            long rix = UtilFunctions.computeBlockIndex(rowix, this._brlen);
            int ncblks = (int)Math.ceil((double)this._clen / (double)this._bclen);
            for (int cix = 1; cix <= ncblks; ++cix) {
                int lclen = UtilFunctions.computeBlockSize(this._clen, cix, this._bclen);
                ix[cix - 1] = new MatrixIndexes(rix, cix);
                // multiply in long arithmetic: lrlen * lclen as int can overflow for very large blocks;
                // the final narrowing cast from double saturates at Integer.MAX_VALUE
                int estnnz = (int)((double)((long)lrlen * (long)lclen) * this._sparsity);
                mb[cix - 1] = new MatrixBlock(lrlen, lclen, this._sparse, estnnz);
                mb[cix - 1].allocateBlock();
            }
        }

        /**
         * Appends every non-null block, paired with its index, to {@code ret} and
         * then calls {@code examSparsity()} on it.
         *
         * @param ix  block indexes, parallel to {@code mb}
         * @param mb  blocks to flush; null slots are skipped
         * @param ret list receiving the (index, block) pairs
         * @throws DMLRuntimeException declared by this method; presumably raised by {@code examSparsity()}
         */
        private static void flushBlocksToList(MatrixIndexes[] ix, MatrixBlock[] mb, ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret) throws DMLRuntimeException {
            int len = ix.length;
            for (int i = 0; i < len; ++i) {
                if (mb[i] == null) continue;
                ret.add((Tuple2<MatrixIndexes, MatrixBlock>)new Tuple2((Object)ix[i], (Object)mb[i]));
                mb[i].examSparsity();
            }
        }
    }

    /**
     * Pass-through map function over CSV lines: adds the per-line count returned
     * by {@code IOUtilFunctions.countNnz} to an accumulator and returns the line
     * as a string.
     */
    private static class CSVAnalysisFunction
    implements Function<Text, String> {
        private static final long serialVersionUID = 2310303223289674477L;
        private LongAccumulator _aNnz;
        private String _delim;

        /**
         * @param aNnz  accumulator receiving the per-line counts
         * @param delim field delimiter used to split each line
         */
        public CSVAnalysisFunction(LongAccumulator aNnz, String delim) {
            _aNnz = aNnz;
            _delim = delim;
        }

        /**
         * @param v1 one line of CSV text
         * @return the string content of {@code v1}
         */
        public String call(Text v1) throws Exception {
            final String text = v1.toString();
            final String[] fields = IOUtilFunctions.split(text, _delim);
            _aNnz.add((long) IOUtilFunctions.countNnz(fields));
            return text;
        }
    }

    /**
     * Partition-level function that collects (index, cell) pairs in a
     * {@code ReblockBuffer} and emits the matrix blocks produced by flushing it.
     */
    private static class BinaryCellToBinaryBlockFunction
    extends CellToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Tuple2<MatrixIndexes, MatrixCell>>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = 3928810989462198243L;

        protected BinaryCellToBinaryBlockFunction(MatrixCharacteristics mc) {
            super(mc);
        }

        /**
         * @param arg0 iterator over (cell index, cell value) pairs of one partition
         * @return iterator over the (block index, block) pairs flushed from the buffer
         */
        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Tuple2<MatrixIndexes, MatrixCell>> arg0) throws Exception {
            final ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> blocks = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
            final ReblockBuffer buffer = new ReblockBuffer(_bufflen, _rlen, _clen, _brlen, _bclen);
            while (arg0.hasNext()) {
                final Tuple2<MatrixIndexes, MatrixCell> entry = arg0.next();
                final MatrixIndexes cellIx = entry._1();
                final long rowIx = cellIx.getRowIndex();
                final long colIx = cellIx.getColumnIndex();
                final double value = entry._2().getValue();
                // make room first when the buffer has reached its capacity
                if (buffer.getSize() >= buffer.getCapacity()) {
                    flushBufferToList(buffer, blocks);
                }
                buffer.appendCell(rowIx, colIx, value);
            }
            // final flush for whatever is still buffered
            flushBufferToList(buffer, blocks);
            return blocks.iterator();
        }
    }

    /**
     * Wraps a string into a (key, text) pair whose key is always the constant
     * {@code 1}, using the serializable writable wrappers.
     */
    private static class StringToSerTextFunction
    implements PairFunction<String, LongWritable, Text> {
        private static final long serialVersionUID = 2286037080400222528L;

        private StringToSerTextFunction() {
        }

        /**
         * @param arg0 the string to wrap
         * @return pair of a {@code SerLongWritable} holding 1 and a {@code SerText} holding {@code arg0}
         */
        public Tuple2<LongWritable, Text> call(String arg0) throws Exception {
            final SerLongWritable key = new SerLongWritable(1L);
            final SerText text = new SerText(arg0);
            return new Tuple2<LongWritable, Text>(key, text);
        }
    }

    /**
     * Converts a (long, string) pair into a (key, text) pair built from the
     * serializable writable wrappers.
     */
    private static class TextToSerTextFunction
    implements PairFunction<Tuple2<Long, String>, LongWritable, Text> {
        private static final long serialVersionUID = 2286037080400222528L;

        private TextToSerTextFunction() {
        }

        /**
         * @param arg0 pair of key and string
         * @return pair of a {@code SerLongWritable} holding the key and a {@code SerText} holding the string
         */
        public Tuple2<LongWritable, Text> call(Tuple2<Long, String> arg0) throws Exception {
            final Long key = arg0._1();
            final SerLongWritable wrappedKey = new SerLongWritable(key);
            final SerText wrappedText = new SerText(arg0._2());
            return new Tuple2<LongWritable, Text>(wrappedKey, wrappedText);
        }
    }

    /**
     * Partition-level function that parses space-separated "row col value" lines
     * into a {@code ReblockBuffer} and emits the matrix blocks produced by
     * flushing it.
     */
    private static class TextToBinaryBlockFunction
    extends CellToBinaryBlockFunction
    implements PairFlatMapFunction<Iterator<Text>, MatrixIndexes, MatrixBlock> {
        private static final long serialVersionUID = 4907483236186747224L;

        protected TextToBinaryBlockFunction(MatrixCharacteristics mc) {
            super(mc);
        }

        /**
         * Lines starting with {@code %} and lines whose row or column index is 0
         * are ignored.
         *
         * @param arg0 iterator over the text lines of one partition
         * @return iterator over the (block index, block) pairs flushed from the buffer
         */
        public Iterator<Tuple2<MatrixIndexes, MatrixBlock>> call(Iterator<Text> arg0) throws Exception {
            final ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> blocks = new ArrayList<Tuple2<MatrixIndexes, MatrixBlock>>();
            final ReblockBuffer buffer = new ReblockBuffer(_bufflen, _rlen, _clen, _brlen, _bclen);
            final FastStringTokenizer tokenizer = new FastStringTokenizer(' ');
            while (arg0.hasNext()) {
                final String line = arg0.next().toString();
                if (!line.startsWith("%")) {
                    tokenizer.reset(line);
                    final long rowIx = tokenizer.nextLong();
                    final long colIx = tokenizer.nextLong();
                    // the value token is only read for lines with non-zero indexes
                    if (rowIx != 0L && colIx != 0L) {
                        final double value = tokenizer.nextDouble();
                        if (buffer.getSize() >= buffer.getCapacity()) {
                            flushBufferToList(buffer, blocks);
                        }
                        buffer.appendCell(rowIx, colIx, value);
                    }
                }
            }
            // final flush for whatever is still buffered
            flushBufferToList(buffer, blocks);
            return blocks.iterator();
        }
    }

    /**
     * Shared base of the cell-to-block conversion functions: holds the matrix
     * dimensions, block sizes and reblock-buffer length, and provides the common
     * buffer flush.
     */
    private static abstract class CellToBinaryBlockFunction
    implements Serializable {
        private static final long serialVersionUID = 4205331295408335933L;
        /** Upper bound on the number of buffered cells. */
        protected static final int BUFFER_SIZE = 4000000;
        protected int _bufflen = -1;
        protected long _rlen = -1L;
        protected long _clen = -1L;
        protected int _brlen = -1;
        protected int _bclen = -1;

        /**
         * @param mc matrix characteristics providing dimensions and block sizes
         */
        protected CellToBinaryBlockFunction(MatrixCharacteristics mc) {
            this._rlen = mc.getRows();
            this._clen = mc.getCols();
            this._brlen = mc.getRowsPerBlock();
            this._bclen = mc.getColsPerBlock();
            this._bufflen = CellToBinaryBlockFunction.computeBufferLength(this._rlen, this._clen);
        }

        /**
         * Computes {@code min(rlen * clen, BUFFER_SIZE)} without letting the
         * product overflow for positive dimensions.
         *
         * @param rlen number of matrix rows
         * @param clen number of matrix columns
         * @return the buffer length
         */
        private static int computeBufferLength(long rlen, long clen) {
            // rlen > BUFFER_SIZE / clen implies rlen * clen > BUFFER_SIZE, so the product
            // (which could wrap around for very large dimensions) is never evaluated
            if (rlen > 0L && clen > 0L && rlen > (long)BUFFER_SIZE / clen) {
                return BUFFER_SIZE;
            }
            // non-positive dimensions keep the previous arithmetic unchanged
            return (int)Math.min(rlen * clen, (long)BUFFER_SIZE);
        }

        /**
         * Flushes the buffer into binary blocks and appends the converted
         * (index, block) pairs to {@code ret}.
         *
         * @param rbuff buffer to flush
         * @param ret   list receiving the resulting pairs
         * @throws IOException         declared by this method; presumably raised by the buffer flush
         * @throws DMLRuntimeException declared by this method; presumably raised by the buffer flush or conversion
         */
        protected void flushBufferToList(ReblockBuffer rbuff, ArrayList<Tuple2<MatrixIndexes, MatrixBlock>> ret) throws IOException, DMLRuntimeException {
            ArrayList<IndexedMatrixValue> rettmp = new ArrayList<IndexedMatrixValue>();
            rbuff.flushBufferToBinaryBlocks(rettmp);
            ret.addAll(SparkUtils.fromIndexedMatrixBlock(rettmp));
        }
    }

    /**
     * Turns each row of a matrix block into a labeled point: the value in the
     * last column is the label, and all preceding columns form the feature
     * vector.
     */
    private static class PrepareBinaryBlockFunction
    implements FlatMapFunction<MatrixBlock, LabeledPoint> {
        private static final long serialVersionUID = -6590259914203201585L;

        private PrepareBinaryBlockFunction() {
        }

        /**
         * @param arg0 matrix block whose last column holds the labels
         * @return iterator over one labeled point per row of {@code arg0}
         */
        public Iterator<LabeledPoint> call(MatrixBlock arg0) throws Exception {
            final ArrayList<LabeledPoint> points = new ArrayList<LabeledPoint>();
            for (int r = 0; r < arg0.getNumRows(); r++) {
                // slice out columns [0, labelCol - 1] of row r as the features
                final int labelCol = arg0.getNumColumns() - 1;
                final MatrixBlock features = arg0.slice(r, r, 0, labelCol - 1, new MatrixBlock());
                final double label = arg0.getValue(r, labelCol);
                points.add(new LabeledPoint(label, RDDConverterUtils.createVector(features)));
            }
            return points.iterator();
        }
    }
}

