/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.cf.taste.example.email;

import com.google.common.io.Closeables;
import java.io.Closeable;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.example.email.FromEmailToDictionaryMapper;
import org.apache.mahout.cf.taste.example.email.MailToDictionaryReducer;
import org.apache.mahout.cf.taste.example.email.MailToRecMapper;
import org.apache.mahout.cf.taste.example.email.MailToRecReducer;
import org.apache.mahout.cf.taste.example.email.MsgIdToDictionaryMapper;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
import org.apache.mahout.math.VarIntWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class MailToPrefsDriver
extends AbstractJob {
    private static final Logger log = LoggerFactory.getLogger(MailToPrefsDriver.class);
    private static final String OUTPUT_FILES_PATTERN = "part-*";
    private static final int DICTIONARY_BYTE_OVERHEAD = 4;

    public static void main(String[] args) throws Exception {
        ToolRunner.run((Configuration)new Configuration(), (Tool)new MailToPrefsDriver(), (String[])args);
    }

    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption((Option)DefaultOptionCreator.overwriteOption().create());
        this.addOption("chunkSize", "cs", "The size of chunks to write.  Default is 100 mb", "100");
        this.addOption("separator", "sep", "The separator used in the input file to separate to, from, subject.  Default is \\n", "\n");
        this.addOption("from", "f", "The position in the input text (value) where the from email is located, starting from zero (0).", "0");
        this.addOption("refs", "r", "The position in the input text (value) where the reference ids are located, starting from zero (0).", "1");
        this.addOption(MailToPrefsDriver.buildOption((String)"useCounts", (String)"u", (String)"If set, then use the number of times the user has interacted with a thread as an indication of their preference.  Otherwise, use boolean preferences.", (boolean)false, (boolean)false, (String)String.valueOf(true)));
        Map parsedArgs = this.parseArguments(args);
        Path input = this.getInputPath();
        Path output = this.getOutputPath();
        int chunkSize = Integer.parseInt(this.getOption("chunkSize"));
        String separator = this.getOption("separator");
        Configuration conf = this.getConf();
        boolean useCounts = this.hasOption("useCounts");
        AtomicInteger currentPhase = new AtomicInteger();
        int[] msgDim = new int[1];
        List<Path> msgIdChunks = null;
        boolean overwrite = this.hasOption("overwrite");
        if (MailToPrefsDriver.shouldRunNextPhase((Map)parsedArgs, (AtomicInteger)currentPhase)) {
            Path msgIdsPath = new Path(output, "msgIds");
            if (overwrite) {
                HadoopUtil.delete((Configuration)conf, (Path[])new Path[]{msgIdsPath});
            }
            log.info("Creating Msg Id Dictionary");
            Job createMsgIdDictionary = this.prepareJob(input, msgIdsPath, SequenceFileInputFormat.class, MsgIdToDictionaryMapper.class, Text.class, VarIntWritable.class, MailToDictionaryReducer.class, Text.class, VarIntWritable.class, SequenceFileOutputFormat.class);
            boolean succeeded = createMsgIdDictionary.waitForCompletion(true);
            if (!succeeded) {
                return -1;
            }
            msgIdChunks = MailToPrefsDriver.createDictionaryChunks(msgIdsPath, output, "msgIds-dictionary-", createMsgIdDictionary.getConfiguration(), chunkSize, msgDim);
        }
        List<Path> fromChunks = null;
        if (MailToPrefsDriver.shouldRunNextPhase((Map)parsedArgs, (AtomicInteger)currentPhase)) {
            Path fromIdsPath = new Path(output, "fromIds");
            if (overwrite) {
                HadoopUtil.delete((Configuration)conf, (Path[])new Path[]{fromIdsPath});
            }
            log.info("Creating From Id Dictionary");
            Job createFromIdDictionary = this.prepareJob(input, fromIdsPath, SequenceFileInputFormat.class, FromEmailToDictionaryMapper.class, Text.class, VarIntWritable.class, MailToDictionaryReducer.class, Text.class, VarIntWritable.class, SequenceFileOutputFormat.class);
            createFromIdDictionary.getConfiguration().set("separator", separator);
            boolean succeeded = createFromIdDictionary.waitForCompletion(true);
            if (!succeeded) {
                return -1;
            }
            int[] fromDim = new int[1];
            fromChunks = MailToPrefsDriver.createDictionaryChunks(fromIdsPath, output, "fromIds-dictionary-", createFromIdDictionary.getConfiguration(), chunkSize, fromDim);
        }
        if (MailToPrefsDriver.shouldRunNextPhase((Map)parsedArgs, (AtomicInteger)currentPhase) && fromChunks != null && msgIdChunks != null) {
            log.info("Creating recommendation matrix");
            Path vecPath = new Path(output, "recInput");
            if (overwrite) {
                HadoopUtil.delete((Configuration)conf, (Path[])new Path[]{vecPath});
            }
            conf.set("msgIdDim", String.valueOf(msgDim[0]));
            conf.set("fromPrefix", "fromIds-dictionary-");
            conf.set("msgIdsPrefix", "msgIds-dictionary-");
            conf.set("fromIdx", this.getOption("from"));
            conf.set("refsIdx", this.getOption("refs"));
            conf.set("separator", separator);
            conf.set("useBooleanPreferences", String.valueOf(useCounts));
            int j = 0;
            int i = 0;
            for (Path fromChunk : fromChunks) {
                for (Path idChunk : msgIdChunks) {
                    Path out = new Path(vecPath, "tmp-" + i + '-' + j);
                    DistributedCache.setCacheFiles((URI[])new URI[]{fromChunk.toUri(), idChunk.toUri()}, (Configuration)conf);
                    Job createRecMatrix = this.prepareJob(input, out, SequenceFileInputFormat.class, MailToRecMapper.class, Text.class, LongWritable.class, MailToRecReducer.class, Text.class, NullWritable.class, TextOutputFormat.class);
                    createRecMatrix.getConfiguration().set("mapred.output.compress", "false");
                    boolean succeeded = createRecMatrix.waitForCompletion(true);
                    if (!succeeded) {
                        return -1;
                    }
                    FileStatus[] fs = HadoopUtil.getFileStatus((Path)new Path(out, "*"), (PathType)PathType.GLOB, (PathFilter)PathFilters.partFilter(), null, (Configuration)conf);
                    for (int k = 0; k < fs.length; ++k) {
                        FileStatus f = fs[k];
                        Path outPath = new Path(vecPath, "chunk-" + i + '-' + j + '-' + k);
                        FileUtil.copy((FileSystem)f.getPath().getFileSystem(conf), (Path)f.getPath(), (FileSystem)outPath.getFileSystem(conf), (Path)outPath, (boolean)true, (boolean)overwrite, (Configuration)conf);
                    }
                    HadoopUtil.delete((Configuration)conf, (Path[])new Path[]{out});
                    ++j;
                }
                ++i;
            }
        }
        return 0;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private static List<Path> createDictionaryChunks(Path inputPath, Path dictionaryPathBase, String name, Configuration baseConf, int chunkSizeInMegabytes, int[] maxTermDimension) throws IOException {
        ArrayList<Path> chunkPaths = new ArrayList<Path>();
        Configuration conf = new Configuration(baseConf);
        FileSystem fs = FileSystem.get((URI)inputPath.toUri(), (Configuration)conf);
        long chunkSizeLimit = (long)chunkSizeInMegabytes * 1024L * 1024L;
        int chunkIndex = 0;
        Path chunkPath = new Path(dictionaryPathBase, name + chunkIndex);
        chunkPaths.add(chunkPath);
        SequenceFile.Writer dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);
        try {
            long currentChunkSize = 0L;
            Path filesPattern = new Path(inputPath, OUTPUT_FILES_PATTERN);
            int i = 1;
            for (Pair record : new SequenceFileDirIterable(filesPattern, PathType.GLOB, null, null, true, conf)) {
                if (currentChunkSize > chunkSizeLimit) {
                    Closeables.close((Closeable)dictWriter, (boolean)false);
                    chunkPath = new Path(dictionaryPathBase, name + ++chunkIndex);
                    chunkPaths.add(chunkPath);
                    dictWriter = new SequenceFile.Writer(fs, conf, chunkPath, Text.class, IntWritable.class);
                    currentChunkSize = 0L;
                }
                Writable key = (Writable)record.getFirst();
                int fieldSize = 4 + key.toString().length() * 2 + 4;
                currentChunkSize += (long)fieldSize;
                dictWriter.append(key, (Writable)new IntWritable(i++));
            }
            maxTermDimension[0] = i;
        }
        finally {
            Closeables.close((Closeable)dictWriter, (boolean)false);
        }
        return chunkPaths;
    }
}

