/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.pruner;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.Pair;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileIterable;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.map.OpenIntLongHashMap;

public class WordsPrunerReducer
extends Reducer<WritableComparable<?>, VectorWritable, WritableComparable<?>, VectorWritable> {
    private final OpenIntLongHashMap dictionary = new OpenIntLongHashMap();
    private long maxDf = Long.MAX_VALUE;
    private long minDf = -1L;

    protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Reducer.Context context) throws IOException, InterruptedException {
        Iterator<VectorWritable> it = values.iterator();
        if (!it.hasNext()) {
            return;
        }
        Vector value = it.next().get();
        Vector vector = value.clone();
        if (this.maxDf != Long.MAX_VALUE || this.minDf > -1L) {
            for (Vector.Element e : value.nonZeroes()) {
                if (!this.dictionary.containsKey(e.index())) {
                    vector.setQuick(e.index(), 0.0);
                    continue;
                }
                long df = this.dictionary.get(e.index());
                if (df <= this.maxDf && df >= this.minDf) continue;
                vector.setQuick(e.index(), 0.0);
            }
        }
        VectorWritable vectorWritable = new VectorWritable(vector);
        context.write(key, (Object)vectorWritable);
    }

    protected void setup(Reducer.Context context) throws IOException, InterruptedException {
        super.setup(context);
        Configuration conf = context.getConfiguration();
        this.maxDf = conf.getLong("max.df", Long.MAX_VALUE);
        this.minDf = conf.getLong("min.df", -1L);
        Path dictionaryFile = HadoopUtil.getSingleCachedFile(conf);
        for (Pair record : new SequenceFileIterable(dictionaryFile, true, conf)) {
            this.dictionary.put(((IntWritable)record.getFirst()).get(), ((LongWritable)record.getSecond()).get());
        }
    }
}

