/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ScoreUpdater
extends Configured
implements Tool {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    public void update(Path crawlDb, Path webGraphDb) throws IOException, ClassNotFoundException, InterruptedException {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        LOG.info("ScoreUpdater: starting");
        Configuration conf = this.getConf();
        LOG.info("Running crawldb update " + crawlDb);
        Path nodeDb = new Path(webGraphDb, "nodes");
        Path crawlDbCurrent = new Path(crawlDb, "current");
        Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
        Job updater = Job.getInstance((Configuration)conf, (String)("Nutch ScoreUpdater: " + crawlDb));
        FileInputFormat.addInputPath((Job)updater, (Path)crawlDbCurrent);
        FileInputFormat.addInputPath((Job)updater, (Path)nodeDb);
        FileOutputFormat.setOutputPath((Job)updater, (Path)newCrawlDb);
        updater.setInputFormatClass(SequenceFileInputFormat.class);
        updater.setJarByClass(ScoreUpdater.class);
        updater.setMapperClass(ScoreUpdaterMapper.class);
        updater.setReducerClass(ScoreUpdaterReducer.class);
        updater.setMapOutputKeyClass(Text.class);
        updater.setMapOutputValueClass(ObjectWritable.class);
        updater.setOutputKeyClass(Text.class);
        updater.setOutputValueClass(CrawlDatum.class);
        updater.setOutputFormatClass(MapFileOutputFormat.class);
        try {
            boolean success = updater.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("Update CrawlDb from WebGraph", updater);
                LOG.error(message);
                FileSystem fs = newCrawlDb.getFileSystem(conf);
                if (fs.exists(newCrawlDb)) {
                    fs.delete(newCrawlDb, true);
                }
                throw new RuntimeException(message);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error("Update CrawlDb from WebGraph:", (Throwable)e);
            FileSystem fs = newCrawlDb.getFileSystem(conf);
            if (fs.exists(newCrawlDb)) {
                fs.delete(newCrawlDb, true);
            }
            throw e;
        }
        LOG.info("ScoreUpdater: installing new crawldb " + crawlDb);
        CrawlDb.install(updater, crawlDb);
        stopWatch.stop();
        LOG.info("ScoreUpdater: finished, elapsed: {} ms ", (Object)stopWatch.getTime(TimeUnit.MILLISECONDS));
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new ScoreUpdater(), (String[])args);
        System.exit(res);
    }

    public int run(String[] args) throws Exception {
        Options options = new Options();
        OptionBuilder.withArgName((String)"help");
        OptionBuilder.withDescription((String)"show this help message");
        Option helpOpts = OptionBuilder.create((String)"help");
        options.addOption(helpOpts);
        OptionBuilder.withArgName((String)"crawldb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the crawldb to use");
        Option crawlDbOpts = OptionBuilder.create((String)"crawldb");
        options.addOption(crawlDbOpts);
        OptionBuilder.withArgName((String)"webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the webgraphdb to use");
        Option webGraphOpts = OptionBuilder.create((String)"webgraphdb");
        options.addOption(webGraphOpts);
        GnuParser parser = new GnuParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("help") || !line.hasOption("webgraphdb") || !line.hasOption("crawldb")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("ScoreUpdater", options);
                return -1;
            }
            String crawlDb = line.getOptionValue("crawldb");
            String webGraphDb = line.getOptionValue("webgraphdb");
            this.update(new Path(crawlDb), new Path(webGraphDb));
            return 0;
        }
        catch (Exception e) {
            LOG.error("ScoreUpdater: " + StringUtils.stringifyException((Throwable)e));
            return -1;
        }
    }

    public static class ScoreUpdaterReducer
    extends Reducer<Text, ObjectWritable, Text, CrawlDatum> {
        private float clearScore = 0.0f;

        public void setup(Reducer.Context context) {
            Configuration conf = context.getConfiguration();
            this.clearScore = conf.getFloat("link.score.updater.clear.score", 0.0f);
        }

        public void reduce(Text key, Iterable<ObjectWritable> values, Reducer.Context context) throws IOException, InterruptedException {
            String url = key.toString();
            Node node = null;
            CrawlDatum datum = null;
            for (ObjectWritable next : values) {
                Object value = next.get();
                if (value instanceof Node) {
                    node = (Node)value;
                    continue;
                }
                if (!(value instanceof CrawlDatum)) continue;
                datum = (CrawlDatum)value;
            }
            if (datum != null) {
                if (node != null) {
                    float inlinkScore = node.getInlinkScore();
                    datum.setScore(inlinkScore);
                    LOG.debug(url + ": setting to score " + inlinkScore);
                } else {
                    datum.setScore(this.clearScore);
                    LOG.debug(url + ": setting to clear score of " + this.clearScore);
                }
                context.write((Object)key, (Object)datum);
            } else {
                LOG.debug(url + ": no datum");
            }
        }
    }

    public static class ScoreUpdaterMapper
    extends Mapper<Text, Writable, Text, ObjectWritable> {
        public void map(Text key, Writable value, Mapper.Context context) throws IOException, InterruptedException {
            ObjectWritable objWrite = new ObjectWritable();
            objWrite.set((Object)value);
            context.write((Object)key, (Object)objWrite);
        }
    }
}

