/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.FetchSchedule;
import org.apache.nutch.crawl.FetchScheduleFactory;
import org.apache.nutch.crawl.InlinkPriorityQueue;
import org.apache.nutch.crawl.SignatureComparator;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CrawlDbReducer
extends Reducer<Text, CrawlDatum, Text, CrawlDatum> {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private int retryMax;
    private CrawlDatum result = new CrawlDatum();
    private InlinkPriorityQueue linked = null;
    private ScoringFilters scfilters = null;
    private boolean additionsAllowed;
    private int maxInterval;
    private FetchSchedule schedule;

    public void setup(Reducer.Context context) {
        Configuration conf = context.getConfiguration();
        this.retryMax = conf.getInt("db.fetch.retry.max", 3);
        this.scfilters = new ScoringFilters(conf);
        this.additionsAllowed = conf.getBoolean("db.update.additions.allowed", true);
        this.maxInterval = conf.getInt("db.fetch.interval.max", 0);
        this.schedule = FetchScheduleFactory.getFetchSchedule(conf);
        int maxLinks = conf.getInt("db.update.max.inlinks", 10000);
        this.linked = new InlinkPriorityQueue(maxLinks);
    }

    public void reduce(Text key, Iterable<CrawlDatum> values, Reducer.Context context) throws IOException, InterruptedException {
        CrawlDatum fetch = new CrawlDatum();
        CrawlDatum old = new CrawlDatum();
        boolean fetchSet = false;
        boolean oldSet = false;
        byte[] signature = null;
        boolean multiple = false;
        this.linked.clear();
        MapWritable metaFromParse = null;
        block23: for (CrawlDatum datum : values) {
            if (!multiple) {
                multiple = true;
            }
            if (CrawlDatum.hasDbStatus(datum)) {
                if (!oldSet) {
                    if (multiple) {
                        old.set(datum);
                    } else {
                        old = datum;
                    }
                    oldSet = true;
                    continue;
                }
                if (old.getFetchTime() >= datum.getFetchTime()) continue;
                old.set(datum);
                continue;
            }
            if (CrawlDatum.hasFetchStatus(datum)) {
                if (!fetchSet) {
                    if (multiple) {
                        fetch.set(datum);
                    } else {
                        fetch = datum;
                    }
                    fetchSet = true;
                    continue;
                }
                if (fetch.getFetchTime() >= datum.getFetchTime()) continue;
                fetch.set(datum);
                continue;
            }
            switch (datum.getStatus()) {
                case 67: {
                    CrawlDatum link;
                    if (multiple) {
                        link = new CrawlDatum();
                        link.set(datum);
                    } else {
                        link = datum;
                    }
                    this.linked.insert(link);
                    continue block23;
                }
                case 65: {
                    signature = datum.getSignature();
                    continue block23;
                }
                case 68: {
                    metaFromParse = datum.getMetaData();
                    continue block23;
                }
            }
            LOG.warn("Unknown status, key: " + key + ", datum: " + datum);
        }
        int numLinks = this.linked.size();
        ArrayList<CrawlDatum> linkList = new ArrayList<CrawlDatum>(numLinks);
        for (int i = numLinks - 1; i >= 0; --i) {
            linkList.add((CrawlDatum)this.linked.pop());
        }
        if (!oldSet && !this.additionsAllowed) {
            return;
        }
        if (!fetchSet && linkList.size() > 0) {
            fetch = (CrawlDatum)linkList.get(0);
            fetchSet = true;
        }
        if (!fetchSet) {
            if (oldSet) {
                try {
                    this.scfilters.orphanedScore(key, old);
                }
                catch (ScoringFilterException e) {
                    LOG.warn("Couldn't update orphaned score, key={}: {}", (Object)key, (Object)e);
                }
                context.write((Object)key, (Object)old);
                context.getCounter("CrawlDB status", CrawlDatum.getStatusName(old.getStatus())).increment(1L);
            } else {
                LOG.warn("Missing fetch and old value, signature={}", (Object)StringUtil.toHexString(signature));
            }
            return;
        }
        if (signature == null) {
            signature = fetch.getSignature();
        }
        long prevModifiedTime = oldSet ? old.getModifiedTime() : 0L;
        long prevFetchTime = oldSet ? old.getFetchTime() : 0L;
        this.result.set(fetch);
        if (oldSet) {
            if (old.getMetaData().size() > 0) {
                this.result.putAllMetaData(old);
                if (fetch.getMetaData().size() > 0) {
                    this.result.putAllMetaData(fetch);
                }
            }
            if (old.getModifiedTime() > 0L && fetch.getModifiedTime() == 0L) {
                this.result.setModifiedTime(old.getModifiedTime());
            }
        }
        switch (fetch.getStatus()) {
            case 67: {
                if (oldSet) {
                    this.result.set(old);
                    break;
                }
                this.result = this.schedule.initializeSchedule(key, this.result);
                this.result.setStatus(1);
                try {
                    this.scfilters.initialScore(key, this.result);
                }
                catch (ScoringFilterException e) {
                    LOG.warn("Cannot filter init score for url {}, using default: {}", (Object)key, (Object)e.getMessage());
                    this.result.setScore(0.0f);
                }
                break;
            }
            case 33: 
            case 35: 
            case 36: 
            case 38: {
                if (metaFromParse != null) {
                    for (Map.Entry e : metaFromParse.entrySet()) {
                        this.result.getMetaData().put((Writable)e.getKey(), (Writable)e.getValue());
                    }
                }
                int modified = 0;
                if (fetch.getStatus() == 38) {
                    modified = 2;
                } else if (fetch.getStatus() == 33 && oldSet && old.getSignature() != null && signature != null) {
                    modified = SignatureComparator._compare(old.getSignature(), signature) != 0 ? 1 : 2;
                }
                this.result = this.schedule.setFetchSchedule(key, this.result, prevFetchTime, prevModifiedTime, fetch.getFetchTime(), fetch.getModifiedTime(), modified);
                if (modified == 2) {
                    this.result.setStatus(6);
                    this.result.setModifiedTime(prevModifiedTime);
                    if (oldSet) {
                        this.result.setSignature(old.getSignature());
                    }
                } else {
                    switch (fetch.getStatus()) {
                        case 33: {
                            this.result.setStatus(2);
                            break;
                        }
                        case 36: {
                            this.result.setStatus(5);
                            break;
                        }
                        case 35: {
                            this.result.setStatus(4);
                            break;
                        }
                        default: {
                            LOG.warn("Unexpected status: " + fetch.getStatus() + " resetting to old status.");
                            if (oldSet) {
                                this.result.setStatus(old.getStatus());
                                break;
                            }
                            this.result.setStatus(1);
                        }
                    }
                    this.result.setSignature(signature);
                }
                if (this.maxInterval >= this.result.getFetchInterval()) break;
                this.result = this.schedule.forceRefetch(key, this.result, false);
                break;
            }
            case 65: {
                LOG.warn("Lone CrawlDatum.STATUS_SIGNATURE: {}", (Object)key);
                return;
            }
            case 34: {
                if (oldSet) {
                    this.result.setSignature(old.getSignature());
                }
                this.result = this.schedule.setPageRetrySchedule(key, this.result, prevFetchTime, prevModifiedTime, fetch.getFetchTime());
                if (this.result.getRetriesSinceFetch() < this.retryMax) {
                    this.result.setStatus(1);
                    break;
                }
                this.result.setStatus(3);
                this.result = this.schedule.setPageGoneSchedule(key, this.result, prevFetchTime, prevModifiedTime, fetch.getFetchTime());
                break;
            }
            case 37: {
                if (oldSet) {
                    this.result.setSignature(old.getSignature());
                }
                this.result.setStatus(3);
                this.result = this.schedule.setPageGoneSchedule(key, this.result, prevFetchTime, prevModifiedTime, fetch.getFetchTime());
                break;
            }
            default: {
                throw new RuntimeException("Unknown status: " + fetch.getStatus() + " " + key);
            }
        }
        try {
            this.scfilters.updateDbScore(key, oldSet ? old : null, this.result, linkList);
        }
        catch (Exception e) {
            LOG.warn("Couldn't update score, key={}: {}", (Object)key, (Object)e);
        }
        this.result.getMetaData().remove((Object)Nutch.WRITABLE_GENERATE_TIME_KEY);
        context.write((Object)key, (Object)this.result);
        context.getCounter("CrawlDB status", CrawlDatum.getStatusName(this.result.getStatus())).increment(1L);
    }
}

