/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.nutch.crawl.AdaptiveFetchSchedule;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.HttpHeaders;
import org.apache.nutch.util.MimeUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Adaptive fetch schedule whose increment/decrement rates can be overridden
 * per MIME type.
 *
 * <p>The overrides are read from a configuration resource whose name is taken
 * from {@link #SCHEDULE_MIME_FILE} (default {@code adaptive-mimetypes.txt}).
 * Each non-blank line that does not start with {@code #} must contain exactly
 * three tab-separated columns: the MIME type, the increment rate and the
 * decrement rate. Lines that do not match this shape are logged and skipped.
 *
 * <p>Note: {@link #setFetchSchedule} writes the inherited {@code INC_RATE} and
 * {@code DEC_RATE} fields on every call before delegating to the superclass,
 * so concurrent calls on one shared instance can interfere with each other.
 */
public class MimeAdaptiveFetchSchedule extends AdaptiveFetchSchedule {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Configuration key of the default increment rate. */
    public static final String SCHEDULE_INC_RATE = "db.fetch.schedule.adaptive.inc_rate";
    /** Configuration key of the default decrement rate. */
    public static final String SCHEDULE_DEC_RATE = "db.fetch.schedule.adaptive.dec_rate";
    /** Configuration key naming the resource that holds the per-MIME-type rates. */
    public static final String SCHEDULE_MIME_FILE = "db.fetch.schedule.mime.file";

    /** Rate applied when the datum's MIME type has no dedicated entry. */
    private float defaultIncRate;
    /** Rate applied when the datum's MIME type has no dedicated entry. */
    private float defaultDecRate;
    /**
     * Per-MIME-type rates, keyed by lower-cased MIME type. Never {@code null},
     * so {@link #setFetchSchedule} is safe even before configuration.
     */
    private HashMap<String, AdaptiveRate> mimeMap = new HashMap<>();

    /**
     * Reads the default rates and loads the per-MIME-type rate file.
     *
     * <p>A missing or unreadable rate file is logged and leaves the schedule
     * working with the default rates only.
     *
     * @param conf the configuration to read from; when {@code null} only the
     *             superclass is notified and nothing else is done
     */
    @Override
    public void setConf(Configuration conf) {
        super.setConf(conf);
        if (conf == null) {
            return;
        }
        this.defaultIncRate = conf.getFloat(SCHEDULE_INC_RATE, 0.2f);
        this.defaultDecRate = conf.getFloat(SCHEDULE_DEC_RATE, 0.2f);

        String mimeFileName = conf.get(SCHEDULE_MIME_FILE, "adaptive-mimetypes.txt");
        // May be null when the resource cannot be found; readMimeFile copes with that.
        Reader mimeFile = conf.getConfResourceAsReader(mimeFileName);
        try {
            this.readMimeFile(mimeFile);
        } catch (IOException e) {
            LOG.error("Failed to read MIME type schedule file: " + mimeFileName, e);
        }
    }

    /**
     * Selects the increment/decrement rates for the datum's content type (falling
     * back to the configured defaults) and then delegates to the superclass.
     *
     * @param url              URL of the page
     * @param datum            datum whose metadata may carry the content type
     * @param prevFetchTime    passed through to the superclass
     * @param prevModifiedTime passed through to the superclass
     * @param fetchTime        passed through to the superclass
     * @param modifiedTime     passed through to the superclass
     * @param state            passed through to the superclass
     * @return whatever the superclass implementation returns
     */
    @Override
    public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
        // Always start from the defaults so a previous call's override does not leak.
        this.INC_RATE = this.defaultIncRate;
        this.DEC_RATE = this.defaultDecRate;

        Writable contentType = datum.getMetaData().get(HttpHeaders.WRITABLE_CONTENT_TYPE);
        if (contentType != null) {
            String currentMime = MimeUtil.cleanMimeType(contentType.toString());
            AdaptiveRate rate = this.mimeMap.get(currentMime);
            if (rate != null) {
                this.INC_RATE = rate.inc;
                this.DEC_RATE = rate.dec;
            }
        }
        return super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime, fetchTime, modifiedTime, state);
    }

    /**
     * Replaces the per-MIME-type rate table with the contents of the given reader
     * and closes the reader afterwards.
     *
     * @param mimeFile reader over the rate file; {@code null} yields an empty table
     * @throws IOException if reading from or closing the reader fails
     */
    private void readMimeFile(Reader mimeFile) throws IOException {
        this.mimeMap = new HashMap<>();
        if (mimeFile == null) {
            LOG.warn("MIME type schedule file not found; using default rates for all MIME types");
            return;
        }
        // try-with-resources closes the wrapped reader as well.
        try (BufferedReader reader = new BufferedReader(mimeFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Skip blank lines and comments.
                if (StringUtils.isBlank(line) || line.startsWith("#")) {
                    continue;
                }
                String[] splits = line.split("\t");
                if (splits.length != 3) {
                    LOG.warn("Invalid configuration line in: {}", line);
                    continue;
                }
                try {
                    float inc = Float.parseFloat(splits[1]);
                    float dec = Float.parseFloat(splits[2]);
                    this.mimeMap.put(StringUtils.lowerCase(splits[0]), new AdaptiveRate(inc, dec));
                } catch (NumberFormatException e) {
                    // A malformed number is treated like any other invalid line
                    // instead of aborting the whole configuration.
                    LOG.warn("Invalid configuration line in: {}", line);
                }
            }
        }
    }

    /**
     * Small simulation that repeatedly applies the schedule to a single
     * {@code text/html} datum and logs how often page changes were missed.
     *
     * <p>Simulated time advances by one day (86400000 ms) per iteration for 10000
     * iterations; the page is marked as changed whenever more than 2592000000 ms
     * have elapsed since its last simulated modification.
     *
     * @param args ignored
     * @throws Exception if the simulation fails
     */
    public static void main(String[] args) throws Exception {
        MimeAdaptiveFetchSchedule fs = new MimeAdaptiveFetchSchedule();
        fs.setConf(NutchConfiguration.create());
        long curTime = 0L;
        long delta = 86400000L;
        long update = 2592000000L;
        boolean changed = true;
        long lastModified = 0L;
        int miss = 0;
        int totalMiss = 0;
        int maxMiss = 0;
        int fetchCnt = 0;
        int changeCnt = 0;
        // NOTE(review): status literal 1 is presumably CrawlDatum.STATUS_DB_UNFETCHED - confirm.
        CrawlDatum p = new CrawlDatum(1, 2592000, 1.0f);
        MapWritable x = new MapWritable();
        x.put(HttpHeaders.WRITABLE_CONTENT_TYPE, new Text("text/html; charset=utf-8"));
        p.setMetaData(x);
        p.setFetchTime(0L);
        LOG.info(p.toString());
        for (int i = 0; i < 10000; ++i) {
            if (lastModified + update < curTime) {
                changed = true;
                ++changeCnt;
                lastModified = curTime;
            }
            LOG.info(i + ". " + changed + "\twill fetch at " + p.getFetchTime() / delta + "\tinterval " + p.getFetchInterval() / 86400 + " days\t missed " + miss);
            if (p.getFetchTime() <= curTime) {
                ++fetchCnt;
                // NOTE(review): 1 / 2 are presumably the modified / not-modified state constants - confirm.
                fs.setFetchSchedule(new Text("http://www.example.com"), p, p.getFetchTime(), p.getModifiedTime(), curTime, lastModified, changed ? 1 : 2);
                LOG.info("\tfetched & adjusted: \twill fetch at " + p.getFetchTime() / delta + "\tinterval " + p.getFetchInterval() / 86400 + " days");
                if (!changed) {
                    ++miss;
                }
                if (miss > maxMiss) {
                    maxMiss = miss;
                }
                changed = false;
                totalMiss += miss;
                miss = 0;
            }
            if (changed) {
                ++miss;
            }
            curTime += delta;
        }
        LOG.info("Total missed: " + totalMiss + ", max miss: " + maxMiss);
        LOG.info("Page changed " + changeCnt + " times, fetched " + fetchCnt + " times.");
    }

    /** Immutable pair of increment/decrement rates configured for one MIME type. */
    private static final class AdaptiveRate {
        final float inc;
        final float dec;

        AdaptiveRate(float inc, float dec) {
            this.inc = inc;
            this.dec = dec;
        }
    }
}

