/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.AbstractFetchSchedule;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Fetch schedule that adapts the re-fetch interval of a page to how often the
 * page is observed to change.
 *
 * <p>On every call to {@link #setFetchSchedule} the current interval is shrunk
 * by {@code DEC_RATE} when the page was reported as modified (state {@code 1})
 * and grown by {@code INC_RATE} when it was reported as not modified (state
 * {@code 2}). The result is clamped to a minimum/maximum interval, which can be
 * overridden per host through the resource
 * {@code adaptive-host-specific-intervals.txt}.
 *
 * <p>Intervals are multiplied by 1000 before being added to fetch times, so an
 * interval is expressed in thousandths of the time unit used for fetch times
 * (presumably seconds vs. milliseconds - confirm against {@code CrawlDatum}).
 */
public class AdaptiveFetchSchedule
extends AbstractFetchSchedule {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Name of the resource holding the per-host interval overrides. */
    private static final String HOST_INTERVALS_FILE = "adaptive-host-specific-intervals.txt";

    /** Fraction by which the interval grows when a page is unmodified. */
    protected float INC_RATE;
    /** Fraction by which the interval shrinks when a page is modified. */
    protected float DEC_RATE;
    /** Global upper bound for the adapted interval. */
    private float MAX_INTERVAL;
    /** Global lower bound for the adapted interval. */
    private float MIN_INTERVAL;
    /** Whether the next fetch time is shifted towards the last modification time. */
    private boolean SYNC_DELTA;
    /** Fraction of the (fetch - modified) delta used for that shift. */
    private double SYNC_DELTA_RATE;
    private Configuration conf;
    /** Per-host maximum intervals, keyed by lower-cased host name. */
    private final Map<String, Float> hostSpecificMaxInterval = new HashMap<>();
    /** Per-host minimum intervals, keyed by lower-cased host name. */
    private final Map<String, Float> hostSpecificMinInterval = new HashMap<>();

    /**
     * Reads the adaptive-schedule settings from {@code conf} and (re)loads the
     * host-specific interval overrides.
     *
     * <p>A {@code null} configuration is passed to the superclass and stored,
     * but no settings are read. A failure to read the host-specific file is
     * logged and otherwise ignored, leaving only the global bounds in effect.
     *
     * @param conf the configuration to read from, may be {@code null}
     */
    @Override
    public void setConf(Configuration conf) {
        super.setConf(conf);
        this.conf = conf;
        if (conf == null) {
            return;
        }
        this.INC_RATE = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
        this.DEC_RATE = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
        this.MIN_INTERVAL = conf.getFloat("db.fetch.schedule.adaptive.min_interval", 60.0f);
        this.MAX_INTERVAL = conf.getFloat("db.fetch.schedule.adaptive.max_interval", 3.1536E7f);
        this.SYNC_DELTA = conf.getBoolean("db.fetch.schedule.adaptive.sync_delta", true);
        this.SYNC_DELTA_RATE = conf.getFloat("db.fetch.schedule.adaptive.sync_delta_rate", 0.2f);

        // Drop overrides loaded by an earlier setConf() call so that entries
        // which are no longer valid for the new bounds do not linger.
        this.hostSpecificMinInterval.clear();
        this.hostSpecificMaxInterval.clear();
        try {
            this.setHostSpecificIntervals(HOST_INTERVALS_FILE, this.MIN_INTERVAL, this.MAX_INTERVAL);
        }
        catch (IOException e) {
            LOG.error("Failed reading the configuration file. ", e);
        }
    }

    /**
     * Loads per-host min/max intervals from {@code fileName}.
     *
     * <p>The file is looked up as a configuration resource first and, if that
     * yields nothing, opened as a plain file. Blank lines and lines whose first
     * non-whitespace character is {@code #} are skipped; every other line is
     * expected to be a whitespace-separated {@code host min max} triplet.
     *
     * @param fileName   resource or file name to read
     * @param defaultMin global minimum interval; host minimums must exceed it
     * @param defaultMax global maximum interval; host maximums must be below it
     * @throws IOException if the file cannot be opened or read
     */
    private void setHostSpecificIntervals(String fileName, float defaultMin, float defaultMax) throws IOException {
        Reader configReader = this.conf.getConfResourceAsReader(fileName);
        if (configReader == null) {
            configReader = new FileReader(fileName);
        }
        // Closing the BufferedReader also closes the wrapped reader.
        try (BufferedReader reader = new BufferedReader(configReader)) {
            String line;
            int lineNo = 0;
            while ((line = reader.readLine()) != null) {
                ++lineNo;
                if (!StringUtils.isNotBlank(line)) {
                    continue;
                }
                // Trim before the comment check so indented comments are skipped too.
                line = line.trim();
                if (line.startsWith("#")) {
                    continue;
                }
                this.addHostSpecificInterval(line, lineNo, defaultMin, defaultMax);
            }
        }
    }

    /**
     * Parses one {@code host min max} line and registers the valid overrides.
     *
     * <p>{@code default} (any case) or {@code 0} for a value means "no
     * override". Invalid lines are logged and skipped; they never abort loading.
     *
     * @param line       trimmed, non-blank, non-comment line
     * @param lineNo     1-based line number, used in log messages only
     * @param defaultMin global minimum interval
     * @param defaultMax global maximum interval
     */
    private void addHostSpecificInterval(String line, int lineNo, float defaultMin, float defaultMax) {
        String[] parts = line.split("\\s+");
        if (parts.length != 3) {
            LOG.error("Malformed (domain, min_interval, max_interval) triplet on line {} of the config. file: {}", lineNo, line);
            return;
        }

        // Locale.ROOT keeps host names stable under locales with special casing rules.
        String host = parts[0].trim().toLowerCase(Locale.ROOT);
        String minInt = parts[1].trim();
        String maxInt = parts[2].trim();
        if (minInt.equalsIgnoreCase("default")) {
            minInt = "0";
        }
        if (maxInt.equalsIgnoreCase("default")) {
            maxInt = "0";
        }

        float min;
        float max;
        try {
            min = Float.parseFloat(minInt);
            max = Float.parseFloat(maxInt);
        }
        catch (NumberFormatException e) {
            LOG.error("No proper fetch intervals given on line {} in the config. file: {}", lineNo, line, e);
            return;
        }

        // A max of 0 means "use the global maximum", so the minimum has to be
        // compared against that effective value; comparing against the literal
        // 0 would reject every line that only customises the minimum.
        float effectiveMax = max > 0.0f ? max : defaultMax;
        if (min < 0.0f || max < 0.0f || min > effectiveMax) {
            LOG.error("Improper fetch intervals given on line {} in the config. file: {}", lineNo, line);
            return;
        }

        if (min > 0.0f) {
            if (min > defaultMin) {
                this.hostSpecificMinInterval.put(host, min);
                LOG.debug("Added custom min. interval {} for host {}.", min, host);
            } else {
                LOG.error("Min. interval out of bounds on line {} in the config. file: {}", lineNo, line);
            }
        }
        if (max > 0.0f) {
            if (max < defaultMax) {
                this.hostSpecificMaxInterval.put(host, max);
                LOG.debug("Added custom max. interval {} for host {}.", max, host);
            } else {
                LOG.error("Max. interval out of bounds on line {} in the config. file: {}", lineNo, line);
            }
        }
    }

    /**
     * Extracts the host component of a URL.
     *
     * @param url the URL string to parse
     * @return the host as reported by {@link URI#getHost()}, which is
     *         {@code null} when the URI has no host component
     * @throws URISyntaxException if {@code url} is not a valid URI
     */
    public static String getHostName(String url) throws URISyntaxException {
        return new URI(url).getHost();
    }

    /**
     * Returns the maximum interval for the host of {@code url}.
     *
     * @param url                the page URL
     * @param defaultMaxInterval value returned when no host override applies
     * @return the host-specific maximum, or {@code defaultMaxInterval} if the
     *         URL cannot be parsed, has no host, or has no override
     */
    public float getMaxInterval(Text url, float defaultMaxInterval) {
        return AdaptiveFetchSchedule.lookupHostInterval(this.hostSpecificMaxInterval, url, defaultMaxInterval);
    }

    /**
     * Returns the minimum interval for the host of {@code url}.
     *
     * @param url                the page URL
     * @param defaultMinInterval value returned when no host override applies
     * @return the host-specific minimum, or {@code defaultMinInterval} if the
     *         URL cannot be parsed, has no host, or has no override
     */
    public float getMinInterval(Text url, float defaultMinInterval) {
        return AdaptiveFetchSchedule.lookupHostInterval(this.hostSpecificMinInterval, url, defaultMinInterval);
    }

    /**
     * Shared lookup behind {@link #getMinInterval} and {@link #getMaxInterval}.
     *
     * @param overrides per-host values keyed by lower-cased host name
     * @param url       the page URL
     * @param fallback  value to return when no override is found
     * @return the override for the URL's host, or {@code fallback}
     */
    private static float lookupHostInterval(Map<String, Float> overrides, Text url, float fallback) {
        if (overrides.isEmpty()) {
            // Avoid URL parsing entirely when no overrides are configured.
            return fallback;
        }
        String host;
        try {
            host = AdaptiveFetchSchedule.getHostName(url.toString());
        }
        catch (URISyntaxException e) {
            return fallback;
        }
        if (host == null) {
            return fallback;
        }
        // Keys are stored lower-cased, so the lookup must be lower-cased too.
        Float custom = overrides.get(host.toLowerCase(Locale.ROOT));
        return custom != null ? custom.floatValue() : fallback;
    }

    /**
     * Adapts the fetch interval of {@code datum} and schedules its next fetch.
     *
     * <p>If the datum's metadata contains
     * {@code Nutch.WRITABLE_FIXED_INTERVAL_KEY}, that value is used as the
     * interval unchanged. Otherwise the interval is decreased for state
     * {@code 1}, increased for state {@code 2}, optionally raised to the time
     * since last modification (sync-delta), and finally clamped to the min/max
     * bounds for the URL's host.
     *
     * @param url              the page URL
     * @param datum            the datum to update; also the return value
     * @param prevFetchTime    passed through to the superclass
     * @param prevModifiedTime passed through to the superclass
     * @param fetchTime        time of the current fetch
     * @param modifiedTime     last modification time; values {@code <= 0} are
     *                         replaced by {@code fetchTime}
     * @param state            {@code 1} = modified, {@code 2} = not modified
     *                         (presumably the FetchSchedule STATUS_* constants
     *                         - confirm); any other value leaves the interval
     *                         unscaled
     * @return {@code datum} with updated interval, fetch time and modified time
     */
    @Override
    public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum, long prevFetchTime, long prevModifiedTime, long fetchTime, long modifiedTime, int state) {
        super.setFetchSchedule(url, datum, prevFetchTime, prevModifiedTime, fetchTime, modifiedTime, state);
        float interval = datum.getFetchInterval();
        long refTime = fetchTime;
        if (interval == 0.0f) {
            interval = (float)this.defaultInterval;
        }
        if (datum.getMetaData().containsKey(Nutch.WRITABLE_FIXED_INTERVAL_KEY)) {
            // A fixed interval on the datum bypasses adaptation and clamping.
            FloatWritable customIntervalWritable = (FloatWritable)datum.getMetaData().get(Nutch.WRITABLE_FIXED_INTERVAL_KEY);
            interval = customIntervalWritable.get();
        } else {
            if (modifiedTime <= 0L) {
                modifiedTime = fetchTime;
            }
            switch (state) {
                case 1: {
                    // Modified: fetch more often and record the change time.
                    interval *= 1.0f - this.DEC_RATE;
                    modifiedTime = fetchTime;
                    break;
                }
                case 2: {
                    // Not modified: fetch less often.
                    interval *= 1.0f + this.INC_RATE;
                    break;
                }
                default: {
                    // Any other state: interval is left as it is.
                    break;
                }
            }
            if (this.SYNC_DELTA) {
                // Time since last modification, in interval units (time / 1000).
                long delta = (fetchTime - modifiedTime) / 1000L;
                if ((float)delta > interval) {
                    interval = delta;
                }
                // Pull the reference time back by a fraction of that delta.
                refTime = fetchTime - Math.round((double)delta * this.SYNC_DELTA_RATE * 1000.0);
            }
            float newMaxInterval = this.getMaxInterval(url, this.MAX_INTERVAL);
            float newMinInterval = this.getMinInterval(url, this.MIN_INTERVAL);
            if (interval < newMinInterval) {
                interval = newMinInterval;
            } else if (interval > newMaxInterval) {
                interval = newMaxInterval;
            }
        }
        datum.setFetchInterval(interval);
        datum.setFetchTime(refTime + Math.round((double)interval * 1000.0));
        datum.setModifiedTime(modifiedTime);
        return datum;
    }

    /**
     * Small simulation that logs how the schedule adapts over 10000 steps of
     * {@code delta} time units for a page that changes every {@code update}
     * time units, and reports how many changes were missed.
     *
     * @param args ignored
     * @throws Exception on any failure
     */
    public static void main(String[] args) throws Exception {
        AdaptiveFetchSchedule fs = new AdaptiveFetchSchedule();
        fs.setConf(NutchConfiguration.create());
        long curTime = 0L;
        // Simulation step.
        long delta = 86400000L;
        // Period after which the simulated page changes.
        long update = 2592000000L;
        boolean changed = true;
        long lastModified = 0L;
        int miss = 0;
        int totalMiss = 0;
        int maxMiss = 0;
        int fetchCnt = 0;
        int changeCnt = 0;
        CrawlDatum p = new CrawlDatum(1, 2592000, 1.0f);
        p.setFetchTime(0L);
        LOG.info(p.toString());
        for (int i = 0; i < 10000; ++i) {
            if (lastModified + update < curTime) {
                changed = true;
                ++changeCnt;
                lastModified = curTime;
            }
            LOG.info(i + ". " + changed + "\twill fetch at " + p.getFetchTime() / delta + "\tinterval " + p.getFetchInterval() / 86400 + " days\t missed " + miss);
            if (p.getFetchTime() <= curTime) {
                ++fetchCnt;
                // State 1 when the page changed since the last fetch, 2 otherwise.
                fs.setFetchSchedule(new Text("http://www.example.com"), p, p.getFetchTime(), p.getModifiedTime(), curTime, lastModified, changed ? 1 : 2);
                LOG.info("\tfetched & adjusted: \twill fetch at " + p.getFetchTime() / delta + "\tinterval " + p.getFetchInterval() / 86400 + " days");
                if (!changed) {
                    ++miss;
                }
                if (miss > maxMiss) {
                    maxMiss = miss;
                }
                changed = false;
                totalMiss += miss;
                miss = 0;
            }
            if (changed) {
                ++miss;
            }
            curTime += delta;
        }
        LOG.info("Total missed: " + totalMiss + ", max miss: " + maxMiss);
        LOG.info("Page changed " + changeCnt + " times, fetched " + fetchCnt + " times.");
    }
}

