/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.nutch.crawl.Inlink;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LinkDbFilter
extends Mapper<Text, Inlinks, Text, Inlinks> {
    public static final String URL_FILTERING = "linkdb.url.filters";
    public static final String URL_NORMALIZING = "linkdb.url.normalizer";
    public static final String URL_NORMALIZING_SCOPE = "linkdb.url.normalizer.scope";
    private boolean filter;
    private boolean normalize;
    private URLFilters filters;
    private URLNormalizers normalizers;
    private String scope;
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private Text newKey = new Text();

    public void setup(Mapper.Context context) {
        Configuration conf = context.getConfiguration();
        this.filter = conf.getBoolean(URL_FILTERING, false);
        this.normalize = conf.getBoolean(URL_NORMALIZING, false);
        if (this.filter) {
            this.filters = new URLFilters(conf);
        }
        if (this.normalize) {
            this.scope = conf.get(URL_NORMALIZING_SCOPE, "linkdb");
            this.normalizers = new URLNormalizers(conf, this.scope);
        }
    }

    public void map(Text key, Inlinks value, Mapper.Context context) throws IOException, InterruptedException {
        String url = key.toString();
        Inlinks result = new Inlinks();
        if (this.normalize) {
            try {
                url = this.normalizers.normalize(url, this.scope);
            }
            catch (Exception e) {
                LOG.warn("Skipping " + url + ":" + e);
                url = null;
            }
        }
        if (url != null && this.filter) {
            try {
                url = this.filters.filter(url);
            }
            catch (Exception e) {
                LOG.warn("Skipping " + url + ":" + e);
                url = null;
            }
        }
        if (url == null) {
            return;
        }
        Iterator<Inlink> it = value.iterator();
        String fromUrl = null;
        while (it.hasNext()) {
            Inlink inlink = it.next();
            fromUrl = inlink.getFromUrl();
            if (this.normalize) {
                try {
                    fromUrl = this.normalizers.normalize(fromUrl, this.scope);
                }
                catch (Exception e) {
                    LOG.warn("Skipping " + fromUrl + ":" + e);
                    fromUrl = null;
                }
            }
            if (fromUrl != null && this.filter) {
                try {
                    fromUrl = this.filters.filter(fromUrl);
                }
                catch (Exception e) {
                    LOG.warn("Skipping " + fromUrl + ":" + e);
                    fromUrl = null;
                }
            }
            if (fromUrl == null) continue;
            result.add(new Inlink(fromUrl, inlink.getAnchor()));
        }
        if (result.size() > 0) {
            this.newKey.set(url);
            context.write((Object)this.newKey, (Object)result);
        }
    }
}

