/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.net.urlnormalizer.protocol;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.net.URLNormalizer;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.util.SuffixStringMatcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ProtocolURLNormalizer
implements URLNormalizer {
    private Configuration conf;
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private String attributeFile = null;
    private final Map<String, String> protocolsMap = new HashMap<String, String>();
    private final Map<String, String> protocols = new TreeMap<String, String>();
    private final Map<String, String> domainProtocolsMap = new HashMap<String, String>();
    private SuffixStringMatcher domainMatcher = null;
    private static final Pattern PROTOCOL_VALIDATOR = Pattern.compile("^[a-z](?:[a-z0-9$\\-_@.&!*\"'(),]|%[0-9a-f]{2})*$", 2);

    private synchronized void readConfiguration(Reader configReader) throws IOException {
        String line;
        if (this.protocolsMap.size() > 0) {
            return;
        }
        BufferedReader reader = new BufferedReader(configReader);
        int lineNumber = 0;
        while ((line = reader.readLine()) != null) {
            ++lineNumber;
            if (!StringUtils.isNotBlank((String)(line = line.trim())) || line.startsWith("#")) continue;
            int delimiterIndex = line.indexOf(" ");
            if (delimiterIndex == -1) {
                delimiterIndex = line.indexOf("\t");
            }
            if (delimiterIndex == -1) {
                LOG.warn("Invalid line {}, no delimiter between <host/domain> and <protocol> found: {}", (Object)lineNumber, (Object)line);
                continue;
            }
            String host = line.substring(0, delimiterIndex);
            String protocol = line.substring(delimiterIndex + 1).trim();
            if (!PROTOCOL_VALIDATOR.matcher(protocol).matches()) {
                LOG.warn("Skipping rule with protocol not following RFC 1630 in line {}: {}", (Object)lineNumber, (Object)line);
                continue;
            }
            this.protocols.putIfAbsent(protocol, protocol);
            protocol = this.protocols.get(protocol);
            if (host.startsWith("*.")) {
                this.domainProtocolsMap.put(host.substring(1), protocol);
                this.protocolsMap.put(host.substring(2), protocol);
                continue;
            }
            this.protocolsMap.put(host, protocol);
        }
        if (this.domainProtocolsMap.size() > 0) {
            this.domainMatcher = new SuffixStringMatcher(this.domainProtocolsMap.keySet());
        }
        LOG.info("Configuration file read: rules for {} hosts and {} domains", (Object)this.protocolsMap.size(), (Object)this.domainProtocolsMap.size());
    }

    public Configuration getConf() {
        return this.conf;
    }

    public void setConf(Configuration conf) {
        this.conf = conf;
        String pluginName = "urlnormalizer-protocol";
        Extension[] extensions = PluginRepository.get((Configuration)conf).getExtensionPoint(URLNormalizer.class.getName()).getExtensions();
        for (int i = 0; i < extensions.length; ++i) {
            Extension extension = extensions[i];
            if (!extension.getDescriptor().getPluginId().equals(pluginName)) continue;
            this.attributeFile = extension.getAttribute("file");
            break;
        }
        if (this.attributeFile != null && this.attributeFile.trim().isEmpty()) {
            this.attributeFile = null;
        }
        if (this.attributeFile != null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Attribute \"file\" is defined for plugin " + pluginName + " as " + this.attributeFile);
            }
        } else if (LOG.isWarnEnabled()) {
            LOG.warn("Attribute \"file\" is not defined in plugin.xml for plugin " + pluginName);
        }
        String file = conf.get("urlnormalizer.protocols.file", this.attributeFile);
        String stringRules = conf.get("urlnormalizer.protocols.rules");
        Reader reader = null;
        if (stringRules != null && !stringRules.isEmpty()) {
            reader = new StringReader(stringRules);
        } else {
            LOG.info("Reading {} rules file {} from Java class path", (Object)pluginName, (Object)file);
            reader = conf.getConfResourceAsReader(file);
        }
        try {
            if (reader == null) {
                Path path = new Path(file);
                FileSystem fs = path.getFileSystem(conf);
                LOG.info("Reading {} rules file {}", (Object)pluginName, (Object)path.toUri());
                reader = new InputStreamReader((InputStream)fs.open(path));
            }
            this.readConfiguration(reader);
        }
        catch (IOException | IllegalArgumentException e) {
            LOG.error("Error reading " + pluginName + " rule file " + file, (Throwable)e);
        }
    }

    public String normalize(String url, String scope) throws MalformedURLException {
        String domainMatch;
        URL u = new URL(url);
        String host = u.getHost();
        if (u.getPort() != -1) {
            return url;
        }
        String requiredProtocol = null;
        if (this.protocolsMap.containsKey(host)) {
            requiredProtocol = this.protocolsMap.get(host);
        } else if (this.domainMatcher != null && (domainMatch = this.domainMatcher.longestMatch(host)) != null) {
            requiredProtocol = this.domainProtocolsMap.get(domainMatch);
        }
        if (requiredProtocol != null && !u.getProtocol().equals(requiredProtocol)) {
            url = new URL(requiredProtocol, host, u.getPort(), u.getFile()).toString();
        }
        return url;
    }
}

