/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexwriter.csv;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.AbstractMap;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.indexer.IndexWriter;
import org.apache.nutch.indexer.IndexWriterParams;
import org.apache.nutch.indexer.IndexingJob;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.indexer.NutchField;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Index writer which dumps selected fields of every {@link NutchDocument}
 * into a single CSV file ({@code nutch.csv}) below a configurable output
 * directory.
 *
 * <p>The parameters read in {@link #open(IndexWriterParams)} are listed,
 * together with their defaults, by {@link #describe()}.
 *
 * <p>Formatting rules implemented here:
 * <ul>
 * <li>a single-valued field is quoted only if it contains the quote
 * character, the field separator or a line break character;</li>
 * <li>a multi-valued field is always quoted, its values are joined by the
 * value separator;</li>
 * <li>a quote character inside a value is preceded by the escape
 * character;</li>
 * <li>values are cut after {@code maxfieldlength} characters, fields after
 * {@code maxfieldvalues} values.</li>
 * </ul>
 */
public class CSVIndexWriter implements IndexWriter {
    private static final Logger LOG = LoggerFactory.getLogger(CSVIndexWriter.class);

    private Configuration config;

    /** Ordered list of document fields, one per CSV column. */
    private String[] fields;

    /*
     * NOTE: must be initialised before any Separator field below, because
     * Separator.set() encodes the separator using this charset.
     */
    protected Charset encoding = Charset.forName("UTF-8");

    /** Line terminator, not configurable. */
    private Separator recordSeparator = new Separator("\r\n");
    private Separator fieldSeparator = new Separator(",");
    private Separator valueSeparator = new Separator("|");
    private Separator quoteCharacter = new Separator("\"");
    /*
     * Deliberately a separate instance: sharing one object with
     * quoteCharacter would make "escapechar" overwrite the quote character.
     */
    private Separator escapeCharacter = new Separator("\"");

    /** Max. number of characters written per value. */
    private int maxFieldLength = 4096;
    /** Max. number of values written per field. */
    private int maxFieldValues = 12;
    private boolean withHeader = true;
    private String outputPath = "csvindexwriter";

    private FileSystem fs;
    protected FSDataOutputStream csvout;
    private Path csvLocalOutFile;

    /**
     * Does nothing, the writer is set up by {@link #open(IndexWriterParams)}.
     *
     * @param conf ignored
     * @param name ignored
     */
    public void open(Configuration conf, String name) throws IOException {
    }

    /**
     * Reads the writer parameters, (re)creates {@code nutch.csv} in the
     * output directory and, if enabled, writes the header row.
     *
     * @param parameters parameters of this index writer
     * @throws IOException if the output directory or file cannot be created
     *         or written
     */
    public void open(IndexWriterParams parameters) throws IOException {
        this.outputPath = parameters.get("outpath", this.outputPath);
        String charset = (String) parameters.get("charset");
        if (charset != null) {
            this.encoding = Charset.forName(charset);
        }
        // not configurable, but its bytes depend on the charset chosen above
        this.recordSeparator.set(null);
        this.fieldSeparator.setFromConf(parameters, "separator");
        this.quoteCharacter.setFromConf(parameters, "quotechar", true);
        // unless configured explicitly, quotes are escaped by the quote character itself
        this.escapeCharacter.set(this.quoteCharacter.sepStr);
        this.escapeCharacter.setFromConf(parameters, "escapechar", true);
        this.valueSeparator.setFromConf(parameters, "valuesep");
        this.withHeader = parameters.getBoolean("header", true);
        this.maxFieldLength = parameters.getInt("maxfieldlength", this.maxFieldLength);
        LOG.info("maxfieldlength = {}", this.maxFieldLength);
        this.maxFieldValues = parameters.getInt("maxfieldvalues", this.maxFieldValues);
        LOG.info("maxfieldvalues = {}", this.maxFieldValues);
        this.fields = parameters.getStrings("fields", new String[]{"id", "title", "content"});
        LOG.info("fields =");
        for (String f : this.fields) {
            LOG.info("\t{}", f);
        }

        LOG.info("Writing output to {}", this.outputPath);
        Path outputDir = new Path(this.outputPath);
        this.fs = outputDir.getFileSystem(this.config);
        this.csvLocalOutFile = new Path(outputDir, "nutch.csv");
        if (!this.fs.exists(outputDir)) {
            this.fs.mkdirs(outputDir);
        }
        if (this.fs.exists(this.csvLocalOutFile)) {
            // clean up the result of a previous run
            LOG.warn("Removing existing output path {}", this.csvLocalOutFile);
            this.fs.delete(this.csvLocalOutFile, true);
        }
        this.csvout = this.fs.create(this.csvLocalOutFile);

        if (this.withHeader) {
            for (int i = 0; i < this.fields.length; ++i) {
                if (i > 0) {
                    this.csvout.write(this.fieldSeparator.bytes);
                }
                this.csvout.write(this.fields[i].getBytes(this.encoding));
            }
            // terminate the header row only if there is one, otherwise the
            // file would start with an empty record
            this.csvout.write(this.recordSeparator.bytes);
        }
    }

    /**
     * Appends one CSV record holding the configured fields of the document.
     * A field missing in the document yields an empty column.
     *
     * @param doc document to write
     * @throws IOException if writing to the output stream fails
     */
    public void write(NutchDocument doc) throws IOException {
        for (int i = 0; i < this.fields.length; ++i) {
            if (i > 0) {
                this.csvout.write(this.fieldSeparator.bytes);
            }
            NutchField field = doc.getField(this.fields[i]);
            if (field != null) {
                this.writeField(field.getValues());
            }
        }
        this.csvout.write(this.recordSeparator.bytes);
    }

    /**
     * Writes the values of one field: at most {@code maxFieldValues} values,
     * {@code null} values are skipped and do not count towards the limit.
     */
    private void writeField(List<?> values) throws IOException {
        int limit = Math.min(values.size(), this.maxFieldValues);
        boolean multiValued = limit > 1;
        if (multiValued) {
            // multi-valued fields are always quoted
            this.csvout.write(this.quoteCharacter.bytes);
        }
        int written = 0;
        for (Object objval : values) {
            if (written >= limit) {
                break;
            }
            if (objval == null) {
                continue;
            }
            // Strings are written as is, everything else (dates, numbers, ...)
            // by its string representation
            String value = objval.toString();
            if (multiValued) {
                if (written > 0) {
                    this.csvout.write(this.valueSeparator.bytes);
                }
                this.writeEscaped(value);
            } else {
                this.writeQuoted(value);
            }
            ++written;
        }
        if (multiValued) {
            this.csvout.write(this.quoteCharacter.bytes);
        }
    }

    /** Not supported by this writer: deletions are silently ignored. */
    public void delete(String key) {
    }

    /**
     * Handled like {@link #write(NutchDocument)}: the document is appended as
     * a new record.
     */
    public void update(NutchDocument doc) throws IOException {
        this.write(doc);
    }

    /**
     * Closes the CSV output stream. Does nothing if the writer has not been
     * opened successfully.
     *
     * @throws IOException if closing the stream fails
     */
    public void close() throws IOException {
        if (this.csvout == null) {
            return;
        }
        this.csvout.close();
        LOG.info("Finished CSV index in {}", this.csvLocalOutFile);
    }

    /** Nothing to do, records are written to the stream immediately. */
    public void commit() {
    }

    public Configuration getConf() {
        return this.config;
    }

    /**
     * Describes the parameters of this writer.
     *
     * @return map from parameter name to a (description, current value) pair,
     *         in a fixed order
     */
    public Map<String, Map.Entry<String, Object>> describe() {
        Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<>();
        properties.put("fields", new AbstractMap.SimpleEntry<>("Ordered list of fields (columns) in the CSV file", this.fields == null ? "" : String.join(",", this.fields)));
        properties.put("separator", new AbstractMap.SimpleEntry<>("Separator between fields (columns), default: , (U+002C, comma)", this.fieldSeparator));
        properties.put("quotechar", new AbstractMap.SimpleEntry<>("Quote character used to quote fields containing separators or quotes, default: \" (U+0022, quotation mark)", this.quoteCharacter));
        properties.put("escapechar", new AbstractMap.SimpleEntry<>("Escape character used to escape a quote character, default: \" (U+0022, quotation mark)", this.escapeCharacter));
        properties.put("valuesep", new AbstractMap.SimpleEntry<>("Separator between multiple values of one field, default: | (U+007C)", this.valueSeparator));
        properties.put("maxfieldvalues", new AbstractMap.SimpleEntry<>("Max. number of values of one field, useful for, e.g., the anchor texts field, default: 12", this.maxFieldValues));
        properties.put("maxfieldlength", new AbstractMap.SimpleEntry<>("Max. length of a single field value in characters, default: 4096", this.maxFieldLength));
        properties.put("charset", new AbstractMap.SimpleEntry<>("Encoding of CSV file, default: UTF-8", this.encoding));
        properties.put("header", new AbstractMap.SimpleEntry<>("Write CSV column headers, default: true", this.withHeader));
        properties.put("outpath", new AbstractMap.SimpleEntry<>("Output path / directory, default: csvindexwriter. ", this.outputPath));
        return properties;
    }

    public void setConf(Configuration conf) {
        this.config = conf;
    }

    /**
     * Writes a single value, enclosed in quotes (and escaped) only if it
     * contains a quote character, the field separator or a record separator
     * character. Quoting is disabled if the quote character is empty.
     */
    private void writeQuoted(String value) throws IOException {
        int nextQuoteChar = this.quoteCharacter.find(value, 0);
        boolean needsQuoting = this.quoteCharacter.chars.length > 0
                && (nextQuoteChar >= 0
                        || this.fieldSeparator.find(value, 0) >= 0
                        || this.recordSeparator.find(value, 0) >= 0);
        if (needsQuoting) {
            this.csvout.write(this.quoteCharacter.bytes);
            this.writeEscaped(value, nextQuoteChar);
            this.csvout.write(this.quoteCharacter.bytes);
        } else {
            int max = Math.min(value.length(), this.maxFieldLength);
            this.csvout.write(value.substring(0, max).getBytes(this.encoding));
        }
    }

    /**
     * Writes the value cut to {@code maxFieldLength} characters, putting the
     * escape character in front of every quote character.
     *
     * @param value value to write
     * @param nextQuoteChar position of the first quote character in value,
     *        or -1 if there is none
     */
    private void writeEscaped(String value, int nextQuoteChar) throws IOException {
        int start = 0;
        int max = Math.min(value.length(), this.maxFieldLength);
        while (nextQuoteChar >= 0 && nextQuoteChar < max) {
            this.csvout.write(value.substring(start, nextQuoteChar).getBytes(this.encoding));
            this.csvout.write(this.escapeCharacter.bytes);
            this.csvout.write(this.quoteCharacter.bytes);
            start = nextQuoteChar + 1;
            nextQuoteChar = this.quoteCharacter.find(value, start);
        }
        this.csvout.write(value.substring(start, max).getBytes(this.encoding));
    }

    /** Writes the value with escaped quote characters but without enclosing quotes. */
    private void writeEscaped(String value) throws IOException {
        this.writeEscaped(value, this.quoteCharacter.find(value, 0));
    }

    /** Runs the {@link IndexingJob} with the given command-line arguments. */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(NutchConfiguration.create(), new IndexingJob(), args);
        System.exit(res);
    }

    /**
     * A separator or quote/escape character, kept as string, as char array
     * and encoded in the current {@link CSVIndexWriter#encoding}.
     */
    protected class Separator {
        protected String sepStr;
        protected char[] chars;
        protected byte[] bytes;

        /**
         * @param sep initial separator string, must not be null
         */
        protected Separator(String sep) {
            this.set(sep);
        }

        /**
         * Sets the separator and encodes it with the current charset.
         *
         * @param str new separator string, or null to keep the current one
         *        and only encode it again (the charset may have changed)
         */
        protected void set(String str) {
            if (str != null) {
                this.sepStr = str;
                this.chars = str.toCharArray();
            }
            this.bytes = this.sepStr.getBytes(CSVIndexWriter.this.encoding);
        }

        /**
         * @return the separator in a printable form: line breaks and tabs as
         *         {@code \n}, {@code \r}, {@code \t}, space, other control
         *         characters and everything from U+007F on as {@code \}{@code uXXXX}
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            for (char c : this.chars) {
                if (c == '\n') {
                    sb.append("\\n");
                } else if (c == '\r') {
                    sb.append("\\r");
                } else if (c == '\t') {
                    sb.append("\\t");
                } else if (c >= '\u007f' || c <= ' ') {
                    // %x does not accept a Character, format the code unit as int
                    sb.append(String.format("\\u%04x", (int) c));
                } else {
                    sb.append(c);
                }
            }
            return sb.toString();
        }

        /**
         * Sets the separator from the given parameter, any string is accepted.
         *
         * @param parameters parameters of the index writer
         * @param property name of the parameter holding the separator
         */
        protected void setFromConf(IndexWriterParams parameters, String property) {
            this.setFromConf(parameters, property, false);
        }

        /**
         * Sets the separator from the given parameter and logs the result.
         * The current separator is kept if the parameter is not set.
         *
         * @param parameters parameters of the index writer
         * @param property name of the parameter holding the separator
         * @param isChar if true the separator is limited to a single
         *        character: a longer value is cut after the first character
         *        and a warning is logged
         */
        protected void setFromConf(IndexWriterParams parameters, String property, boolean isChar) {
            String str = (String) parameters.get(property);
            if (isChar && str != null && str.length() > 1) {
                LOG.warn("Separator {} must be a char, only the first character '{}' of \"{}\" is used", property, str.charAt(0), str);
                str = str.substring(0, 1);
            }
            this.set(str);
            LOG.info("{} = {}", property, this);
        }

        /**
         * Finds the first occurrence of any character of this separator.
         *
         * @param value string to search in
         * @param start position to start the search from
         * @return position of the earliest occurrence at or after start, or
         *         -1 if none is found or the separator is empty
         */
        protected int find(String value, int start) {
            int first = -1;
            for (char c : this.chars) {
                int index = value.indexOf(c, start);
                if (index >= 0 && (first < 0 || index < first)) {
                    first = index;
                }
            }
            return first;
        }
    }
}

