/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexwriter.cloudsearch;

import com.amazonaws.regions.RegionUtils;
import com.amazonaws.services.cloudsearchdomain.AmazonCloudSearchDomainClient;
import com.amazonaws.services.cloudsearchdomain.model.ContentType;
import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsRequest;
import com.amazonaws.services.cloudsearchdomain.model.UploadDocumentsResult;
import com.amazonaws.services.cloudsearchv2.AmazonCloudSearchClient;
import com.amazonaws.services.cloudsearchv2.model.DescribeDomainsRequest;
import com.amazonaws.services.cloudsearchv2.model.DescribeDomainsResult;
import com.amazonaws.services.cloudsearchv2.model.DescribeIndexFieldsRequest;
import com.amazonaws.services.cloudsearchv2.model.DescribeIndexFieldsResult;
import com.amazonaws.services.cloudsearchv2.model.DomainStatus;
import com.amazonaws.services.cloudsearchv2.model.IndexFieldStatus;
import com.amazonaws.util.json.JSONException;
import com.amazonaws.util.json.JSONObject;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.AbstractMap;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.indexer.IndexWriter;
import org.apache.nutch.indexer.IndexWriterParams;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.indexer.NutchField;
import org.apache.nutch.indexwriter.cloudsearch.CloudSearchUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link IndexWriter} that serialises Nutch documents into JSON add/delete
 * operations and sends them in batches to an Amazon CloudSearch document
 * endpoint, or, when {@code batch.dump} is enabled, writes each batch to a
 * temporary file instead of uploading it.
 *
 * <p>Documents are accumulated in an in-memory JSON array. A batch is flushed
 * when adding the next document would reach {@link #MAX_SIZE_BATCH_BYTES},
 * when the configured {@code batch.maxSize} document count is reached, or when
 * {@link #commit()} / {@link #close()} is called.
 *
 * <p>Instances keep unsynchronised mutable state (buffer and counters) and are
 * therefore not safe for concurrent use.
 */
public class CloudSearchIndexWriter
implements IndexWriter {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Upper bound, in UTF-8 bytes, for one serialised batch (5 MiB). */
    private static final int MAX_SIZE_BATCH_BYTES = 5 * 1024 * 1024;

    /** Upper bound, in UTF-8 bytes, for one serialised document (1 MiB). */
    private static final int MAX_SIZE_DOC_BYTES = 1024 * 1024;

    /**
     * Formats timestamps as {@code yyyy-MM-dd'T'HH:mm:ss.SSS'Z'}. The zone is
     * pinned to UTC so that the literal {@code Z} suffix is truthful regardless
     * of the JVM default time zone; the formatter is immutable and thread-safe.
     */
    private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter
            .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT)
            .withZone(ZoneOffset.UTC);

    /** Matches every character that is not allowed in a cleaned field name. */
    private static final Pattern INVALID_FIELD_NAME_CHARS = Pattern.compile("[^a-z_0-9]");

    private AmazonCloudSearchDomainClient client;
    private int maxDocsInBatch = -1;

    /** JSON array under construction; always starts with {@code '['}. */
    private StringBuilder buffer;

    /** UTF-8 size of {@link #buffer}, tracked incrementally to avoid re-encoding it per document. */
    private int bufferBytes = 0;

    private int numDocsInBatch = 0;
    private boolean dumpBatchFilesToTemp = false;
    private Configuration conf;

    /** Index field name to index field type, as reported by the CloudSearch domain. */
    private Map<String, String> csfields = new HashMap<String, String>();

    private String endpoint;
    private String regionName;

    /**
     * Legacy entry point; intentionally does nothing. Initialisation happens in
     * {@link #open(IndexWriterParams)}.
     *
     * @param conf ignored
     * @param name ignored
     * @throws IOException never thrown by this implementation
     */
    public void open(Configuration conf, String name) throws IOException {
    }

    /**
     * Reads the writer parameters ({@code endpoint}, {@code region},
     * {@code batch.dump}, {@code batch.maxSize}), prepares an empty batch and,
     * unless dumping to files, looks up the CloudSearch domain behind the
     * endpoint, caches its index fields and creates the upload client.
     *
     * @param parameters writer parameters
     * @throws IOException declared by the interface; not thrown directly here
     * @throws RuntimeException if no endpoint is configured while not in dump
     *         mode, or if no CloudSearch domain matches the endpoint
     */
    public void open(IndexWriterParams parameters) throws IOException {
        this.endpoint = parameters.get("endpoint");
        this.dumpBatchFilesToTemp = parameters.getBoolean("batch.dump", false);
        this.regionName = parameters.get("region");
        if (StringUtils.isBlank(this.endpoint) && !this.dumpBatchFilesToTemp) {
            String message = "Missing CloudSearch endpoint. Should set it set via -D endpoint or in nutch-site.xml"
                    + "\n" + this.describe();
            LOG.error(message);
            throw new RuntimeException(message);
        }
        this.maxDocsInBatch = parameters.getInt("batch.maxSize", -1);
        this.resetBatch();
        if (this.dumpBatchFilesToTemp) {
            // Dump mode never talks to AWS, so no client or field list is needed.
            return;
        }

        // The configuration client is only needed to discover the domain and its
        // fields; release its resources once that is done.
        AmazonCloudSearchClient cl = new AmazonCloudSearchClient();
        try {
            if (StringUtils.isNotBlank(this.regionName)) {
                cl.setRegion(RegionUtils.getRegion(this.regionName));
            }
            String domainName = this.findDomainName(cl);
            if (StringUtils.isBlank(domainName)) {
                throw new RuntimeException("No domain name found for CloudSearch endpoint");
            }
            DescribeIndexFieldsResult indexDescription =
                    cl.describeIndexFields(new DescribeIndexFieldsRequest().withDomainName(domainName));
            for (IndexFieldStatus ifs : indexDescription.getIndexFields()) {
                String indexname = ifs.getOptions().getIndexFieldName();
                String indextype = ifs.getOptions().getIndexFieldType();
                LOG.info("CloudSearch index name {} of type {}", indexname, indextype);
                this.csfields.put(indexname, indextype);
            }
        } finally {
            cl.shutdown();
        }
        this.client = new AmazonCloudSearchDomainClient();
        this.client.setEndpoint(this.endpoint);
    }

    /**
     * Returns the name of the first domain whose document-service endpoint
     * equals the configured endpoint, or {@code null} if none matches. Domains
     * without a document-service endpoint are skipped.
     */
    private String findDomainName(AmazonCloudSearchClient cl) {
        DescribeDomainsResult domains = cl.describeDomains(new DescribeDomainsRequest());
        for (DomainStatus ds : domains.getDomainStatusList()) {
            if (ds.getDocService() == null) {
                continue;
            }
            // this.endpoint is known to be non-blank here, so it is the safe receiver.
            if (this.endpoint.equals(ds.getDocService().getEndpoint())) {
                return ds.getDomainName();
            }
        }
        return null;
    }

    /**
     * Queues a delete operation for the document identified by {@code url}.
     * A failure to build the JSON is logged and the operation is dropped.
     *
     * @param url URL of the document to delete
     * @throws IOException if flushing a full batch fails with an I/O error
     */
    public void delete(String url) throws IOException {
        try {
            JSONObject docBuilder = new JSONObject();
            docBuilder.put("type", "delete");
            docBuilder.put("id", CloudSearchUtils.getID(url));
            this.addToBatch(docBuilder.toString(2), url);
        }
        catch (JSONException e) {
            LOG.error("Exception caught while building JSON object", e);
        }
    }

    /**
     * Updates a document; identical to {@link #write(NutchDocument)}.
     *
     * @param doc document to (re-)index
     * @throws IOException if flushing a full batch fails with an I/O error
     */
    public void update(NutchDocument doc) throws IOException {
        this.write(doc);
    }

    /**
     * Queues an add operation for {@code doc}. Field names are normalised with
     * {@link #cleanFieldName(String)}; outside dump mode, fields that are not
     * defined in the CloudSearch domain are skipped. {@link Date} values are
     * rendered as UTC timestamps and {@link String} values are passed through
     * {@code CloudSearchUtils.stripNonCharCodepoints}. A failure to build the
     * JSON is logged and the document is dropped.
     *
     * @param doc document to index; its {@code url} field is used to derive the id
     * @throws IOException if flushing a full batch fails with an I/O error
     */
    public void write(NutchDocument doc) throws IOException {
        try {
            JSONObject docBuilder = new JSONObject();
            docBuilder.put("type", "add");
            // NOTE(review): relies on NutchField.toString() yielding the URL in the
            // same form that delete(String) receives - confirm against NutchField.
            String url = doc.getField("url").toString();
            docBuilder.put("id", CloudSearchUtils.getID(url));
            JSONObject fields = new JSONObject();
            for (Map.Entry<String, NutchField> entry : doc) {
                String fieldname = this.cleanFieldName(entry.getKey());
                String type = this.csfields.get(fieldname);
                if (!this.dumpBatchFilesToTemp && type == null) {
                    LOG.info("Field {} not defined in CloudSearch domain for {} - skipping.", fieldname, url);
                    continue;
                }
                for (Object rawValue : entry.getValue().getValues()) {
                    Object value = rawValue;
                    if (rawValue instanceof Date) {
                        // Go through epoch millis so Date subclasses are handled uniformly.
                        value = DATE_FORMAT.format(Instant.ofEpochMilli(((Date) rawValue).getTime()));
                    } else if (rawValue instanceof String) {
                        value = CloudSearchUtils.stripNonCharCodepoints((String) rawValue);
                    }
                    fields.accumulate(fieldname, value);
                }
            }
            docBuilder.put("fields", fields);
            this.addToBatch(docBuilder.toString(2), url);
        }
        catch (JSONException e) {
            LOG.error("Exception caught while building JSON object", e);
        }
    }

    /**
     * Appends one serialised operation to the current batch, flushing first if
     * it would not fit and flushing afterwards if the document-count limit is
     * reached. Documents larger than {@link #MAX_SIZE_DOC_BYTES} are logged and
     * dropped.
     *
     * @param currentDoc JSON text of the operation
     * @param url URL the operation refers to (used for logging only)
     */
    private void addToBatch(String currentDoc, String url) throws IOException {
        int currentDocLength = currentDoc.getBytes(StandardCharsets.UTF_8).length;
        if (currentDocLength > MAX_SIZE_DOC_BYTES) {
            LOG.error("Doc too large. currentDoc.length {} : {}", currentDocLength, url);
            return;
        }
        LOG.debug("currentDoc.length {}, buffer length {}", currentDocLength, this.bufferBytes);
        // The extra 2 bytes account for the ',' separator and the closing ']'.
        if (currentDocLength + 2 + this.bufferBytes >= MAX_SIZE_BATCH_BYTES) {
            this.commit();
        }
        if (this.numDocsInBatch != 0) {
            this.buffer.append(',');
            ++this.bufferBytes;
        }
        this.buffer.append(currentDoc);
        this.bufferBytes += currentDocLength;
        ++this.numDocsInBatch;
        if (this.maxDocsInBatch > 0 && this.numDocsInBatch == this.maxDocsInBatch) {
            this.commit();
        }
    }

    /**
     * Flushes the current batch, if any: closes the JSON array and either
     * writes it to a temporary file (dump mode) or uploads it to CloudSearch.
     * Failures are logged, not rethrown, and the batch is discarded either way.
     *
     * @throws IOException declared by the interface; failures are logged here
     */
    public void commit() throws IOException {
        if (this.numDocsInBatch == 0) {
            return;
        }
        this.buffer.append(']');
        LOG.info("Sending {} docs to CloudSearch", this.numDocsInBatch);
        String payload = this.buffer.toString();
        byte[] bb = payload.getBytes(StandardCharsets.UTF_8);
        try {
            if (this.dumpBatchFilesToTemp) {
                this.dumpBatch(bb);
            } else {
                this.uploadBatch(bb, payload);
            }
        }
        finally {
            this.resetBatch();
        }
    }

    /** Writes a serialised batch to a fresh temporary file; I/O errors are logged. */
    private void dumpBatch(byte[] bb) {
        try {
            File temp = File.createTempFile("CloudSearch_", ".json");
            FileUtils.writeByteArrayToFile(temp, bb);
            LOG.info("Wrote batch file {}", temp.getName());
        }
        catch (IOException e1) {
            LOG.error("Exception while generating batch file", e1);
        }
    }

    /** Uploads a serialised batch; on any failure logs the error and the batch content. */
    private void uploadBatch(byte[] bb, String payload) {
        try (ByteArrayInputStream inputStream = new ByteArrayInputStream(bb)) {
            UploadDocumentsRequest batch = new UploadDocumentsRequest();
            batch.setContentLength(Long.valueOf(bb.length));
            batch.setContentType(ContentType.Applicationjson);
            batch.setDocuments((InputStream) inputStream);
            this.client.uploadDocuments(batch);
        }
        catch (Exception e) {
            LOG.error("Exception while sending batch", e);
            LOG.error(payload);
        }
    }

    /** Starts a new, empty batch: a buffer holding only {@code '['} and zeroed counters. */
    private void resetBatch() {
        this.buffer = new StringBuilder(MAX_SIZE_BATCH_BYTES).append('[');
        this.bufferBytes = 1;
        this.numDocsInBatch = 0;
    }

    /**
     * Flushes any pending batch and shuts down the upload client if one was
     * created.
     *
     * @throws IOException declared by the interface
     */
    public void close() throws IOException {
        this.commit();
        if (this.client != null) {
            this.client.shutdown();
        }
    }

    /** @return the configuration previously passed to {@link #setConf(Configuration)} */
    public Configuration getConf() {
        return this.conf;
    }

    /** @param conf configuration to remember; it is only stored by this class */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Describes the supported parameters and their current values.
     *
     * @return insertion-ordered map from parameter name to a
     *         (description, current value) entry
     */
    public Map<String, Map.Entry<String, Object>> describe() {
        Map<String, Map.Entry<String, Object>> properties = new LinkedHashMap<String, Map.Entry<String, Object>>();
        properties.put("endpoint", new AbstractMap.SimpleEntry<String, Object>("Endpoint where service requests should be submitted.", this.endpoint));
        properties.put("region", new AbstractMap.SimpleEntry<String, Object>("Region name.", this.regionName));
        properties.put("batch.dump", new AbstractMap.SimpleEntry<String, Object>("true to send documents to a local file.", this.dumpBatchFilesToTemp));
        properties.put("batch.maxSize", new AbstractMap.SimpleEntry<String, Object>("Maximum number of documents to send as a batch to CloudSearch.", this.maxDocsInBatch));
        return properties;
    }

    /**
     * Normalises a field name: lower-cases it (locale-independently, so the
     * result does not vary with the JVM default locale) and replaces every
     * character outside {@code [a-z_0-9]} with an underscore.
     *
     * @param name raw field name
     * @return the cleaned field name
     */
    String cleanFieldName(String name) {
        String lowercase = name.toLowerCase(Locale.ROOT);
        return INVALID_FIELD_NAME_CHARS.matcher(lowercase).replaceAll("_");
    }
}

