/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * XXX NUTCH COMMENT
 * XXX ab@apache.org: This class is copied verbatim from Xalan-J 2.6.0
 * XXX distribution, org.apache.xml.utils.DOMBuilder, in order to
 * avoid dependency on Xalan.
 */

package org.apache.stormcrawler.tika;

import java.util.Locale;
import java.util.Stack;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.ext.LexicalHandler;

/**
 * This class takes SAX events (in addition to some extra events that SAX doesn't handle yet) and
 * adds the result to a document or document fragment.
 */
public class DOMBuilder implements ContentHandler, LexicalHandler {

    private boolean upperCaseElementNames = true;

    /** Root document */
    private Document m_doc;

    /** Current node */
    private Node m_currentNode = null;

    /** First node of document fragment or null if not a DocumentFragment */
    private DocumentFragment m_docFrag = null;

    /** Vector of element nodes */
    private Stack<Element> m_elemStack = new Stack<>();

    /** Element recorded with this namespace will be converted to Node without a namespace */
    private String defaultNamespaceURI = null;

    /**
     * DOMBuilder instance constructor... it will add the DOM nodes to the document fragment.
     *
     * @param doc Root document
     * @param node Current node
     */
    public DOMBuilder(Document doc, Node node) {
        m_doc = doc;
        m_currentNode = node;
    }

    /**
     * DOMBuilder instance constructor... it will add the DOM nodes to the document fragment.
     *
     * @param doc Root document
     * @param docFrag Document fragment
     */
    DOMBuilder(Document doc, DocumentFragment docFrag) {
        m_doc = doc;
        m_docFrag = docFrag;
    }

    /**
     * DOMBuilder instance constructor... it will add the DOM nodes to the document.
     *
     * @param doc Root document
     */
    DOMBuilder(Document doc) {
        m_doc = doc;
    }

    /**
     * Get the root node of the DOM being created. This is either a Document or a DocumentFragment.
     *
     * @return The root document or document fragment if not null
     */
    Node getRootNode() {
        return (null != m_docFrag) ? (Node) m_docFrag : (Node) m_doc;
    }

    /**
     * Get the node currently being processed.
     *
     * @return the current node being processed
     */
    Node getCurrentNode() {
        return m_currentNode;
    }

    /**
     * Return null since there is no Writer for this class.
     *
     * @return null
     */
    java.io.Writer getWriter() {
        return null;
    }

    /**
     * Append a node to the current container.
     *
     * @param newNode New node to append
     */
    protected void append(Node newNode) throws org.xml.sax.SAXException {

        Node currentNode = m_currentNode;

        if (null != currentNode) {
            currentNode.appendChild(newNode);

            // System.out.println(newNode.getNodeName());
        } else if (null != m_docFrag) {
            m_docFrag.appendChild(newNode);
        } else {
            boolean ok = true;
            short type = newNode.getNodeType();

            if (type == Node.TEXT_NODE) {
                String data = newNode.getNodeValue();

                if ((null != data) && (data.trim().length() > 0)) {
                    throw new org.xml.sax.SAXException(
                            "Warning: can't output text before document element!  Ignoring...");
                }

                ok = false;
            } else if (type == Node.ELEMENT_NODE) {
                if (m_doc.getDocumentElement() != null) {
                    throw new org.xml.sax.SAXException("Can't have more than one root on a DOM!");
                }
            }

            if (ok) {
                m_doc.appendChild(newNode);
            }
        }
    }

    /**
     * Receive an object for locating the origin of SAX document events.
     *
     * <p>SAX parsers are strongly encouraged (though not absolutely required) to supply a locator:
     * if it does so, it must supply the locator to the application by invoking this method before
     * invoking any of the other methods in the ContentHandler interface.
     *
     * <p>The locator allows the application to determine the end position of any document-related
     * event, even if the parser is not reporting an error. Typically, the application will use this
     * information for reporting its own errors (such as character content that does not match an
     * application's business rules). The information returned by the locator is probably not
     * sufficient for use with a search engine.
     *
     * <p>Note that the locator will return correct information only during the invocation of the
     * events in this interface. The application should not attempt to use it at any other time.
     *
     * @param locator An object that can return the location of any SAX document event.
     * @see org.xml.sax.Locator
     */
    @Override
    public void setDocumentLocator(Locator locator) {

        // No action for the moment.
    }

    /**
     * Receive notification of the beginning of a document.
     *
     * <p>The SAX parser will invoke this method only once, before any other methods in this
     * interface or in DTDHandler (except for setDocumentLocator).
     */
    @Override
    public void startDocument() throws org.xml.sax.SAXException {

        // No action for the moment.
    }

    /**
     * Receive notification of the end of a document.
     *
     * <p>The SAX parser will invoke this method only once, and it will be the last method invoked
     * during the parse. The parser shall not invoke this method until it has either abandoned
     * parsing (because of an unrecoverable error) or reached the end of input.
     */
    @Override
    public void endDocument() throws org.xml.sax.SAXException {

        // No action for the moment.
    }

    /**
     * Receive notification of the beginning of an element.
     *
     * <p>The Parser will invoke this method at the beginning of every element in the XML document;
     * there will be a corresponding endElement() event for every startElement() event (even when
     * the element is empty). All of the element's content will be reported, in order, before the
     * corresponding endElement() event.
     *
     * <p>If the element name has a namespace prefix, the prefix will still be attached. Note that
     * the attribute list provided will contain only attributes with explicit values (specified or
     * defaulted): #IMPLIED attributes will be omitted.
     *
     * @param ns The namespace of the node
     * @param localName The local part of the qualified name
     * @param name The element name.
     * @param atts The attributes attached to the element, if any.
     * @see #endElement
     * @see org.xml.sax.Attributes
     */
    @Override
    public void startElement(String ns, String localName, String name, Attributes atts)
            throws org.xml.sax.SAXException {

        Element elem;

        if (upperCaseElementNames) {
            name = name.toUpperCase(Locale.ROOT);
        }

        // Note that the namespace-aware call must be used to correctly
        // construct a Level 2 DOM, even for non-namespaced nodes.
        if ((null == ns) || (ns.length() == 0) || ns.equals(defaultNamespaceURI)) {
            elem = m_doc.createElementNS(null, name);
        } else {
            elem = m_doc.createElementNS(ns, name);
        }

        append(elem);

        try {
            int nAtts = atts.getLength();

            if (0 != nAtts) {
                for (int i = 0; i < nAtts; i++) {
                    // First handle a possible ID attribute
                    if (atts.getType(i).equalsIgnoreCase("ID")) {
                        setIDAttribute(atts.getValue(i), elem);
                    }

                    String attrNS = atts.getURI(i);

                    if ("".equals(attrNS)) {
                        attrNS = null; // DOM represents no-namespace as null
                    }

                    // System.out.println("attrNS: "+attrNS+", localName: "+atts.getQName(i)
                    // +", qname: "+atts.getQName(i)+", value: "+atts.getValue(i));
                    // Crimson won't let us set an xmlns: attribute on the DOM.
                    String attrQName = atts.getQName(i);

                    // In SAX, xmlns: attributes have an empty namespace, while
                    // in DOM they should have the xmlns namespace
                    if (attrQName.startsWith("xmlns:")) {
                        attrNS = "http://www.w3.org/2000/xmlns/";
                    }

                    // ALWAYS use the DOM Level 2 call!
                    elem.setAttributeNS(attrNS, attrQName, atts.getValue(i));
                }
            }

            // append(elem);

            m_elemStack.push(elem);

            m_currentNode = elem;

            // append(elem);
        } catch (java.lang.Exception de) {
            // de.printStackTrace();
            throw new org.xml.sax.SAXException(de);
        }
    }

    /**
     * Receive notification of the end of an element.
     *
     * <p>The SAX parser will invoke this method at the end of every element in the XML document;
     * there will be a corresponding startElement() event for every endElement() event (even when
     * the element is empty).
     *
     * <p>If the element name has a namespace prefix, the prefix will still be attached to the name.
     *
     * @param ns the namespace of the element
     * @param localName The local part of the qualified name of the element
     * @param name The element name
     */
    @Override
    public void endElement(String ns, String localName, String name)
            throws org.xml.sax.SAXException {
        m_elemStack.pop();
        m_currentNode = m_elemStack.isEmpty() ? null : (Node) m_elemStack.peek();
    }

    /**
     * Set an ID string to node association in the ID table.
     *
     * @param id The ID string.
     * @param elem The associated ID.
     */
    public void setIDAttribute(String id, Element elem) {

        // Do nothing. This method is meant to be overiden.
    }

    /**
     * Receive notification of character data.
     *
     * <p>The Parser will call this method to report each chunk of character data. SAX parsers may
     * return all contiguous character data in a single chunk, or they may split it into several
     * chunks; however, all of the characters in any single event must come from the same external
     * entity, so that the Locator provides useful information.
     *
     * <p>The application must not attempt to read from the array outside of the specified range.
     *
     * <p>Note that some parsers will report whitespace using the ignorableWhitespace() method
     * rather than this one (validating parsers must do so).
     *
     * @param ch The characters from the XML document.
     * @param start The start position in the array.
     * @param length The number of characters to read from the array.
     * @see #ignorableWhitespace
     * @see org.xml.sax.Locator
     */
    @Override
    public void characters(char ch[], int start, int length) throws org.xml.sax.SAXException {
        if (isOutsideDocElem() && XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) {
            return; // avoid DOM006 Hierarchy request error
        }

        if (m_inCData) {
            cdata(ch, start, length);

            return;
        }

        String s = new String(ch, start, length);
        Node childNode;
        childNode = m_currentNode != null ? m_currentNode.getLastChild() : null;
        if (childNode != null && childNode.getNodeType() == Node.TEXT_NODE) {
            ((Text) childNode).appendData(s);
        } else {
            Text text = m_doc.createTextNode(s);
            append(text);
        }
    }

    /**
     * If available, when the disable-output-escaping attribute is used, output raw text without
     * escaping. A PI will be inserted in front of the node with the name "lotusxsl-next-is-raw" and
     * a value of "formatter-to-dom".
     *
     * @param ch Array containing the characters
     * @param start Index to start of characters in the array
     * @param length Number of characters in the array
     */
    public void charactersRaw(char ch[], int start, int length) throws org.xml.sax.SAXException {
        if (isOutsideDocElem() && XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) {
            return; // avoid DOM006 Hierarchy request error
        }

        String s = new String(ch, start, length);

        append(m_doc.createProcessingInstruction("xslt-next-is-raw", "formatter-to-dom"));
        append(m_doc.createTextNode(s));
    }

    /**
     * Report the beginning of an entity.
     *
     * <p>The start and end of the document entity are not reported. The start and end of the
     * external DTD subset are reported using the pseudo-name "[dtd]". All other events must be
     * properly nested within start/end entity events.
     *
     * @param name The name of the entity. If it is a parameter entity, the name will begin with
     *     '%'.
     * @see #endEntity
     * @see org.xml.sax.ext.DeclHandler#internalEntityDecl
     * @see org.xml.sax.ext.DeclHandler#externalEntityDecl
     */
    @Override
    public void startEntity(String name) throws org.xml.sax.SAXException {

        // Almost certainly the wrong behavior...
        // entityReference(name);
    }

    /**
     * Report the end of an entity.
     *
     * @param name The name of the entity that is ending.
     * @see #startEntity
     */
    @Override
    public void endEntity(String name) throws org.xml.sax.SAXException {}

    /**
     * Receive notivication of a entityReference.
     *
     * @param name name of the entity reference
     */
    public void entityReference(String name) throws org.xml.sax.SAXException {
        append(m_doc.createEntityReference(name));
    }

    /**
     * Receive notification of ignorable whitespace in element content.
     *
     * <p>Validating Parsers must use this method to report each chunk of ignorable whitespace (see
     * the W3C XML 1.0 recommendation, section 2.10): non-validating parsers may also use this
     * method if they are capable of parsing and using content models.
     *
     * <p>SAX parsers may return all contiguous whitespace in a single chunk, or they may split it
     * into several chunks; however, all of the characters in any single event must come from the
     * same external entity, so that the Locator provides useful information.
     *
     * <p>The application must not attempt to read from the array outside of the specified range.
     *
     * @param ch The characters from the XML document.
     * @param start The start position in the array.
     * @param length The number of characters to read from the array.
     * @see #characters
     */
    @Override
    public void ignorableWhitespace(char ch[], int start, int length)
            throws org.xml.sax.SAXException {
        if (isOutsideDocElem()) {
            return; // avoid DOM006 Hierarchy request error
        }

        String s = new String(ch, start, length);

        append(m_doc.createTextNode(s));
    }

    /**
     * Tell if the current node is outside the document element.
     *
     * @return true if the current node is outside the document element.
     */
    private boolean isOutsideDocElem() {
        return (null == m_docFrag)
                && m_elemStack.size() == 0
                && (null == m_currentNode || m_currentNode.getNodeType() == Node.DOCUMENT_NODE);
    }

    /**
     * Receive notification of a processing instruction.
     *
     * <p>The Parser will invoke this method once for each processing instruction found: note that
     * processing instructions may occur before or after the main document element.
     *
     * <p>A SAX parser should never report an XML declaration (XML 1.0, section 2.8) or a text
     * declaration (XML 1.0, section 4.3.1) using this method.
     *
     * @param target The processing instruction target.
     * @param data The processing instruction data, or null if none was supplied.
     */
    @Override
    public void processingInstruction(String target, String data) throws org.xml.sax.SAXException {
        append(m_doc.createProcessingInstruction(target, data));
    }

    /**
     * Report an XML comment anywhere in the document.
     *
     * <p>This callback will be used for comments inside or outside the document element, including
     * comments in the external DTD subset (if read).
     *
     * @param ch An array holding the characters in the comment.
     * @param start The starting position in the array.
     * @param length The number of characters to use from the array.
     */
    @Override
    public void comment(char ch[], int start, int length) throws org.xml.sax.SAXException {
        // tagsoup sometimes submits invalid values here
        if (ch == null || start < 0 || length >= (ch.length - start) || length < 0) {
            return;
        }
        append(m_doc.createComment(new String(ch, start, length)));
    }

    /** Flag indicating that we are processing a CData section */
    protected boolean m_inCData = false;

    /**
     * Report the start of a CDATA section.
     *
     * @see #endCDATA
     */
    @Override
    public void startCDATA() throws org.xml.sax.SAXException {
        m_inCData = true;
        append(m_doc.createCDATASection(""));
    }

    /**
     * Report the end of a CDATA section.
     *
     * @see #startCDATA
     */
    @Override
    public void endCDATA() throws org.xml.sax.SAXException {
        m_inCData = false;
    }

    /**
     * Receive notification of cdata.
     *
     * <p>The Parser will call this method to report each chunk of character data. SAX parsers may
     * return all contiguous character data in a single chunk, or they may split it into several
     * chunks; however, all of the characters in any single event must come from the same external
     * entity, so that the Locator provides useful information.
     *
     * <p>The application must not attempt to read from the array outside of the specified range.
     *
     * <p>Note that some parsers will report whitespace using the ignorableWhitespace() method
     * rather than this one (validating parsers must do so).
     *
     * @param ch The characters from the XML document.
     * @param start The start position in the array.
     * @param length The number of characters to read from the array.
     * @see #ignorableWhitespace
     * @see org.xml.sax.Locator
     */
    public void cdata(char ch[], int start, int length) {
        if (isOutsideDocElem() && XMLCharacterRecognizer.isWhiteSpace(ch, start, length)) {
            return; // avoid DOM006 Hierarchy request error
        }

        String s = new String(ch, start, length);

        // XXX ab@apache.org: modified from the original, to accomodate TagSoup.
        Node n = m_currentNode.getLastChild();
        if (n instanceof CDATASection) {
            ((CDATASection) n).appendData(s);
        } else if (n instanceof Comment) {
            ((Comment) n).appendData(s);
        }
    }

    /**
     * Report the start of DTD declarations, if any.
     *
     * <p>Any declarations are assumed to be in the internal subset unless otherwise indicated.
     *
     * @param name The document type name.
     * @param publicId The declared public identifier for the external DTD subset, or null if none
     *     was declared.
     * @param systemId The declared system identifier for the external DTD subset, or null if none
     *     was declared.
     * @see #endDTD
     * @see #startEntity
     */
    @Override
    public void startDTD(String name, String publicId, String systemId)
            throws org.xml.sax.SAXException {

        // Do nothing for now.
    }

    /**
     * Report the end of DTD declarations.
     *
     * @see #startDTD
     */
    @Override
    public void endDTD() throws org.xml.sax.SAXException {

        // Do nothing for now.
    }

    /**
     * Begin the scope of a prefix-URI Namespace mapping.
     *
     * <p>The information from this event is not necessary for normal Namespace processing: the SAX
     * XML reader will automatically replace prefixes for element and attribute names when the
     * http://xml.org/sax/features/namespaces feature is true (the default).
     *
     * <p>There are cases, however, when applications need to use prefixes in character data or in
     * attribute values, where they cannot safely be expanded automatically; the
     * start/endPrefixMapping event supplies the information to the application to expand prefixes
     * in those contexts itself, if necessary.
     *
     * <p>Note that start/endPrefixMapping events are not guaranteed to be properly nested relative
     * to each-other: all startPrefixMapping events will occur before the corresponding startElement
     * event, and all endPrefixMapping events will occur after the corresponding endElement event,
     * but their order is not guaranteed.
     *
     * @param prefix The Namespace prefix being declared.
     * @param uri The Namespace URI the prefix is mapped to.
     * @see #endPrefixMapping
     * @see #startElement
     */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws org.xml.sax.SAXException {

        /*
         * // Not sure if this is needed or wanted // Also, it fails in the
         * stree. if((null != m_currentNode) && (m_currentNode.getNodeType() ==
         * Node.ELEMENT_NODE)) { String qname; if(((null != prefix) &&
         * (prefix.length() == 0)) || (null == prefix)) qname = "xmlns"; else
         * qname = "xmlns:"+prefix;
         *
         * Element elem = (Element)m_currentNode; String val =
         * elem.getAttribute(qname); // Obsolete, should be DOM2...? if(val ==
         * null) { elem.setAttributeNS("http://www.w3.org/XML/1998/namespace",
         * qname, uri); } }
         */
    }

    /**
     * End the scope of a prefix-URI mapping.
     *
     * <p>See startPrefixMapping for details. This event will always occur after the corresponding
     * endElement event, but the order of endPrefixMapping events is not otherwise guaranteed.
     *
     * @param prefix The prefix that was being mapping.
     * @see #startPrefixMapping
     * @see #endElement
     */
    @Override
    public void endPrefixMapping(String prefix) throws org.xml.sax.SAXException {}

    /**
     * Receive notification of a skipped entity.
     *
     * <p>The Parser will invoke this method once for each entity skipped. Non-validating processors
     * may skip entities if they have not seen the declarations (because, for example, the entity
     * was declared in an external DTD subset). All processors may skip external entities, depending
     * on the values of the http://xml.org/sax/features/external-general-entities and the
     * http://xml.org/sax/features/external-parameter-entities properties.
     *
     * @param name The name of the skipped entity. If it is a parameter entity, the name will begin
     *     with '%'.
     */
    @Override
    public void skippedEntity(String name) throws org.xml.sax.SAXException {}

    public boolean isUpperCaseElementNames() {
        return upperCaseElementNames;
    }

    public void setUpperCaseElementNames(boolean upperCaseElementNames) {
        this.upperCaseElementNames = upperCaseElementNames;
    }

    public String getDefaultNamespaceURI() {
        return defaultNamespaceURI;
    }

    public void setDefaultNamespaceURI(String defaultNamespaceURI) {
        this.defaultNamespaceURI = defaultNamespaceURI;
    }
}
