/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse.html;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.util.NodeWalker;
import org.apache.nutch.util.URLUtil;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Helpers for pulling plain text, the title, the base URL and outlinks out of
 * a DOM tree.
 *
 * <p>The set of elements that are treated as link carriers is rebuilt every
 * time {@link #setConf(Configuration)} is called (the constructor calls it
 * once).
 */
public class DOMContentUtils {
    /** Matches a run of whitespace; compiled once rather than once per text node. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Link-carrying elements, keyed by lower-case element name. */
    private final HashMap<String, LinkParams> linkParams = new HashMap<String, LinkParams>();

    /** Last configuration passed to {@link #setConf(Configuration)}; not read by this class. */
    private Configuration conf;

    /**
     * Creates the utility and initialises the link table from {@code conf}.
     *
     * @param conf configuration handed straight to {@link #setConf(Configuration)}
     */
    public DOMContentUtils(Configuration conf) {
        this.setConf(conf);
    }

    /**
     * Rebuilds the table of link-carrying elements.
     *
     * <p>{@code a}, {@code area}, {@code frame}, {@code iframe}, {@code script},
     * {@code link} and {@code img} are always registered. {@code form} is
     * registered when the boolean property {@code parser.html.form.use_action}
     * (read with a fallback of {@code true}) is true. Afterwards every name
     * listed in {@code parser.html.outlinks.ignore_tags} is removed again,
     * except that {@code form} is kept when {@code parser.html.form.use_action}
     * was explicitly present in the configuration.
     *
     * @param conf configuration to read the two properties from
     */
    public void setConf(Configuration conf) {
        // Tags listed here survive the ignore_tags removal below.
        ArrayList<String> forceTags = new ArrayList<String>(1);
        this.conf = conf;
        this.linkParams.clear();
        this.linkParams.put("a", new LinkParams("a", "href", 1));
        this.linkParams.put("area", new LinkParams("area", "href", 0));
        if (conf.getBoolean("parser.html.form.use_action", true)) {
            this.linkParams.put("form", new LinkParams("form", "action", 1));
            // Only an explicitly configured value protects "form" from being ignored.
            if (conf.get("parser.html.form.use_action") != null) {
                forceTags.add("form");
            }
        }
        this.linkParams.put("frame", new LinkParams("frame", "src", 0));
        this.linkParams.put("iframe", new LinkParams("iframe", "src", 0));
        this.linkParams.put("script", new LinkParams("script", "src", 0));
        this.linkParams.put("link", new LinkParams("link", "href", 0));
        this.linkParams.put("img", new LinkParams("img", "src", 0));

        String[] ignoreTags = conf.getStrings("parser.html.outlinks.ignore_tags");
        if (ignoreTags != null) {
            for (String ignoreTag : ignoreTags) {
                if (!forceTags.contains(ignoreTag)) {
                    this.linkParams.remove(ignoreTag);
                }
            }
        }
    }

    /**
     * Appends the text found under {@code node} to {@code sb}.
     *
     * <p>Each text node is whitespace-collapsed and trimmed; non-empty results
     * are appended, separated by a single space. The children of
     * {@code script}, {@code style} and comment nodes are skipped.
     *
     * @param sb buffer the text is appended to
     * @param node root of the subtree to walk
     * @param abortOnNestedAnchors when true, the walk stops as soon as a second
     *        {@code a} element is encountered
     * @return true if the walk was aborted because of a second anchor
     */
    public boolean getText(StringBuffer sb, Node node, boolean abortOnNestedAnchors) {
        return this.getTextHelper(sb, node, abortOnNestedAnchors, 0);
    }

    /**
     * Appends the text found under {@code node} to {@code sb}, never aborting
     * on anchors.
     *
     * @param sb buffer the text is appended to
     * @param node root of the subtree to walk
     */
    public void getText(StringBuffer sb, Node node) {
        this.getText(sb, node, false);
    }

    /**
     * Walks the subtree and collects text; see
     * {@link #getText(StringBuffer, Node, boolean)} for the contract.
     *
     * @param anchorDepth number of {@code a} elements already counted
     */
    private boolean getTextHelper(StringBuffer sb, Node node, boolean abortOnNestedAnchors, int anchorDepth) {
        boolean abort = false;
        NodeWalker walker = new NodeWalker(node);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            String nodeName = currentNode.getNodeName();
            short nodeType = currentNode.getNodeType();

            if ("script".equalsIgnoreCase(nodeName)) {
                walker.skipChildren();
            }
            if ("style".equalsIgnoreCase(nodeName)) {
                walker.skipChildren();
            }
            if (abortOnNestedAnchors && "a".equalsIgnoreCase(nodeName)) {
                // The counter is only ever incremented, so the second anchor met
                // during the walk triggers the abort.
                anchorDepth++;
                if (anchorDepth > 1) {
                    abort = true;
                    break;
                }
            }
            if (nodeType == Node.COMMENT_NODE) {
                walker.skipChildren();
            }
            if (nodeType != Node.TEXT_NODE) {
                continue;
            }

            // Collapse internal whitespace runs and drop leading/trailing blanks.
            String text = WHITESPACE_RUN.matcher(currentNode.getNodeValue()).replaceAll(" ").trim();
            if (text.length() > 0) {
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                sb.append(text);
            }
        }
        return abort;
    }

    /**
     * Appends the text of the first {@code title} element to {@code sb}.
     *
     * <p>The search gives up as soon as a node named {@code body} is reached.
     *
     * @param sb buffer the title text is appended to
     * @param node root of the subtree to search
     * @return true if a {@code title} element was found before any {@code body}
     */
    public boolean getTitle(StringBuffer sb, Node node) {
        NodeWalker walker = new NodeWalker(node);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            String nodeName = currentNode.getNodeName();
            if ("body".equalsIgnoreCase(nodeName)) {
                return false;
            }
            if (currentNode.getNodeType() == Node.ELEMENT_NODE && "title".equalsIgnoreCase(nodeName)) {
                this.getText(sb, currentNode);
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the URL given by the {@code href} of a {@code base} element.
     *
     * <p>Only elements encountered before a {@code body} element are
     * considered. An {@code href} that is not a well-formed URL is skipped and
     * the search continues.
     *
     * @param node root of the subtree to search
     * @return the first parseable base URL, or null if there is none
     */
    public URL getBase(Node node) {
        NodeWalker walker = new NodeWalker(node);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            if (currentNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String nodeName = currentNode.getNodeName();
            if ("body".equalsIgnoreCase(nodeName)) {
                return null;
            }
            if (!"base".equalsIgnoreCase(nodeName)) {
                continue;
            }
            NamedNodeMap attrs = currentNode.getAttributes();
            for (int i = 0; i < attrs.getLength(); ++i) {
                Node attr = attrs.item(i);
                if (!"href".equalsIgnoreCase(attr.getNodeName())) {
                    continue;
                }
                try {
                    return new URL(attr.getNodeValue());
                }
                catch (MalformedURLException ignored) {
                    // Best effort: an unparseable href is skipped and the search goes on.
                }
            }
        }
        return null;
    }

    /** Returns true if every character of the node's value is whitespace. */
    private boolean hasOnlyWhiteSpace(Node node) {
        String val = node.getNodeValue();
        for (int i = 0; i < val.length(); ++i) {
            if (!Character.isWhitespace(val.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns true if {@code child} is an element with the same name as the link element. */
    private static boolean isSameElement(Node child, LinkParams params) {
        return child.getNodeType() == Node.ELEMENT_NODE
                && params.elName.equalsIgnoreCase(child.getNodeName());
    }

    /** Returns true if {@code child} is a text node holding only whitespace. */
    private boolean isBlankText(Node child) {
        return child.getNodeType() == Node.TEXT_NODE && this.hasOnlyWhiteSpace(child);
    }

    /**
     * Decides whether a link element should be discarded.
     *
     * <p>An element is discarded when it has no children although its
     * {@link LinkParams#childLen} is non-zero, or when its children consist of
     * exactly one element of the same name optionally surrounded by
     * whitespace-only text nodes.
     *
     * @param children child list of the link element (not read when {@code childLen} is 0)
     * @param childLen number of children
     * @param params link description registered for the element
     * @return true if the link should be ignored
     */
    private boolean shouldThrowAwayLink(NodeList children, int childLen, LinkParams params) {
        switch (childLen) {
            case 0:
                return params.childLen != 0;
            case 1:
                return isSameElement(children.item(0), params);
            case 2: {
                Node c0 = children.item(0);
                Node c1 = children.item(1);
                return (isSameElement(c0, params) && this.isBlankText(c1))
                        || (isSameElement(c1, params) && this.isBlankText(c0));
            }
            case 3: {
                Node c0 = children.item(0);
                Node c1 = children.item(1);
                Node c2 = children.item(2);
                return isSameElement(c1, params) && this.isBlankText(c0) && this.isBlankText(c2);
            }
            default:
                return false;
        }
    }

    /**
     * Returns true if {@code relValue}, read as a whitespace-separated token
     * list, contains {@code token} (compared case-insensitively).
     */
    private static boolean hasRelToken(String relValue, String token) {
        if (relValue == null) {
            return false;
        }
        for (String candidate : WHITESPACE_RUN.split(relValue.trim())) {
            if (token.equalsIgnoreCase(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds substitute anchor text for a link whose regular text is blank:
     * the non-blank {@code alt} text of {@code img} elements and the content of
     * non-empty text nodes below {@code linkNode}, joined by single spaces.
     */
    private void appendFallbackText(StringBuffer linkText, Node linkNode) {
        NodeWalker subWalker = new NodeWalker(linkNode);
        while (subWalker.hasNext()) {
            Node subNode = subWalker.nextNode();
            String piece = null;
            if (subNode.getNodeType() == Node.ELEMENT_NODE) {
                // Locale.ROOT keeps the comparison independent of the JVM's default locale.
                if ("img".equals(subNode.getNodeName().toLowerCase(Locale.ROOT))) {
                    Node alt = subNode.getAttributes().getNamedItem("alt");
                    String altTxt = alt != null ? alt.getTextContent() : null;
                    if (altTxt != null && altTxt.trim().length() > 0) {
                        piece = altTxt;
                    }
                }
            } else if (subNode.getNodeType() == Node.TEXT_NODE) {
                String txt = subNode.getTextContent();
                if (txt != null && txt.length() > 0) {
                    piece = txt;
                }
            }
            if (piece == null) {
                continue;
            }
            if (linkText.length() > 0) {
                linkText.append(' ');
            }
            linkText.append(piece);
        }
    }

    /**
     * Collects the outlinks found under {@code node} and appends them to
     * {@code outlinks}.
     *
     * <p>Only elements registered by {@link #setConf(Configuration)} are
     * considered. A link is emitted when its URL attribute is present, its
     * {@code rel} attribute does not contain the token {@code nofollow}, and its
     * {@code method} attribute is not {@code post}. Targets that cannot be
     * resolved into a URL are silently skipped. The anchor text is the
     * element's text, or, if that is blank, image {@code alt} text and raw text
     * found below the element.
     *
     * @param base URL passed to {@code URLUtil.resolveURL} together with each target
     * @param outlinks list the discovered outlinks are appended to
     * @param node root of the subtree to search
     */
    public void getOutlinks(URL base, ArrayList<Outlink> outlinks, Node node) {
        NodeWalker walker = new NodeWalker(node);
        while (walker.hasNext()) {
            Node currentNode = walker.nextNode();
            if (currentNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // The table is keyed by lower-case names; Locale.ROOT avoids
            // locale-specific case mappings (e.g. Turkish dotless i).
            String nodeName = currentNode.getNodeName().toLowerCase(Locale.ROOT);
            LinkParams params = this.linkParams.get(nodeName);
            if (params == null) {
                continue;
            }

            NodeList children = currentNode.getChildNodes();
            int childLen = children != null ? children.getLength() : 0;
            if (this.shouldThrowAwayLink(children, childLen, params)) {
                continue;
            }

            StringBuffer linkText = new StringBuffer();
            this.getText(linkText, currentNode, true);
            if (linkText.toString().trim().length() == 0) {
                this.appendFallbackText(linkText, currentNode);
            }

            NamedNodeMap attrs = currentNode.getAttributes();
            String target = null;
            boolean noFollow = false;
            boolean post = false;
            for (int i = 0; i < attrs.getLength(); ++i) {
                Node attr = attrs.item(i);
                String attrName = attr.getNodeName();
                if (params.attrName.equalsIgnoreCase(attrName)) {
                    target = attr.getNodeValue();
                } else if ("rel".equalsIgnoreCase(attrName)) {
                    // rel holds a space-separated token list, e.g. "nofollow noopener".
                    if (hasRelToken(attr.getNodeValue(), "nofollow")) {
                        noFollow = true;
                    }
                } else if ("method".equalsIgnoreCase(attrName) && "post".equalsIgnoreCase(attr.getNodeValue())) {
                    post = true;
                }
            }

            if (target == null || noFollow || post) {
                continue;
            }
            try {
                URL url = URLUtil.resolveURL(base, target);
                outlinks.add(new Outlink(url.toString(), linkText.toString().trim()));
            }
            catch (MalformedURLException ignored) {
                // Best effort: a target that does not yield a valid URL produces no outlink.
            }
        }
    }

    /**
     * Describes one kind of link-carrying element.
     */
    public static class LinkParams {
        /** Element name, e.g. {@code a}. */
        public String elName;
        /** Name of the attribute that holds the link target, e.g. {@code href}. */
        public String attrName;
        /**
         * When non-zero, an element of this kind that has no children at all is
         * discarded as a link.
         */
        public int childLen;

        /**
         * @param elName element name
         * @param attrName attribute holding the link target
         * @param childLen non-zero if an element without children should be discarded
         */
        public LinkParams(String elName, String attrName, int childLen) {
            this.elName = elName;
            this.attrName = attrName;
            this.childLen = childLen;
        }

        /** Returns a compact debug representation of the three fields. */
        @Override
        public String toString() {
            return "LP[el=" + this.elName + ",attr=" + this.attrName + ",len=" + this.childLen + "]";
        }
    }
}

