/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.microformats.reltag;

import java.net.URL;
import java.net.URLDecoder;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutch.parse.HtmlParseFilter;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * HTML parse filter that extracts <a href="http://microformats.org/wiki/rel-tag">rel-tag</a>
 * microformat tags from a parsed document.
 *
 * <p>Every {@code <a>} element that has an {@code href} attribute and whose
 * {@code rel} attribute contains the token {@code tag} contributes one tag:
 * the URL-decoded last path segment of its {@code href}. Each distinct tag is
 * added to the parse metadata of the page under the key {@link #REL_TAG}.
 */
public class RelTagParser
implements HtmlParseFilter {
    public static final Logger LOG = LoggerFactory.getLogger(RelTagParser.class);
    /** Parse-metadata key under which every discovered tag is added. */
    public static final String REL_TAG = "Rel-Tag";
    private Configuration conf = null;

    /**
     * Scans {@code doc} for rel-tag anchors and adds every distinct tag found
     * to the parse metadata of the entry stored under the content's URL.
     *
     * @param content     the fetched content; only its URL is used, as the lookup key
     * @param parseResult the parse result to enrich
     * @param metaTags    not used by this filter
     * @param doc         root of the DOM tree to scan
     * @return the same {@code parseResult} instance; returned untouched when it
     *         holds no parse for the content's URL
     */
    public ParseResult filter(Content content, ParseResult parseResult, HTMLMetaTags metaTags, DocumentFragment doc) {
        Parse parse = parseResult.get(content.getUrl());
        if (parse == null) {
            // Nothing to attach the tags to; leave the result as it is instead of failing.
            return parseResult;
        }
        Metadata metadata = parse.getData().getParseMeta();
        for (String tag : new Parser(doc).getRelTags()) {
            metadata.add(REL_TAG, tag);
        }
        return parseResult;
    }

    /**
     * Stores the configuration handed to this filter.
     *
     * @param conf the configuration to keep
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Returns the configuration previously passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Walks a DOM subtree once, at construction time, and collects the
     * rel-tag tags it contains.
     */
    private static class Parser {
        /** Distinct tags found so far; a {@link TreeSet}, so iteration is in sorted order. */
        private final Set<String> tags = new TreeSet<String>();

        /**
         * Collects all tags below (and including) {@code node}.
         *
         * @param node root of the subtree to scan; {@code null} yields no tags
         */
        Parser(Node node) {
            this.parse(node);
        }

        /**
         * Returns the tags collected while constructing this parser.
         *
         * @return the collected tags; never {@code null}, possibly empty
         */
        Set<String> getRelTags() {
            return this.tags;
        }

        /**
         * Recursively visits {@code node} and its descendants, recording the
         * tag of every rel-tag anchor encountered.
         *
         * @param node the node to visit; ignored when {@code null}
         */
        void parse(Node node) {
            if (node == null) {
                return;
            }
            if (node.getNodeType() == Node.ELEMENT_NODE && "a".equalsIgnoreCase(node.getNodeName())) {
                this.collectTag(node.getAttributes());
            }
            NodeList children = node.getChildNodes();
            for (int i = 0; children != null && i < children.getLength(); ++i) {
                this.parse(children.item(i));
            }
        }

        /**
         * Records the tag described by an anchor's attributes, if they form a
         * rel-tag link with a usable href.
         *
         * @param attrs the attributes of an {@code <a>} element
         */
        private void collectTag(NamedNodeMap attrs) {
            if (attrs == null) {
                return;
            }
            Node hrefNode = attrs.getNamedItem("href");
            Node relNode = attrs.getNamedItem("rel");
            if (hrefNode == null || relNode == null || !Parser.hasTagToken(relNode.getNodeValue())) {
                return;
            }
            String tag = Parser.parseTag(hrefNode.getNodeValue());
            // Set.add reports whether the tag was new, so no separate contains() lookup is needed.
            if (!StringUtil.isEmpty(tag) && this.tags.add(tag)) {
                LOG.debug("Adding tag: {} to tag set.", tag);
            }
        }

        /**
         * Tells whether a {@code rel} attribute value contains the token
         * {@code tag}. The value is treated as a whitespace-separated token
         * list, so values such as {@code "category tag"} match as well.
         *
         * @param rel the raw attribute value, may be {@code null}
         * @return {@code true} if one of the tokens equals {@code tag}, ignoring case
         */
        private static boolean hasTagToken(String rel) {
            if (rel == null) {
                return false;
            }
            for (String token : rel.trim().split("\\s+")) {
                if ("tag".equalsIgnoreCase(token)) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Extracts the tag from a tag URL: the text after the last {@code '/'}
         * of the URL's path, URL-decoded as UTF-8.
         *
         * @param url the href value; must be an absolute URL to yield a tag
         * @return the decoded tag (possibly empty), or {@code null} when the
         *         URL cannot be parsed or decoded
         */
        private static String parseTag(String url) {
            try {
                String path = new URL(url).getPath();
                return URLDecoder.decode(path.substring(path.lastIndexOf('/') + 1), "UTF-8");
            }
            catch (Exception e) {
                // Best effort: an href that is not a parsable absolute URL, or that
                // carries a malformed percent-escape, simply contributes no tag.
                LOG.debug("Ignoring unparsable rel-tag href: {}", url);
                return null;
            }
        }
    }
}

