/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer.anchor;

import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.parse.Parse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Indexing filter that copies the anchor texts of a document's inlinks into
 * the {@code "anchor"} field of the {@link NutchDocument}.
 *
 * <p>When the boolean property {@code anchorIndexingFilter.deduplicate} is
 * set to {@code true}, anchors that differ only by letter case are added
 * once; the first occurrence (with its original casing) wins. The property
 * defaults to {@code false}, in which case every anchor is added.
 */
public class AnchorIndexingFilter
implements IndexingFilter {
    public static final Logger LOG = LoggerFactory.getLogger(AnchorIndexingFilter.class);

    /** Name of the index field that receives the anchor texts. */
    private static final String ANCHOR_FIELD = "anchor";

    /** Configuration key that toggles case-insensitive anchor deduplication. */
    private static final String DEDUPLICATE_KEY = "anchorIndexingFilter.deduplicate";

    private Configuration conf;
    private boolean deduplicate = false;

    /**
     * Stores the configuration and reads the deduplication switch from it.
     *
     * @param conf configuration to read {@code anchorIndexingFilter.deduplicate} from
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        this.deduplicate = conf.getBoolean(DEDUPLICATE_KEY, false);
        LOG.info("Anchor deduplication is: {}", this.deduplicate ? "on" : "off");
    }

    /**
     * Returns the configuration last passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Adds the anchors of {@code inlinks} to the {@code "anchor"} field of {@code doc}.
     *
     * @param doc     document to add the anchor values to
     * @param parse   not used by this filter
     * @param url     not used by this filter
     * @param datum   not used by this filter
     * @param inlinks source of the anchors; {@code null} is treated as "no anchors"
     * @return the same {@code doc} instance that was passed in
     * @throws IndexingException declared by the filter contract; not thrown by this method itself
     */
    public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        String[] anchors = inlinks != null ? inlinks.getAnchors() : new String[0];

        if (!this.deduplicate) {
            for (String anchor : anchors) {
                doc.add(ANCHOR_FIELD, (Object)anchor);
            }
            return doc;
        }

        Set<String> seen = new HashSet<String>();
        for (String anchor : anchors) {
            // Locale.ROOT keeps the comparison key independent of the JVM's
            // default locale (e.g. Turkish dotless-i rules would otherwise
            // make "LINK" and "link" compare as different anchors).
            String key = anchor.toLowerCase(Locale.ROOT);
            // Set.add returns false when the key was already present.
            if (seen.add(key)) {
                doc.add(ANCHOR_FIELD, (Object)anchor);
            }
        }
        return doc;
    }
}

