/*
 * Decompiled with CFR 0.152.
 */
package org.creativecommons.nutch;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Parse;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Indexing filter that copies license-related entries from a page's parse
 * metadata into the {@link #FIELD} field of the index document.
 *
 * <p>Three parse-metadata keys are consulted, in this order:
 * <ul>
 *   <li>{@code License-Url} - added as {@code license=<url>}, followed by one
 *       feature per path component of the URL (see
 *       {@link #addUrlFeatures(NutchDocument, String)});</li>
 *   <li>{@code License-Location} - added as {@code meta=<location>};</li>
 *   <li>{@code Work-Type} - added verbatim.</li>
 * </ul>
 * Keys that are absent from the metadata contribute nothing.
 */
public class CCIndexingFilter
implements IndexingFilter {
    public static final Logger LOG = LoggerFactory.getLogger(CCIndexingFilter.class);

    /**
     * Name of the document field that receives every feature added by this
     * filter. Left non-final so existing code that reassigns it keeps working.
     */
    public static String FIELD = "cc";

    /** Configuration handed in through {@link #setConf(Configuration)}. */
    private Configuration conf;

    /**
     * Adds the license features found in the parse metadata to {@code doc}.
     *
     * @param doc     document being built; features are added to it in place
     * @param parse   parse result whose parse metadata is read
     * @param url     URL of the page; used only in the log message
     * @param datum   not used by this filter
     * @param inlinks not used by this filter
     * @return the same {@code doc} instance that was passed in
     * @throws IndexingException declared by the filter contract; this
     *         implementation does not throw it itself
     */
    public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        Metadata metadata = parse.getData().getParseMeta();

        String licenseUrl = metadata.get("License-Url");
        if (licenseUrl != null) {
            // Parameterized logging: the message is only rendered when INFO is enabled.
            LOG.info("CC: indexing {} for: {}", licenseUrl, url);
            this.addFeature(doc, "license=" + licenseUrl);
            this.addUrlFeatures(doc, licenseUrl);
        }

        String licenseLocation = metadata.get("License-Location");
        if (licenseLocation != null) {
            this.addFeature(doc, "meta=" + licenseLocation);
        }

        String workType = metadata.get("Work-Type");
        if (workType != null) {
            this.addFeature(doc, workType);
        }

        return doc;
    }

    /**
     * Splits the path of {@code urlString} at {@code '/'} and {@code '-'} and
     * adds every component except the first as a feature of {@code doc}.
     *
     * <p>For example, a path of {@code /licenses/by-nc/2.0/} yields the tokens
     * {@code licenses}, {@code by}, {@code nc}, {@code 2.0}; the first one is
     * discarded and the remaining three are added.
     *
     * <p>If {@code urlString} cannot be parsed as a URL, a warning is logged
     * and the document is left untouched.
     *
     * @param doc       document that receives the features
     * @param urlString license URL to decompose
     */
    public void addUrlFeatures(NutchDocument doc, String urlString) {
        try {
            URL url = new URL(urlString);
            StringTokenizer names = new StringTokenizer(url.getPath(), "/-");

            // The leading path component is deliberately skipped.
            if (names.hasMoreTokens()) {
                names.nextToken();
            }

            while (names.hasMoreTokens()) {
                this.addFeature(doc, names.nextToken());
            }
        }
        catch (MalformedURLException e) {
            // e.toString() keeps the exception text inside the message, exactly
            // as before, instead of having SLF4J treat it as a trailing throwable.
            LOG.warn("CC: failed to parse url: {} : {}", urlString, e.toString());
        }
    }

    /**
     * Adds a single feature value to the {@link #FIELD} field of {@code doc}.
     *
     * @param doc     document that receives the value
     * @param feature value to add
     */
    private void addFeature(NutchDocument doc, String feature) {
        // Cast retained from the original; presumably pins the Object overload - TODO confirm.
        doc.add(FIELD, (Object)feature);
    }

    /**
     * Stores the configuration for later retrieval via {@link #getConf()}.
     *
     * @param conf configuration to keep
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Returns the configuration previously passed to
     * {@link #setConf(Configuration)}, or {@code null} if none was set.
     *
     * @return the stored configuration
     */
    public Configuration getConf() {
        return this.conf;
    }
}

