/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CrawlDbFilter
implements Mapper<Text, CrawlDatum, Text, CrawlDatum> {
    public static final String URL_FILTERING = "crawldb.url.filters";
    public static final String URL_NORMALIZING = "crawldb.url.normalizers";
    public static final String URL_NORMALIZING_SCOPE = "crawldb.url.normalizers.scope";
    private boolean urlFiltering;
    private boolean urlNormalizers;
    private boolean url404Purging;
    private URLFilters filters;
    private URLNormalizers normalizers;
    private String scope;
    public static final Logger LOG = LoggerFactory.getLogger(CrawlDbFilter.class);
    private Text newKey = new Text();

    public void configure(JobConf job) {
        this.urlFiltering = job.getBoolean(URL_FILTERING, false);
        this.urlNormalizers = job.getBoolean(URL_NORMALIZING, false);
        this.url404Purging = job.getBoolean("db.update.purge.404", false);
        if (this.urlFiltering) {
            this.filters = new URLFilters((Configuration)job);
        }
        if (this.urlNormalizers) {
            this.scope = job.get(URL_NORMALIZING_SCOPE, "crawldb");
            this.normalizers = new URLNormalizers((Configuration)job, this.scope);
        }
    }

    public void close() {
    }

    public void map(Text key, CrawlDatum value, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
        String url = key.toString();
        if (this.url404Purging && 3 == value.getStatus()) {
            url = null;
        }
        if (url != null && this.urlNormalizers) {
            try {
                url = this.normalizers.normalize(url, this.scope);
            }
            catch (Exception e) {
                LOG.warn("Skipping " + url + ":" + e);
                url = null;
            }
        }
        if (url != null && this.urlFiltering) {
            try {
                url = this.filters.filter(url);
            }
            catch (Exception e) {
                LOG.warn("Skipping " + url + ":" + e);
                url = null;
            }
        }
        if (url != null) {
            this.newKey.set(url);
            output.collect((Object)this.newKey, (Object)value);
        }
    }
}

