/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer.basic;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.Inlinks;
import org.apache.nutch.indexer.IndexingException;
import org.apache.nutch.indexer.IndexingFilter;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Indexing filter that adds the basic fields of a fetched page to a
 * {@link NutchDocument}: {@code domain} (optional), {@code host}, {@code url},
 * {@code content}, {@code title}, {@code cache} and {@code tstamp}.
 *
 * <p>Behaviour is driven by three configuration properties read in
 * {@link #setConf(Configuration)}:
 * <ul>
 *   <li>{@code indexer.max.title.length} (default 100) - maximum number of
 *       chars kept from the title; a negative value disables truncation;</li>
 *   <li>{@code indexer.max.content.length} (default -1) - maximum number of
 *       chars kept from the content; a negative value disables truncation;</li>
 *   <li>{@code indexer.add.domain} (default {@code false}) - whether the
 *       {@code domain} field is added.</li>
 * </ul>
 */
public class BasicIndexingFilter implements IndexingFilter {
    public static final Logger LOG = LoggerFactory.getLogger(BasicIndexingFilter.class);

    /** Maximum number of chars of the title to index; negative means unlimited. */
    private int maxTitleLength;
    /** Maximum number of chars of the content to index; negative means unlimited. */
    private int maxContentLength;
    /** Whether the {@code domain} field is added to the document. */
    private boolean addDomain = false;
    /** Configuration handed to {@link #setConf(Configuration)}. */
    private Configuration conf;

    /**
     * Adds the basic fields to {@code doc}.
     *
     * <p>The URL used for the {@code url}, {@code host} and {@code domain}
     * fields is the one stored in the datum metadata under
     * {@code Nutch.WRITABLE_REPR_URL_KEY} when present, otherwise {@code url}.
     *
     * @param doc     document to populate; also the return value
     * @param parse   parse result supplying text, title and the
     *                {@code caching.forbidden} meta value
     * @param url     URL of the page
     * @param datum   crawl datum supplying metadata and the fetch time
     * @param inlinks not used by this filter
     * @return the same {@code doc} instance, with fields added
     * @throws IndexingException if the chosen URL string cannot be parsed as a
     *                           {@link URL}
     */
    public NutchDocument filter(NutchDocument doc, Parse parse, Text url, CrawlDatum datum, Inlinks inlinks) throws IndexingException {
        Text reprUrl = (Text) datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
        String indexedUrl = reprUrl != null ? reprUrl.toString() : url.toString();

        String host;
        try {
            URL parsed = new URL(indexedUrl);
            if (this.addDomain) {
                doc.add("domain", URLUtil.getDomainName(parsed));
            }
            host = parsed.getHost();
        } catch (MalformedURLException e) {
            // Keep the original exception as the cause so callers can see the bad URL.
            throw new IndexingException(e);
        }
        if (host != null) {
            doc.add("host", host);
        }
        doc.add("url", indexedUrl);

        // The content field is always added, even when empty.
        String content = truncate(parse.getText(), this.maxContentLength);
        doc.add("content", StringUtil.cleanField(content));

        // The title field is only added when there is something to index.
        String title = truncate(parse.getData().getTitle(), this.maxTitleLength);
        if (title.length() > 0) {
            doc.add("title", StringUtil.cleanField(title));
        }

        String caching = parse.getData().getMeta("caching.forbidden");
        if (caching != null && !"none".equals(caching)) {
            doc.add("cache", caching);
        }

        doc.add("tstamp", new Date(datum.getFetchTime()));
        return doc;
    }

    /**
     * Stores the configuration and reads the title/content length limits and
     * the add-domain switch from it.
     *
     * @param conf configuration to read the {@code indexer.*} properties from
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        this.maxTitleLength = conf.getInt("indexer.max.title.length", 100);
        this.addDomain = conf.getBoolean("indexer.add.domain", false);
        this.maxContentLength = conf.getInt("indexer.max.content.length", -1);
    }

    /**
     * @return the configuration last passed to {@link #setConf(Configuration)},
     *         or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Returns {@code text} limited to at most {@code maxLength} chars.
     *
     * <p>A {@code null} text is defensively treated as empty so that a parse
     * without text or title does not abort indexing. A negative
     * {@code maxLength} disables truncation. When the cut would fall between
     * the two halves of a UTF-16 surrogate pair, the cut is moved one char
     * earlier so the result never ends in a dangling high surrogate.
     */
    private static String truncate(String text, int maxLength) {
        if (text == null) {
            return "";
        }
        if (maxLength < 0 || text.length() <= maxLength) {
            return text;
        }
        int end = maxLength;
        // text.length() > maxLength here, so charAt(end) is always in range.
        if (end > 0
                && Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end);
    }
}

