/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.protocol.http.api;

import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.SimpleRobotRules;
import java.net.URL;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.RobotRulesParser;
import org.apache.nutch.protocol.http.api.HttpBase;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Robots rules parser that obtains {@code /robots.txt} through an
 * {@link HttpBase} protocol implementation and caches the resulting rules in
 * the inherited {@code CACHE}, keyed by lower-cased {@code protocol:host}.
 *
 * <p>Response handling, as implemented in {@link #getRobotRulesSet}:
 * <ul>
 *   <li>200 - the body is handed to the inherited {@code parseRules};</li>
 *   <li>403 - {@code FORBID_ALL_RULES}, unless {@link #allowForbidden} is set;</li>
 *   <li>5xx or any failure while fetching - {@code EMPTY_RULES}, not cached,
 *       so the next call fetches again;</li>
 *   <li>anything else - {@code EMPTY_RULES}, cached.</li>
 * </ul>
 * One level of HTTP redirection is followed.
 */
public class HttpRobotRulesParser extends RobotRulesParser {

    public static final Logger LOG = LoggerFactory.getLogger(HttpRobotRulesParser.class);

    /**
     * When {@code true}, a 403 answer for robots.txt is handled like any other
     * non-200/non-5xx status ({@code EMPTY_RULES}) instead of
     * {@code FORBID_ALL_RULES}. Populated from the {@code http.robots.403.allow}
     * configuration property (default {@code false}).
     */
    protected boolean allowForbidden = false;

    /** Package-private constructor; leaves {@link #allowForbidden} at {@code false}. */
    HttpRobotRulesParser() {
    }

    /**
     * Creates a parser configured from {@code conf}.
     *
     * @param conf configuration passed to the superclass constructor and used
     *             to read {@code http.robots.403.allow}
     */
    public HttpRobotRulesParser(Configuration conf) {
        super(conf);
        this.allowForbidden = conf.getBoolean("http.robots.403.allow", false);
    }

    /**
     * Builds the cache key {@code protocol:host} for a URL. Lower-casing uses
     * {@link Locale#ROOT} so the key does not depend on the JVM default locale.
     */
    private static String cacheKey(URL url) {
        return url.getProtocol().toLowerCase(Locale.ROOT)
                + ":" + url.getHost().toLowerCase(Locale.ROOT);
    }

    /** Tells whether an HTTP status code is a redirect that carries a {@code Location}. */
    private static boolean isRedirect(int code) {
        return code == 301 || code == 302 || code == 303 || code == 307 || code == 308;
    }

    /**
     * Returns the robots rules that apply to {@code url}'s host, fetching and
     * parsing {@code /robots.txt} on a cache miss.
     *
     * @param http protocol implementation; it is cast to {@link HttpBase}, and a
     *             failing cast is handled like any other fetch failure
     * @param url  any URL on the host whose rules are wanted
     * @return the cached or freshly determined rules; never {@code null}
     */
    public BaseRobotRules getRobotRulesSet(Protocol http, URL url) {
        String key = cacheKey(url);
        // Kept as BaseRobotRules (the declared return type): narrowing the
        // cached value to SimpleRobotRules is unnecessary and could fail.
        BaseRobotRules robotRules = (BaseRobotRules) CACHE.get(key);
        if (robotRules != null) {
            return robotRules;
        }

        LOG.trace("cache miss {}", url);

        boolean cacheRule = true;
        URL redir = null;
        try {
            HttpBase client = (HttpBase) http;
            URL robotsUrl = new URL(url, "/robots.txt");
            Response response = client.getResponse(robotsUrl, new CrawlDatum(), true);

            // Follow a single level of redirection.
            if (isRedirect(response.getCode())) {
                String redirection = response.getHeader("Location");
                if (redirection == null) {
                    redirection = response.getHeader("location");
                }
                if (redirection != null) {
                    // URL(context, spec) accepts absolute and relative specs
                    // alike; a relative Location must be resolved against the
                    // URL that was actually requested, i.e. the robots.txt URL.
                    redir = new URL(robotsUrl, redirection);
                    response = client.getResponse(redir, new CrawlDatum(), true);
                }
            }

            int code = response.getCode();
            if (code == 200) {
                robotRules = this.parseRules(url.toString(), response.getContent(),
                        response.getHeader("Content-Type"), this.agentNames);
            } else if (code == 403 && !this.allowForbidden) {
                robotRules = FORBID_ALL_RULES;
            } else if (code >= 500) {
                // Server-side failure: do not cache, so a later call retries.
                cacheRule = false;
                robotRules = EMPTY_RULES;
            } else {
                robotRules = EMPTY_RULES;
            }
        } catch (Throwable t) {
            // Deliberately best-effort: any failure yields EMPTY_RULES without
            // caching, so the fetch is attempted again on the next call.
            LOG.info("Couldn't get robots.txt for {}: {}", url, t.toString());
            cacheRule = false;
            robotRules = EMPTY_RULES;
        }

        if (cacheRule) {
            CACHE.put(key, robotRules);
            if (redir != null) {
                String redirKey = cacheKey(redir);
                // Share the rules with the redirect target only when what was
                // fetched is that target's own /robots.txt; a redirect to some
                // other path says nothing about the target host's rules.
                if (!redirKey.equals(key) && "/robots.txt".equals(redir.getFile())) {
                    CACHE.put(redirKey, robotRules);
                }
            }
        }
        return robotRules;
    }
}

