/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.protocol;

import com.google.common.io.Files;
import crawlercommons.robots.BaseRobotRules;
import crawlercommons.robots.SimpleRobotRules;
import crawlercommons.robots.SimpleRobotRulesParser;
import java.io.File;
import java.io.FileReader;
import java.io.LineNumberReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.protocol.Protocol;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for robots.txt handling. It reads the agent names from the
 * configuration, delegates the actual parsing of robots.txt content to a
 * shared {@link SimpleRobotRulesParser}, and leaves the retrieval of the
 * rules for a given URL to subclasses.
 */
public abstract class RobotRulesParser implements Configurable {
    public static final Logger LOG = LoggerFactory.getLogger(RobotRulesParser.class);

    /**
     * Shared store of parsed rules. This class never reads or writes it;
     * presumably subclasses populate it (key semantics are defined by them).
     */
    protected static final Hashtable<String, BaseRobotRules> CACHE = new Hashtable<String, BaseRobotRules>();

    /** Rule set constructed in {@code ALLOW_ALL} mode. */
    public static final BaseRobotRules EMPTY_RULES = new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_ALL);

    /**
     * Rule set constructed in {@code ALLOW_NONE} mode. Left non-final because
     * it is part of the existing public surface of this class.
     */
    public static BaseRobotRules FORBID_ALL_RULES = new SimpleRobotRules(SimpleRobotRules.RobotRulesMode.ALLOW_NONE);

    private static SimpleRobotRulesParser robotParser = new SimpleRobotRulesParser();

    private Configuration conf;

    /**
     * Agent names built by {@link #setConf(Configuration)}: the advertised
     * agent name first, followed by any further configured agents, joined
     * with {@code ", "}.
     */
    protected String agentNames;

    /** Creates an unconfigured parser; {@link #setConf(Configuration)} must be called before use. */
    public RobotRulesParser() {
    }

    /**
     * Creates a parser and configures it immediately.
     *
     * @param conf configuration passed on to {@link #setConf(Configuration)}
     */
    public RobotRulesParser(Configuration conf) {
        this.setConf(conf);
    }

    /**
     * Stores the configuration and derives {@link #agentNames} from the
     * {@code http.agent.name} and {@code http.robots.agents} properties.
     *
     * <p>The advertised agent ({@code http.agent.name}) always comes first.
     * If {@code http.robots.agents} is unset or lists no agents, an error is
     * logged and only the advertised agent is used.
     *
     * @param conf the configuration to read from
     * @throws RuntimeException if {@code http.agent.name} is not set
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        String agentName = conf.get("http.agent.name");
        if (null == agentName) {
            throw new RuntimeException("Agent name not configured!");
        }

        // The property may be absent, in which case get() yields null;
        // treat that the same as an empty list instead of failing.
        String configuredAgents = conf.get("http.robots.agents");
        ArrayList<String> agents = new ArrayList<String>();
        if (configuredAgents != null) {
            StringTokenizer tok = new StringTokenizer(configuredAgents, ",");
            while (tok.hasMoreTokens()) {
                String agent = tok.nextToken().trim();
                // Whitespace-only entries (e.g. "a, ,b") carry no agent name.
                if (agent.length() > 0) {
                    agents.add(agent);
                }
            }
        }

        if (agents.isEmpty()) {
            LOG.error("No agents listed in 'http.robots.agents' property!");
            // Fall back to the advertised agent so the field is never left
            // null or stale from a previous configuration.
            this.agentNames = agentName;
            return;
        }

        StringBuilder combinedAgents = new StringBuilder(agentName);
        int index = 0;
        if (agents.get(0).equalsIgnoreCase(agentName)) {
            // Already emitted as the leading entry; do not repeat it.
            ++index;
        } else {
            LOG.error("Agent we advertise (" + agentName + ") not listed first in 'http.robots.agents' property!");
        }
        for (; index < agents.size(); ++index) {
            combinedAgents.append(", ").append(agents.get(index));
        }
        this.agentNames = combinedAgents.toString();
    }

    /**
     * @return the configuration last passed to {@link #setConf(Configuration)},
     *         or {@code null} if none has been set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Parses robots.txt content by delegating to the shared
     * {@link SimpleRobotRulesParser}.
     *
     * @param url         passed through as the URL argument of the parser
     * @param content     raw robots.txt bytes
     * @param contentType content type of {@code content}
     * @param robotName   agent name(s) used to select the applicable rules
     * @return the rules produced by the underlying parser
     */
    public BaseRobotRules parseRules(String url, byte[] content, String contentType, String robotName) {
        return robotParser.parseContent(url, content, contentType, robotName);
    }

    /**
     * Converts {@code url} to a {@link URL} and delegates to
     * {@link #getRobotRulesSet(Protocol, URL)}.
     *
     * @param protocol protocol handed on to the URL-based overload
     * @param url      textual form of the URL
     * @return the rules for the URL, or {@link #EMPTY_RULES} if the URL
     *         could not be constructed
     */
    public BaseRobotRules getRobotRulesSet(Protocol protocol, Text url) {
        URL u = null;
        try {
            u = new URL(url.toString());
        }
        catch (Exception e) {
            // Any failure to build the URL (including a null argument) is
            // answered with the allow-all rule set.
            return EMPTY_RULES;
        }
        return this.getRobotRulesSet(protocol, u);
    }

    /**
     * Returns the robot rules that apply to the given URL.
     *
     * @param protocol the protocol implementation supplied by the caller
     * @param url      the URL whose rules are requested
     * @return the applicable rules
     */
    public abstract BaseRobotRules getRobotRulesSet(Protocol protocol, URL url);

    /**
     * Command-line check of a robots.txt file against a list of URLs.
     *
     * <p>Arguments: {@code <robots-file> <url-file> <agent-names>...}. The
     * robots file is parsed for the given agent names, then every line of
     * the URL file is tested and the verdict printed to standard output.
     *
     * @param argv command-line arguments as described above
     */
    public static void main(String[] argv) {
        if (argv.length < 3) {
            System.err.println("Usage: RobotRulesParser <robots-file> <url-file> <agent-names>\n");
            System.err.println("\tThe <robots-file> will be parsed as a robots.txt file,");
            System.err.println("\tusing the given <agent-name> to select rules.  URLs ");
            System.err.println("\twill be read (one per line) from <url-file>, and tested");
            System.err.println("\tagainst the rules. Multiple agent names can be specified using spaces.");
            System.exit(-1);
        }
        try {
            // All arguments from the third onwards are agent names; join them
            // with commas for the parser.
            StringBuilder agentNames = new StringBuilder();
            for (int counter = 2; counter < argv.length; ++counter) {
                if (counter > 2) {
                    agentNames.append(",");
                }
                agentNames.append(argv[counter]);
            }
            byte[] robotsBytes = Files.toByteArray((File)new File(argv[0]));
            BaseRobotRules rules = robotParser.parseContent(argv[0], robotsBytes, "text/plain", agentNames.toString());
            LineNumberReader testsIn = new LineNumberReader(new FileReader(argv[1]));
            try {
                // readLine() yields null at end of input, so check before
                // trimming; every line is trimmed, not just the first.
                String testPath;
                while ((testPath = testsIn.readLine()) != null) {
                    testPath = testPath.trim();
                    System.out.println((rules.isAllowed(testPath) ? "allowed" : "not allowed") + ":\t" + testPath);
                }
            }
            finally {
                // Release the file handle even if a check throws.
                testsIn.close();
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

