/*
 * Decompiled with CFR 0.152.
 */
package org.apache.manifoldcf.crawler.connectors.rss;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.crawler.connectors.rss.ThrottledFetcher;
import org.apache.manifoldcf.crawler.interfaces.IVersionActivity;
import org.apache.manifoldcf.crawler.system.Logging;

public class Robots {
    /** Version-control identification string carried over from the original source file. */
    public static final String _rcsid = "@(#)$Id: Robots.java 988245 2010-08-23 18:39:35Z kwright $";
    /**
     * Timeout value in milliseconds for robots fetches.
     * NOTE(review): not referenced by name in this file; makeValid passes the literal 60000 to
     * createConnection, presumably this constant inlined by the compiler - confirm.
     */
    protected static final int ROBOT_TIMEOUT_MILLISECONDS = 60000;
    /**
     * Label for robots fetches.
     * NOTE(review): not referenced by name in this file; makeValid passes the literal "Robot" to
     * beginFetch, presumably this constant inlined - confirm.
     */
    protected static final String ROBOT_CONNECTION_TYPE = "Robot";
    /**
     * Path of the robots file on a host.
     * NOTE(review): not referenced by name in this file; makeValid passes the literal "/robots.txt" to
     * executeFetch, presumably this constant inlined - confirm.
     */
    protected static final String ROBOT_FILE_NAME = "/robots.txt";
    /** Fetcher used by Host.makeValid to create the connection that retrieves robots.txt. */
    protected ThrottledFetcher fetcher;
    /** Count maintained by noteConnectionEstablished / noteConnectionReleased; the cache is cleared when it returns to zero. */
    protected int refCount = 0;
    /** Maps an identifying string ("protocol://host" plus ":port" when the port is not -1) to its Host entry. */
    protected Map cache = new HashMap();

    /**
     * Creates a robots cache that fetches robots.txt files through the given fetcher.
     *
     * @param fetcher the fetcher stored for later use when a host's robots.txt must be read
     */
    public Robots(ThrottledFetcher fetcher) {
        this.fetcher = fetcher;
    }

    /**
     * Registers one more user of this robots cache by incrementing the reference count.
     */
    public synchronized void noteConnectionEstablished() {
        this.refCount += 1;
    }

    /**
     * Unregisters one user of this robots cache. When the reference count drops to
     * exactly zero, every cached host entry is discarded.
     */
    public synchronized void noteConnectionReleased() {
        this.refCount -= 1;
        if (this.refCount != 0) {
            return;
        }
        this.cache.clear();
    }

    /**
     * Drops every cached host entry that reports it can be flushed at the current time.
     *
     * <p>A fresh map holding only the surviving entries replaces the old cache. Entries are
     * walked via {@code entrySet()} with explicit casts because {@code cache} is declared as a
     * raw {@code Map}; iterating its raw key set with a {@code String} loop variable does not
     * compile, and the entry walk also avoids a second lookup per key.
     */
    public synchronized void poll() {
        long currentTime = System.currentTimeMillis();
        Map<String, Host> survivors = new HashMap<String, Host>();
        for (Object element : this.cache.entrySet()) {
            Map.Entry entry = (Map.Entry)element;
            Host host = (Host)entry.getValue();
            if (host.canBeFlushed(currentTime)) {
                continue;
            }
            survivors.put((String)entry.getKey(), host);
        }
        this.cache = survivors;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Decides whether a path on a given host may be fetched, according to that host's robots data.
     *
     * <p>The host entry is looked up (and created if missing) under this object's lock, keyed by
     * {@code protocol://hostName} with {@code :port} appended when {@code port} is not -1. The
     * actual decision is delegated to the host entry outside the lock; all throttling, proxy,
     * activity and connection-limit arguments are passed through to it unchanged.
     *
     * @param protocol protocol part of the cache key, also handed to the host entry
     * @param port port number, or -1 to leave it out of the cache key
     * @param hostName host whose robots data is consulted
     * @param pathString path to be checked
     * @param userAgent user-agent value passed to the host entry
     * @param from passed through to the host entry
     * @return the host entry's verdict for the path
     * @throws ManifoldCFException propagated from the host entry
     * @throws ServiceInterruption propagated from the host entry
     */
    public boolean isFetchAllowed(String protocol, int port, String hostName, String pathString, String userAgent, String from, double minimumMillisecondsPerBytePerServer, int maxOpenConnectionsPerServer, long minimumMillisecondsPerFetchPerServer, String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword, IVersionActivity activities, int connectionLimit) throws ManifoldCFException, ServiceInterruption {
        StringBuilder keyBuilder = new StringBuilder();
        keyBuilder.append(protocol).append("://").append(hostName);
        if (port != -1) {
            keyBuilder.append(":").append(port);
        }
        String cacheKey = keyBuilder.toString();

        Host target;
        synchronized (this) {
            Object cached = this.cache.get(cacheKey);
            if (cached != null) {
                target = (Host)cached;
            } else {
                target = new Host(protocol, port, hostName);
                this.cache.put(cacheKey, target);
            }
        }

        // The timestamp is taken after the cache lookup, just before delegating.
        long now = System.currentTimeMillis();
        return target.isFetchAllowed(now, pathString, userAgent, from, minimumMillisecondsPerBytePerServer, maxOpenConnectionsPerServer, minimumMillisecondsPerFetchPerServer, proxyHost, proxyPort, proxyAuthDomain, proxyAuthUsername, proxyAuthPassword, activities, connectionLimit);
    }

    /**
     * Produces a printable rendering of a string by replacing each character below the space
     * character with a caret followed by the character 64 code points higher (so a tab becomes
     * {@code ^I}). All other characters are copied unchanged.
     *
     * @param inputString the text to render; must not be null
     * @return the rendered text
     */
    protected static String makeReadable(String inputString) {
        StringBuilder readable = new StringBuilder();
        for (int pos = 0; pos < inputString.length(); pos++) {
            char ch = inputString.charAt(pos);
            if (ch < ' ') {
                readable.append('^').append((char)(ch + 64));
            } else {
                readable.append(ch);
            }
        }
        return readable.toString();
    }

    /**
     * Tests a path against a robots.txt path specification, starting at the beginning of both.
     *
     * @param path the path being checked
     * @param spec the specification; see the four-argument overload for the syntax
     * @return true if the path matches the specification
     */
    protected static boolean doesPathMatch(String path, String spec) {
        return doesPathMatch(path, 0, spec, 0);
    }

    /**
     * Tests the remainder of a path against the remainder of a robots.txt path specification.
     *
     * <p>Matching rules:
     * <ul>
     * <li>The specification is a prefix pattern: once it is fully consumed the match succeeds,
     *     even if path characters remain.</li>
     * <li>{@code *} matches any run of characters, including none.</li>
     * <li>{@code $} as the very last specification character requires the path to end there;
     *     anywhere else it is an ordinary character.</li>
     * <li>Every other character must match exactly.</li>
     * </ul>
     *
     * <p>The specification comes from a remote robots.txt file, so matching is done iteratively
     * with a single backtrack point (the most recent {@code *}). Work is bounded by roughly
     * path length times specification length, rather than growing exponentially with the number
     * of wildcards as naive recursive backtracking does.
     *
     * @param path the path being checked
     * @param pathIndex index in {@code path} at which matching starts
     * @param spec the specification
     * @param specIndex index in {@code spec} at which matching starts
     * @return true if the path matches the specification
     */
    protected static boolean doesPathMatch(String path, int pathIndex, String spec, int specIndex) {
        int pathLength = path.length();
        int specLength = spec.length();
        // Position in spec just past the most recent '*', or -1 if none has been seen.
        int resumeSpecIndex = -1;
        // Path position where the text following the most recent '*' is currently assumed to begin.
        int resumePathIndex = -1;

        while (specIndex != specLength) {
            char specChar = spec.charAt(specIndex);
            if (specChar == '*') {
                // Start by letting the wildcard match nothing; it grows only when needed.
                ++specIndex;
                resumeSpecIndex = specIndex;
                resumePathIndex = pathIndex;
                continue;
            }

            boolean matched;
            if (specChar == '$' && specIndex + 1 == specLength) {
                if (pathIndex == pathLength) {
                    return true;
                }
                matched = false;
            } else {
                matched = pathIndex < pathLength && path.charAt(pathIndex) == specChar;
            }

            if (matched) {
                ++specIndex;
                ++pathIndex;
                continue;
            }

            // Mismatch: the only remaining option is for the latest wildcard to absorb one more
            // path character. Earlier wildcards never need revisiting, because the leftmost
            // placement of each intermediate segment leaves the most path for what follows.
            if (resumeSpecIndex == -1 || resumePathIndex >= pathLength) {
                return false;
            }
            ++resumePathIndex;
            pathIndex = resumePathIndex;
            specIndex = resumeSpecIndex;
        }
        return true;
    }

    /**
     * One robots.txt record: a group of user-agent names together with the allow and disallow
     * path specifications that apply to them.
     */
    protected static class Record {
        /** User-agent names, stored as given. */
        protected ArrayList userAgents = new ArrayList();
        /** Disallowed path specifications. */
        protected ArrayList disallows = new ArrayList();
        /** Allowed path specifications. */
        protected ArrayList allows = new ArrayList();

        /** Appends a user-agent name to this record. */
        public void addAgent(String agentName) {
            this.userAgents.add(agentName);
        }

        /** Appends a disallowed path specification to this record. */
        public void addDisallow(String disallowPath) {
            this.disallows.add(disallowPath);
        }

        /** Appends an allowed path specification to this record. */
        public void addAllow(String allowPath) {
            this.allows.add(allowPath);
        }

        /**
         * Checks whether any agent name in this record matches the supplied name.
         *
         * @param agentNameUpper the name to test; compared against upper-cased record names
         * @param exactMatch if true, a record name matches when its upper-cased, trimmed form
         *        equals {@code agentNameUpper}; if false, it matches when its upper-cased form
         *        occurs anywhere inside {@code agentNameUpper}
         * @return true as soon as one agent name matches, false if none does
         */
        public boolean isAgentMatch(String agentNameUpper, boolean exactMatch) {
            for (int idx = 0; idx < this.userAgents.size(); idx++) {
                String candidate = ((String)this.userAgents.get(idx)).toUpperCase();
                boolean hit = exactMatch
                    ? candidate.trim().equals(agentNameUpper)
                    : agentNameUpper.indexOf(candidate) != -1;
                if (hit) {
                    return true;
                }
            }
            return false;
        }

        /**
         * @param path the path to test
         * @return true if any disallow specification of this record matches the path
         */
        public boolean isDisallowed(String path) {
            return Record.matchesAny(this.disallows, path);
        }

        /**
         * @param path the path to test
         * @return true if any allow specification of this record matches the path
         */
        public boolean isAllowed(String path) {
            return Record.matchesAny(this.allows, path);
        }

        /** Returns true if the path matches at least one of the given specifications, tried in order. */
        private static boolean matchesAny(ArrayList specs, String path) {
            for (int idx = 0; idx < specs.size(); idx++) {
                String spec = (String)specs.get(idx);
                if (Robots.doesPathMatch(path, spec)) {
                    return true;
                }
            }
            return false;
        }
    }

    protected class Host {
        /** Protocol passed to executeFetch when robots.txt is retrieved for this host. */
        protected String protocol;
        /** Port passed to executeFetch when robots.txt is retrieved for this host. */
        protected int port;
        /** Host name given at construction. */
        protected String hostName;
        /** Time (milliseconds) at and after which the parsed records are considered stale; set by makeValid. */
        protected long invalidTime = -1L;
        /** True once makeValid has completed without throwing; reset when a re-read starts or the entry expires. */
        protected boolean isValid = false;
        /** Parsed Record objects, or null; isFetchAllowed treats null as "everything allowed". */
        protected ArrayList records = null;
        /** True while a thread holds the right to (re)read robots.txt for this host. */
        protected boolean readingRobots = false;
        /** Number of threads currently evaluating the cached records. */
        protected int checkingRobots = 0;

        /**
         * Creates an entry for one protocol/port/host combination; no robots data is loaded yet.
         *
         * @param protocol protocol used when fetching robots.txt
         * @param port port used when fetching robots.txt
         * @param hostName name of the host
         */
        public Host(String protocol, int port, String hostName) {
            this.protocol = protocol;
            this.port = port;
            this.hostName = hostName;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        /**
         * Decides whether {@code pathString} may be fetched from this host for the given user agent.
         *
         * <p>Each caller first takes one of two roles under this object's monitor. It becomes the
         * single <em>reader</em>, which re-fetches robots.txt via {@code makeValid}, when the cached
         * data is missing or stale and no other thread is reading or checking. Otherwise it becomes
         * one of possibly many <em>checkers</em>, which only consult the cached records. The role is
         * released in a {@code finally} block and waiting threads are notified.
         *
         * <p>The role is remembered in a local variable and every wait re-evaluates the full
         * condition. A spurious wakeup therefore cannot produce a second concurrent reader, and a
         * thread always releases exactly what it claimed.
         *
         * <p>Record selection: the first record whose agent name occurs within the upper-cased
         * {@code userAgent} is used; failing that, the first record whose agent is exactly
         * {@code *}. The fetch is allowed when there are no records, when no record applies, when
         * an allow rule matches, or when no disallow rule matches.
         *
         * @param currentTime time in milliseconds used to judge staleness of the cached data
         * @param pathString path to check
         * @param userAgent user-agent value; also passed to {@code makeValid}
         * @param from passed to {@code makeValid}
         * @param activities passed to {@code makeValid}
         * @return true if the fetch is allowed
         * @throws ServiceInterruption propagated from {@code makeValid}
         * @throws ManifoldCFException if the thread is interrupted while waiting, or propagated from {@code makeValid}
         */
        public boolean isFetchAllowed(long currentTime, String pathString, String userAgent, String from, double minimumMillisecondsPerBytePerServer, int maxOpenConnectionsPerServer, long minimumMillisecondsPerFetchPerServer, String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword, IVersionActivity activities, int connectionLimit) throws ServiceInterruption, ManifoldCFException {
            // Nothing has been claimed if this throws, so it stays outside the try/finally.
            final boolean reader = this.enterAsReader(currentTime);
            try {
                if (reader) {
                    this.makeValid(currentTime, userAgent, from, minimumMillisecondsPerBytePerServer, maxOpenConnectionsPerServer, minimumMillisecondsPerFetchPerServer, proxyHost, proxyPort, proxyAuthDomain, proxyAuthUsername, proxyAuthPassword, this.hostName, activities, connectionLimit);
                }
                ArrayList currentRecords = this.records;
                if (currentRecords == null) {
                    return true;
                }
                String userAgentUpper = userAgent.toUpperCase();
                Record applicable = this.findRecord(currentRecords, userAgentUpper, false);
                if (applicable == null) {
                    applicable = this.findRecord(currentRecords, "*", true);
                }
                if (applicable == null) {
                    return true;
                }
                // An allow match wins over a disallow match.
                return applicable.isAllowed(pathString) || !applicable.isDisallowed(pathString);
            }
            finally {
                synchronized (this) {
                    if (reader) {
                        this.readingRobots = false;
                    } else {
                        --this.checkingRobots;
                    }
                    this.notifyAll();
                }
            }
        }

        /**
         * Blocks until this thread can act as reader or checker, and claims that role.
         *
         * @return true if the caller is now the reader (readingRobots set, cached data cleared);
         *         false if it is a checker (checkingRobots incremented)
         */
        private synchronized boolean enterAsReader(long currentTime) throws ManifoldCFException {
            while (true) {
                if (this.readingRobots) {
                    // Another thread is reading; wait, then re-evaluate everything from the top.
                    this.awaitStateChange();
                    continue;
                }
                if (this.isValid && currentTime < this.invalidTime) {
                    ++this.checkingRobots;
                    return false;
                }
                if (this.checkingRobots > 0) {
                    // Data is stale but still in use by checkers; wait for them to drain.
                    this.awaitStateChange();
                    continue;
                }
                this.isValid = false;
                this.records = null;
                this.readingRobots = true;
                return true;
            }
        }

        /** Waits on this object's monitor (which the caller must hold), converting interruption into a ManifoldCFException. */
        private void awaitStateChange() throws ManifoldCFException {
            try {
                this.wait();
            }
            catch (InterruptedException e) {
                throw new ManifoldCFException("Interrupted: " + e.getMessage(), (Throwable)e, 2);
            }
        }

        /** Returns the first record in the list whose agent matches, or null if none does. */
        private Record findRecord(ArrayList candidates, String agentNameUpper, boolean exactMatch) {
            for (int idx = 0; idx < candidates.size(); idx++) {
                Record candidate = (Record)candidates.get(idx);
                if (candidate.isAgentMatch(agentNameUpper, exactMatch)) {
                    return candidate;
                }
            }
            return null;
        }

        /**
         * Reports whether this host entry may be dropped from the cache.
         *
         * <p>An entry that is being read or checked is never flushable. An entry whose data is
         * valid and not yet expired is kept. Otherwise the entry is flushable; if its data had
         * been valid but has now expired, it is marked invalid and its records are released.
         *
         * @param currentTime the time, in milliseconds, against which expiry is judged
         * @return true if the entry can be removed from the cache
         */
        public synchronized boolean canBeFlushed(long currentTime) {
            boolean inUse = this.readingRobots || this.checkingRobots > 0;
            if (inUse) {
                return false;
            }
            if (this.isValid && currentTime < this.invalidTime) {
                return false;
            }
            if (this.isValid) {
                // Valid but expired: drop the stale data before reporting flushable.
                this.isValid = false;
                this.records = null;
            }
            return true;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         * Unable to fully structure code
         */
        /**
         * Fetches "/robots.txt" for this host through the enclosing Robots' fetcher and rebuilds
         * {@code records} from the response.
         *
         * <p>NOTE(review): this body is raw decompiler output that could not be structured. The
         * locals {@code connection}, {@code responseCode}, {@code is}, {@code r} and {@code br} have
         * no declarations, and the {@code ** break;}, {@code ** default:}, {@code lblNN:} and
         * {@code ArrayList<E>} fragments are not valid Java. It is kept verbatim because the types
         * involved are not visible in this file.
         *
         * <p>Visible behaviour: {@code invalidTime} is set to {@code currentTime} plus 86400000 ms
         * (24 hours) before the fetch. {@code isValid} becomes true only if the method reaches its
         * end without throwing. The connection is closed in every case.
         *
         * @param currentTime base time in milliseconds for the new expiry and for the retry time on I/O failure
         * @param hostName host name passed to createConnection and parseRobotsTxt, and used in the error message
         * @param activities passed to parseRobotsTxt and to doneFetch
         * @throws ServiceInterruption when an IOException (other than InterruptedIOException) occurs
         * @throws ManifoldCFException when an InterruptedIOException occurs, or propagated from parsing
         */
        protected void makeValid(long currentTime, String userAgent, String from, double minimumMillisecondsPerBytePerServer, int maxOpenConnectionsPerServer, long minimumMillisecondsPerFetchPerServer, String proxyHost, int proxyPort, String proxyAuthDomain, String proxyAuthUsername, String proxyAuthPassword, String hostName, IVersionActivity activities, int connectionLimit) throws ServiceInterruption, ManifoldCFException {
            this.invalidTime = currentTime + 86400000L;
            connection = Robots.this.fetcher.createConnection(hostName, minimumMillisecondsPerBytePerServer, maxOpenConnectionsPerServer, minimumMillisecondsPerFetchPerServer, connectionLimit, 60000);
            try {
                connection.beginFetch("Robot");
                try {
                    responseCode = connection.executeFetch(this.protocol, this.port, "/robots.txt", userAgent, from, proxyHost, proxyPort, proxyAuthDomain, proxyAuthUsername, proxyAuthPassword, null, null);
                    // NOTE(review): the meaning of codes 0 and 1 is defined by the connection type,
                    // which is not visible here - presumably "success" and "site error"; confirm.
                    switch (responseCode) {
                        case 0: {
                            // Parse the response body as robots.txt, decoding it as UTF-8.
                            is = connection.getResponseBodyStream();
                            try {
                                r = new InputStreamReader(is, "utf-8");
                                try {
                                    br = new BufferedReader(r);
                                    try {
                                        this.parseRobotsTxt(br, hostName, activities);
                                        break;
                                    }
                                    finally {
                                        br.close();
                                    }
                                }
                                finally {
                                    r.close();
                                }
                            }
                            finally {
                                is.close();
                            }
                        }
                        case 1: {
                            // Install a single record that disallows "/" for agent "*".
                            r = new Record();
                            r.addAgent("*");
                            r.addDisallow("/");
                            this.records = new ArrayList<E>();
                            this.records.add(r);
                            ** break;
lbl34:
                            // 1 sources

                            break;
                        }
                        // Any other code leaves records untouched; isFetchAllowed nulls records
                        // before calling this method and treats null as "everything allowed".
                        ** default:
lbl36:
                        // 1 sources

                        break;
                    }
                }
                finally {
                    connection.doneFetch(activities);
                }
            }
            catch (InterruptedIOException e) {
                throw new ManifoldCFException("Interrupted: " + e.getMessage(), (Throwable)e, 2);
            }
            catch (IOException e) {
                // The message always appends the port, even when it is -1, and the IOException is
                // not passed along as a cause. The second argument is currentTime + 300000 ms
                // (5 minutes) - presumably the retry time; confirm against ServiceInterruption.
                throw new ServiceInterruption("Couldn't fetch robots.txt from " + this.protocol + "://" + hostName + ":" + Integer.toString(this.port), currentTime + 300000L);
            }
            finally {
                connection.close();
            }
            this.isValid = true;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        /**
         * Parses a robots.txt stream into {@code records} and reports the outcome as a
         * "robots parse" activity.
         *
         * <p>Per line: everything from the first {@code #} is discarded and the remainder is
         * trimmed. Recognised directives are {@code User-agent}, {@code Disallow}, {@code Allow}
         * and {@code Crawl-delay} (ignored), matched case-insensitively with or without a colon
         * after the name. A {@code User-agent} line that follows at least one allow/disallow line
         * starts a new record; consecutive {@code User-agent} lines share one record. Empty
         * allow/disallow values add no rule but still count as an action. An unrecognised line
         * containing {@code <html} or {@code <HTML} stops parsing with status "HTML"; any other
         * unrecognised line, or an allow/disallow before the first user-agent, is logged and
         * yields status "ERRORS".
         *
         * <p>Directive values are taken from the trimmed line, so leading whitespace before a
         * directive no longer shifts the extracted value. Directive names are compared with
         * {@code regionMatches(ignoreCase)} rather than default-locale lower-casing, so
         * recognition does not depend on the JVM locale.
         *
         * @param r reader positioned at the start of the robots.txt content
         * @param hostName host name used in log messages and in the recorded activity
         * @param activities receives one "robots parse" activity record, even when reading fails
         * @throws IOException if reading from {@code r} fails
         * @throws ManifoldCFException propagated from recording the activity
         */
        protected void parseRobotsTxt(BufferedReader r, String hostName, IVersionActivity activities) throws IOException, ManifoldCFException {
            boolean parseCompleted = false;
            boolean robotsWasHtml = false;
            boolean foundErrors = false;
            String description = null;
            long startParseTime = System.currentTimeMillis();
            try {
                this.records = new ArrayList<Record>();
                Record record = null;
                boolean seenAction = false;
                String rawLine;
                while ((rawLine = r.readLine()) != null) {
                    int numSignPos = rawLine.indexOf('#');
                    String uncommented = numSignPos == -1 ? rawLine : rawLine.substring(0, numSignPos);
                    String line = uncommented.trim();
                    if (line.length() == 0) {
                        continue;
                    }

                    String value = this.directiveValue(line, "user-agent");
                    if (value != null) {
                        if (seenAction) {
                            // Rules were already collected, so this agent line opens a new record.
                            this.records.add(record);
                            record = null;
                            seenAction = false;
                        }
                        if (record == null) {
                            record = new Record();
                        }
                        record.addAgent(value);
                        continue;
                    }

                    value = this.directiveValue(line, "disallow");
                    if (value != null) {
                        if (record == null) {
                            description = "Disallow without User-agent";
                            Logging.connectors.warn((Object)("Web: Bad robots.txt file format from '" + hostName + "': " + description));
                            foundErrors = true;
                            continue;
                        }
                        if (value.length() > 0) {
                            record.addDisallow(value);
                        }
                        seenAction = true;
                        continue;
                    }

                    value = this.directiveValue(line, "allow");
                    if (value != null) {
                        if (record == null) {
                            description = "Allow without User-agent";
                            Logging.connectors.warn((Object)("Web: Bad robots.txt file format from '" + hostName + "': " + description));
                            foundErrors = true;
                            continue;
                        }
                        if (value.length() > 0) {
                            record.addAllow(value);
                        }
                        seenAction = true;
                        continue;
                    }

                    if (this.directiveValue(line, "crawl-delay") != null) {
                        // Recognised but deliberately ignored.
                        continue;
                    }

                    // Unknown line: report it using the comment-stripped (untrimmed) text.
                    String problemLine = Robots.makeReadable(uncommented);
                    description = "Unknown robots.txt line: '" + problemLine + "'";
                    Logging.connectors.warn((Object)("Web: Unknown robots.txt line from '" + hostName + "': '" + problemLine + "'"));
                    if (uncommented.indexOf("<html") != -1 || uncommented.indexOf("<HTML") != -1) {
                        robotsWasHtml = true;
                        parseCompleted = true;
                        break;
                    }
                    foundErrors = true;
                }
                if (record != null) {
                    this.records.add(record);
                }
                parseCompleted = true;
            }
            finally {
                String status;
                if (!parseCompleted) {
                    status = "INCOMPLETE";
                    description = "Parsing was interrupted";
                } else if (robotsWasHtml) {
                    status = "HTML";
                    description = "Robots file contained HTML, skipped";
                } else if (foundErrors) {
                    status = "ERRORS";
                } else {
                    status = "SUCCESS";
                    description = null;
                }
                activities.recordActivity(Long.valueOf(startParseTime), "robots parse", null, hostName, status, description, null);
            }
        }

        /**
         * Extracts the value of a directive from an already trimmed line.
         *
         * @param line the trimmed, comment-free line
         * @param directive the directive name, without a colon
         * @return the trimmed text after the name and an optional immediately following colon,
         *         or null if the line does not start with the name (ignoring case)
         */
        private String directiveValue(String line, String directive) {
            int nameLength = directive.length();
            if (!line.regionMatches(true, 0, directive, 0, nameLength)) {
                return null;
            }
            int valueStart = nameLength;
            if (valueStart < line.length() && line.charAt(valueStart) == ':') {
                ++valueStart;
            }
            return line.substring(valueStart).trim();
        }
    }
}

