/*
 * Decompiled with CFR 0.152.
 */
package org.apache.manifoldcf.crawler.connectors.webcrawler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.manifoldcf.core.cachemanager.BaseDescription;
import org.apache.manifoldcf.core.cachemanager.ExecutorBase;
import org.apache.manifoldcf.core.database.BaseTable;
import org.apache.manifoldcf.core.interfaces.BinaryInput;
import org.apache.manifoldcf.core.interfaces.CacheManagerFactory;
import org.apache.manifoldcf.core.interfaces.ColumnDescription;
import org.apache.manifoldcf.core.interfaces.ICacheClass;
import org.apache.manifoldcf.core.interfaces.ICacheDescription;
import org.apache.manifoldcf.core.interfaces.ICacheExecutor;
import org.apache.manifoldcf.core.interfaces.ICacheHandle;
import org.apache.manifoldcf.core.interfaces.ICacheManager;
import org.apache.manifoldcf.core.interfaces.IDBInterface;
import org.apache.manifoldcf.core.interfaces.IResultRow;
import org.apache.manifoldcf.core.interfaces.IResultSet;
import org.apache.manifoldcf.core.interfaces.IThreadContext;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
import org.apache.manifoldcf.core.interfaces.StringSet;
import org.apache.manifoldcf.core.interfaces.StringSetBuffer;
import org.apache.manifoldcf.core.interfaces.TempFileInput;
import org.apache.manifoldcf.crawler.interfaces.IVersionActivity;
import org.apache.manifoldcf.crawler.system.Logging;

public class RobotsManager
extends BaseTable {
    /** Version-control {@code $Id$} keyword string for this source file. */
    public static final String _rcsid = "@(#)$Id: RobotsManager.java 988245 2010-08-23 18:39:35Z kwright $";
    /** Shared cache-class descriptor handed out by {@code HostDescription.getObjectClass()}. */
    protected static RobotsCacheClass robotsCacheClass = new RobotsCacheClass();
    /** Column holding the host name; rows are looked up by this column. */
    protected static final String hostField = "hostname";
    /** Column holding the stored robots data; the same string is passed to the superclass constructor. */
    protected static final String robotsField = "robotsdata";
    /** Column holding the expiration time that {@code checkFetchAllowed} compares against its {@code currentTime}. */
    protected static final String expirationField = "expirationtime";
    /** Cache manager obtained from {@code CacheManagerFactory} in the constructor. */
    ICacheManager cacheManager;

    /**
     * Creates a manager bound to the given database handle.
     *
     * @param tc thread context used to obtain the cache manager
     * @param database database handle passed to the superclass
     * @throws ManifoldCFException propagated from the superclass constructor or the cache manager factory
     */
    public RobotsManager(IThreadContext tc, IDBInterface database) throws ManifoldCFException {
        // Second superclass argument; same value as the robotsField constant.
        super(database, "robotsdata");
        cacheManager = CacheManagerFactory.make(tc);
    }

    /**
     * Creates the table when {@code getTableSchema} reports no existing schema;
     * does nothing otherwise.
     *
     * @throws ManifoldCFException propagated from the schema lookup or the create call
     */
    public void install() throws ManifoldCFException {
        if (getTableSchema(null, null) != null) {
            // A schema already exists; nothing to create.
            return;
        }
        HashMap<String, ColumnDescription> columns = new HashMap<String, ColumnDescription>();
        columns.put(hostField, new ColumnDescription("VARCHAR(255)", true, false, null, null, false));
        columns.put(robotsField, new ColumnDescription("BLOB", false, true, null, null, false));
        columns.put(expirationField, new ColumnDescription("BIGINT", false, false, null, null, false));
        performCreate(columns, null);
    }

    /**
     * Drops the table by delegating to {@code performDrop}.
     *
     * @throws ManifoldCFException propagated from the drop call
     */
    public void deinstall() throws ManifoldCFException {
        performDrop(null);
    }

    /**
     * Decides whether a path on a host may be fetched, based on the robots data
     * obtained through the cache manager.
     *
     * @param userAgent agent name to evaluate the rules for
     * @param hostName host whose robots data is consulted
     * @param currentTime time compared against the stored expiration time
     * @param pathString path to test
     * @param activities activity recorder passed on to the robots parser
     * @return {@code null} when no robots data is available or its expiration time is
     *         not later than {@code currentTime}; otherwise the allow/deny decision
     * @throws ManifoldCFException propagated from the cache manager or the database read
     */
    public Boolean checkFetchAllowed(String userAgent, String hostName, long currentTime, String pathString, IVersionActivity activities) throws ManifoldCFException {
        StringSetBuffer ssb = new StringSetBuffer();
        ssb.add(RobotsManager.getRobotsKey(hostName));
        HostDescription[] objectDescriptions = new HostDescription[1];
        objectDescriptions[0] = new HostDescription(hostName, new StringSet(ssb));
        HostExecutor exec = new HostExecutor(this, activities, objectDescriptions[0]);
        this.cacheManager.findObjectsAndExecute((ICacheDescription[])objectDescriptions, null, (ICacheExecutor)exec, this.getTransactionID());
        RobotsData rd = exec.getResults();
        if (rd == null || rd.getExpirationTime() <= currentTime) {
            // Missing or expired data: the caller gets no decision.
            return null;
        }
        // Boolean.valueOf reuses the canonical instances instead of allocating via the deprecated constructor.
        return Boolean.valueOf(rd.isFetchAllowed(userAgent, pathString));
    }

    /**
     * Stores (inserts or updates) the robots row for a host and invalidates the
     * matching cache key.
     *
     * <p>The stream, when present, is first spooled into a {@code TempFileInput}, which
     * is discarded before returning. The database work runs inside a transaction that is
     * signalled for rollback on any {@code ManifoldCFException}, {@code RuntimeException}
     * or {@code Error}.
     *
     * <p>NOTE(review): when {@code data} is {@code null} the robots column is not included
     * in the written values; presumably an update then leaves previously stored robots
     * data in place - confirm this is intended.
     *
     * @param hostName host the row belongs to
     * @param expirationTime expiration time to store
     * @param data robots content, or {@code null} to write only the expiration time
     * @throws ManifoldCFException on database/cache failure, or when spooling fails with error code 2
     * @throws IOException when spooling the stream fails with any other error code
     */
    public void writeRobotsData(String hostName, long expirationTime, InputStream data) throws ManifoldCFException, IOException {
        TempFileInput tfi = null;
        try {
            if (data != null) {
                try {
                    tfi = new TempFileInput(data);
                }
                catch (ManifoldCFException e) {
                    // Error code 2 is passed through untouched (this class uses code 2 for
                    // its own "Interrupted" exceptions); everything else is reported as a
                    // fetch failure, keeping the original exception as the cause.
                    if (e.getErrorCode() == 2) {
                        throw e;
                    }
                    throw new IOException("Fetch failed: " + e.getMessage(), e);
                }
            }
            StringSetBuffer ssb = new StringSetBuffer();
            ssb.add(RobotsManager.getRobotsKey(hostName));
            StringSet cacheKeys = new StringSet(ssb);
            ICacheHandle ch = this.cacheManager.enterCache(null, cacheKeys, this.getTransactionID());
            try {
                this.beginTransaction();
                try {
                    ArrayList<String> params = new ArrayList<String>();
                    params.add(hostName);
                    IResultSet set = this.performQuery("SELECT * FROM " + this.getTableName() + " WHERE " + hostField + "=?", params, null, null);
                    HashMap<String, Object> values = new HashMap<String, Object>();
                    values.put(expirationField, Long.valueOf(expirationTime));
                    if (tfi != null) {
                        values.put(robotsField, tfi);
                    }
                    if (set.getRowCount() > 0) {
                        // Row exists: update it in place.
                        params.clear();
                        params.add(hostName);
                        this.performUpdate(values, " WHERE hostname=?", params, null);
                    } else {
                        values.put(hostField, hostName);
                        this.performInsert(values, null);
                    }
                    this.cacheManager.invalidateKeys(ch);
                }
                catch (ManifoldCFException e) {
                    this.signalRollback();
                    throw e;
                }
                catch (RuntimeException e) {
                    // Unchecked failures must not let a half-finished transaction commit.
                    this.signalRollback();
                    throw e;
                }
                catch (Error e) {
                    this.signalRollback();
                    throw e;
                }
                finally {
                    this.endTransaction();
                }
            }
            finally {
                this.cacheManager.leaveCache(ch);
            }
        }
        finally {
            if (tfi != null) {
                tfi.discard();
            }
        }
    }

    /**
     * Builds the cache key for a host's robots data.
     *
     * @param hostName host name to build the key for
     * @return the host name prefixed with {@code ROBOTS_}
     */
    protected static String getRobotsKey(String hostName) {
        final String keyPrefix = "ROBOTS_";
        return keyPrefix + hostName;
    }

    /**
     * Reads the stored robots row for a host and parses it into a {@code RobotsData}.
     *
     * @param hostName host to look up
     * @param activities activity recorder handed to the parser
     * @return the parsed data, or {@code null} when no row exists for the host
     * @throws ManifoldCFException on database failure, when more than one row matches,
     *         or wrapping an I/O failure (error code 2 when the I/O was interrupted)
     */
    protected RobotsData readRobotsData(String hostName, IVersionActivity activities) throws ManifoldCFException {
        ArrayList<String> list = new ArrayList<String>();
        list.add(hostName);
        IResultSet set = this.performQuery("SELECT robotsdata,expirationtime FROM " + this.getTableName() + " WHERE " + hostField + "=?", list, null, null);
        int rowCount = set.getRowCount();
        if (rowCount == 0) {
            return null;
        }
        if (rowCount > 1) {
            throw new ManifoldCFException("Unexpected number of robotsdata rows matching '" + hostName + "': " + Integer.toString(rowCount));
        }
        IResultRow row = set.getRow(0);
        long expiration = ((Long)row.getValue(expirationField)).longValue();
        BinaryInput bi = (BinaryInput)row.getValue(robotsField);
        try {
            if (bi == null) {
                // No stored content: the RobotsData is built without a stream.
                return new RobotsData(null, expiration, hostName, activities);
            }
            try {
                return new RobotsData(bi.getStream(), expiration, hostName, activities);
            }
            finally {
                // Release the binary input on success and on failure alike.
                bi.discard();
            }
        }
        catch (InterruptedIOException e) {
            throw new ManifoldCFException("Interrupted: " + e.getMessage(), (Throwable)e, 2);
        }
        catch (IOException e) {
            throw new ManifoldCFException("IO error reading robots data for " + hostName + ": " + e.getMessage(), (Throwable)e);
        }
    }

    /**
     * Renders a string with control characters made visible.
     *
     * <p>Every character below the space character is replaced by a caret followed by
     * the character 64 code points higher (tab becomes {@code ^I}); all other
     * characters are copied unchanged.
     *
     * @param inputString text to convert; must not be {@code null}
     * @return the converted text
     */
    protected static String makeReadable(String inputString) {
        final int length = inputString.length();
        StringBuilder readable = new StringBuilder(length);
        for (int pos = 0; pos < length; pos++) {
            char ch = inputString.charAt(pos);
            if (ch < ' ') {
                readable.append('^').append((char)(ch + 64));
            } else {
                readable.append(ch);
            }
        }
        return readable.toString();
    }

    /**
     * Tests a path against a robots.txt path specification, both from their start.
     *
     * @param path path to test
     * @param spec specification; see the four-argument overload for the syntax
     * @return {@code true} when the path matches
     */
    protected static boolean doesPathMatch(String path, String spec) {
        return doesPathMatch(path, 0, spec, 0);
    }

    /**
     * Tests the remainder of a path against the remainder of a specification.
     *
     * <p>Syntax: {@code *} matches any run of characters (including none); a {@code $}
     * that is the last character of the specification requires the path to end there;
     * any other character, including a {@code $} elsewhere, must match literally. When
     * the specification runs out without a trailing {@code $}, the match succeeds
     * regardless of what is left of the path (prefix match).
     *
     * <p>The matcher is iterative and only ever backtracks to the most recent
     * {@code *}, so its cost is bounded by path length times specification length even
     * for specifications with many wildcards. Specifications originate from remote
     * robots.txt files, so a backtracking explosion or deep recursion must not be
     * possible.
     *
     * @param path path to test
     * @param pathIndex index in {@code path} at which matching starts
     * @param spec specification to match against
     * @param specIndex index in {@code spec} at which matching starts
     * @return {@code true} when the path matches
     */
    protected static boolean doesPathMatch(String path, int pathIndex, String spec, int specIndex) {
        final int pathLength = path.length();
        final int specLength = spec.length();
        // Position just after the most recent '*' run, and the path position at which
        // the text following that '*' is currently being tried (-1: no '*' seen yet).
        int resumeSpec = -1;
        int resumePath = -1;
        while (specIndex < specLength) {
            char specChar = spec.charAt(specIndex);
            if (specChar == '*') {
                // Collapse consecutive stars; they are equivalent to a single one.
                do {
                    ++specIndex;
                } while (specIndex < specLength && spec.charAt(specIndex) == '*');
                resumeSpec = specIndex;
                resumePath = pathIndex;
                continue;
            }
            if (specChar == '$' && specIndex == specLength - 1) {
                // Trailing '$' anchors the match to the end of the path.
                if (pathIndex == pathLength) {
                    return true;
                }
            } else if (pathIndex < pathLength && path.charAt(pathIndex) == specChar) {
                ++pathIndex;
                ++specIndex;
                continue;
            }
            // Mismatch: let the last '*' swallow one more path character and retry,
            // unless there is no '*' or it has already been tried against the empty tail.
            if (resumeSpec < 0 || resumePath >= pathLength) {
                return false;
            }
            pathIndex = ++resumePath;
            specIndex = resumeSpec;
        }
        return true;
    }

    /**
     * One robots.txt record: a group of user-agent names together with the allow and
     * disallow path specifications that apply to them.
     */
    protected static class Record {
        /** Agent names in the order they were added. */
        protected ArrayList<String> userAgents = new ArrayList<String>();
        /** Disallow path specifications in the order they were added. */
        protected ArrayList<String> disallows = new ArrayList<String>();
        /** Allow path specifications in the order they were added. */
        protected ArrayList<String> allows = new ArrayList<String>();

        /**
         * Adds an agent name to this record.
         *
         * @param agentName agent name to add
         */
        public void addAgent(String agentName) {
            this.userAgents.add(agentName);
        }

        /**
         * Adds a disallow path specification to this record.
         *
         * @param disallowPath specification to add
         */
        public void addDisallow(String disallowPath) {
            this.disallows.add(disallowPath);
        }

        /**
         * Adds an allow path specification to this record.
         *
         * @param allowPath specification to add
         */
        public void addAllow(String allowPath) {
            this.allows.add(allowPath);
        }

        /**
         * Checks whether any agent name of this record matches the given name.
         *
         * <p>Each stored name is upper-cased before comparison. In exact mode the
         * trimmed stored name must equal {@code agentNameUpper}; otherwise the stored
         * name only has to occur somewhere inside {@code agentNameUpper}. Note that in
         * the latter mode an empty stored name occurs in every string and therefore
         * matches any agent.
         *
         * @param agentNameUpper agent name to look for, already upper-cased by the caller
         * @param exactMatch {@code true} for equality, {@code false} for substring matching
         * @return {@code true} when at least one stored name matches
         */
        public boolean isAgentMatch(String agentNameUpper, boolean exactMatch) {
            for (String storedAgent : this.userAgents) {
                String agent = storedAgent.toUpperCase();
                if (exactMatch) {
                    if (agent.trim().equals(agentNameUpper)) {
                        return true;
                    }
                } else if (agentNameUpper.indexOf(agent) != -1) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Checks whether the path matches any disallow specification.
         *
         * @param path path to test
         * @return {@code true} when at least one disallow specification matches
         */
        public boolean isDisallowed(String path) {
            return Record.matchesAny(this.disallows, path);
        }

        /**
         * Checks whether the path matches any allow specification.
         *
         * @param path path to test
         * @return {@code true} when at least one allow specification matches
         */
        public boolean isAllowed(String path) {
            return Record.matchesAny(this.allows, path);
        }

        /** Returns true when the path matches at least one of the given specifications. */
        private static boolean matchesAny(ArrayList<String> specs, String path) {
            for (String spec : specs) {
                if (RobotsManager.doesPathMatch(path, spec)) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * Cache executor that loads robots data for hosts and captures the entry belonging
     * to the one host it was created for.
     */
    protected static class HostExecutor
    extends ExecutorBase {
        /** Manager used to read robots data from the database. */
        protected RobotsManager thisManager;
        /** Robots data captured by {@code exists}; stays null until the host of interest is seen. */
        protected RobotsData returnValue;
        /** Description of the host whose data is wanted. */
        protected HostDescription thisHost;
        /** Activity recorder forwarded to the database read. */
        protected IVersionActivity activities;

        /**
         * @param manager manager used for database reads
         * @param activities activity recorder forwarded to the reads
         * @param objectDescription description of the host whose data is wanted
         */
        public HostExecutor(RobotsManager manager, IVersionActivity activities, HostDescription objectDescription) {
            thisManager = manager;
            thisHost = objectDescription;
            this.activities = activities;
            returnValue = null;
        }

        /**
         * @return the robots data captured for the host of interest, or {@code null} if none was seen
         */
        public RobotsData getResults() {
            return returnValue;
        }

        /**
         * Reads the robots data for each described host.
         *
         * @param objectDescriptions host descriptions to load
         * @return one entry per description, in the same order (entries may be {@code null})
         * @throws ManifoldCFException propagated from the database read
         */
        public Object[] create(ICacheDescription[] objectDescriptions) throws ManifoldCFException {
            final int count = objectDescriptions.length;
            Object[] created = new RobotsData[count];
            int slot = 0;
            while (slot < count) {
                HostDescription host = (HostDescription)objectDescriptions[slot];
                created[slot] = thisManager.readRobotsData(host.getHostName(), activities);
                slot++;
            }
            return created;
        }

        /**
         * Captures the cached object when its description equals the host of interest.
         *
         * @param objectDescription description of the object
         * @param cachedObject the object itself
         * @throws ManifoldCFException declared by the executor contract
         */
        public void exists(ICacheDescription objectDescription, Object cachedObject) throws ManifoldCFException {
            HostDescription seenHost = (HostDescription)objectDescription;
            RobotsData seenData = (RobotsData)cachedObject;
            if (!seenHost.equals((Object)thisHost)) {
                return;
            }
            returnValue = seenData;
        }

        /**
         * Does nothing; the result is collected in {@code exists}.
         *
         * @throws ManifoldCFException declared by the executor contract
         */
        public void execute() throws ManifoldCFException {
        }
    }

    /** Cache class descriptor for robots data entries. */
    protected static class RobotsCacheClass
    implements ICacheClass {
        /** Name reported by {@code getClassName}. */
        private static final String CLASS_NAME = "ROBOTSCLASS";
        /** Value reported by {@code getMaxLRUCount}. */
        private static final int MAX_LRU_COUNT = 2000;

        protected RobotsCacheClass() {
        }

        /**
         * @return the fixed name of this cache class
         */
        public String getClassName() {
            return CLASS_NAME;
        }

        /**
         * @return the fixed maximum LRU count for this cache class
         */
        public int getMaxLRUCount() {
            return MAX_LRU_COUNT;
        }
    }

    /**
     * Cache description for the robots data of a single host. Two descriptions are
     * equal when their host names are equal.
     */
    protected static class HostDescription
    extends BaseDescription {
        /** Host this description stands for. */
        protected String hostName;
        /** Critical section name: this class's name, a dash, then the host name. */
        protected String criticalSectionName;
        /** Keys returned by {@code getObjectKeys}. */
        protected StringSet cacheKeys;

        /**
         * @param hostName host this description stands for
         * @param invKeys keys to report from {@code getObjectKeys}
         */
        public HostDescription(String hostName, StringSet invKeys) {
            super("robotscache");
            this.hostName = hostName;
            this.cacheKeys = invKeys;
            this.criticalSectionName = getClass().getName() + "-" + hostName;
        }

        /**
         * @return the host name
         */
        public String getHostName() {
            return hostName;
        }

        /**
         * @return the hash code of the host name
         */
        public int hashCode() {
            return hostName.hashCode();
        }

        /**
         * @param o object to compare with
         * @return {@code true} when {@code o} is a {@code HostDescription} with an equal host name
         */
        public boolean equals(Object o) {
            if (o instanceof HostDescription) {
                HostDescription other = (HostDescription)o;
                return other.hostName.equals(hostName);
            }
            return false;
        }

        /**
         * @return the critical section name computed in the constructor
         */
        public String getCriticalSectionName() {
            return criticalSectionName;
        }

        /**
         * @return the keys supplied to the constructor
         */
        public StringSet getObjectKeys() {
            return cacheKeys;
        }

        /**
         * @return the shared robots cache class descriptor
         */
        public ICacheClass getObjectClass() {
            return robotsCacheClass;
        }
    }

    protected static class RobotsData {
        /** Expiration time supplied to the constructor; returned by {@code getExpirationTime()}. */
        protected long expiration;
        /** Parsed {@code Record} entries; {@code null} when no stream was supplied, which {@code isFetchAllowed} treats as "everything allowed". */
        protected ArrayList records = null;

        /**
         * Builds robots data from an optional stream.
         *
         * <p>With a {@code null} stream no records are kept. Otherwise the stream is
         * decoded as UTF-8 and parsed; the readers wrapping the stream are closed
         * before the constructor returns, whether or not parsing succeeded.
         *
         * @param is robots content, or {@code null} when there is none
         * @param expiration expiration time to remember
         * @param hostName host the content came from
         * @param activities activity recorder used by the parser
         * @throws IOException when reading or closing the stream fails
         * @throws ManifoldCFException propagated from the parser
         */
        public RobotsData(InputStream is, long expiration, String hostName, IVersionActivity activities) throws IOException, ManifoldCFException {
            this.expiration = expiration;
            if (is != null) {
                Reader decoder = new InputStreamReader(is, "utf-8");
                try {
                    BufferedReader lineReader = new BufferedReader(decoder);
                    try {
                        parseRobotsTxt(lineReader, hostName, activities);
                    }
                    finally {
                        lineReader.close();
                    }
                }
                finally {
                    decoder.close();
                }
            } else {
                this.records = null;
            }
        }

        /**
         * Decides whether the given agent may fetch the given path.
         *
         * <p>The first record with an agent name contained in the upper-cased
         * {@code userAgent} is used; if there is none, the first record with an agent
         * name equal to {@code *} is used. Without records, or without any applicable
         * record, everything is allowed. Within the chosen record a matching allow
         * specification wins over a matching disallow specification.
         *
         * @param userAgent agent name to evaluate
         * @param pathString path to test
         * @return {@code true} when the fetch is allowed
         */
        public boolean isFetchAllowed(String userAgent, String pathString) {
            if (records == null) {
                return true;
            }
            String agentUpper = userAgent.toUpperCase();
            Record applicable = firstMatchingRecord(agentUpper, false);
            if (applicable == null) {
                // No agent-specific record: fall back to the wildcard record.
                applicable = firstMatchingRecord("*", true);
            }
            if (applicable == null) {
                return true;
            }
            boolean disallowed = applicable.isDisallowed(pathString);
            boolean allowed = applicable.isAllowed(pathString);
            return allowed || !disallowed;
        }

        /** Returns the first record whose agents match, or null when none does. */
        private Record firstMatchingRecord(String agentNameUpper, boolean exactMatch) {
            for (int idx = 0; idx < records.size(); idx++) {
                Record candidate = (Record)records.get(idx);
                if (candidate.isAgentMatch(agentNameUpper, exactMatch)) {
                    return candidate;
                }
            }
            return null;
        }

        /**
         * @return the expiration time supplied to the constructor
         */
        public long getExpirationTime() {
            return expiration;
        }

        /**
         * Parses robots.txt content into {@code records} and records one
         * "robots parse" activity describing the outcome.
         *
         * <p>Per line, everything from the first {@code #} on is dropped and surrounding
         * whitespace is ignored. Recognised directives (case-insensitive, the colon is
         * optional) are {@code User-agent}, {@code Disallow} and {@code Allow};
         * {@code Crawl-delay} and blank lines are skipped. A {@code User-agent} line
         * following an allow/disallow line starts a new record. Empty allow/disallow
         * values are not stored. Any other line is logged as unknown; if it contains
         * {@code <html} or {@code <HTML}, parsing stops and the outcome is "HTML".
         *
         * <p>Directive recognition and value extraction both work on the trimmed line,
         * so indented directives are parsed correctly, and lower-casing uses
         * {@code Locale.ROOT} so recognition does not depend on the default locale.
         *
         * <p>Recorded status: {@code SUCCESS}, {@code ERRORS}, {@code HTML}, or
         * {@code INCOMPLETE} when parsing ended with an exception.
         *
         * @param r reader supplying the robots.txt lines
         * @param hostName host the content came from; used in log and activity entries
         * @param activities recorder that receives the parse activity
         * @throws IOException when reading a line fails
         * @throws ManifoldCFException propagated from the activity recorder
         */
        protected void parseRobotsTxt(BufferedReader r, String hostName, IVersionActivity activities) throws IOException, ManifoldCFException {
            boolean parseCompleted = false;
            boolean robotsWasHtml = false;
            boolean foundErrors = false;
            String description = null;
            long startParseTime = System.currentTimeMillis();
            try {
                this.records = new ArrayList();
                Record record = null;
                boolean seenAction = false;
                String rawLine;
                while ((rawLine = r.readLine()) != null) {
                    int commentStart = rawLine.indexOf('#');
                    String line = commentStart == -1 ? rawLine : rawLine.substring(0, commentStart);
                    String trimmedLine = line.trim();
                    String loweredLine = trimmedLine.toLowerCase(Locale.ROOT);

                    String value = RobotsData.directiveValue(trimmedLine, loweredLine, "user-agent");
                    if (value != null) {
                        if (seenAction) {
                            // An agent line after rules closes the current record.
                            this.records.add(record);
                            record = null;
                            seenAction = false;
                        }
                        if (record == null) {
                            record = new Record();
                        }
                        record.addAgent(value);
                        continue;
                    }

                    value = RobotsData.directiveValue(trimmedLine, loweredLine, "disallow");
                    if (value != null) {
                        if (record == null) {
                            description = "Disallow without User-agent";
                            Logging.connectors.warn((Object)("Web: Bad robots.txt file format from '" + hostName + "': " + description));
                            foundErrors = true;
                            continue;
                        }
                        if (value.length() > 0) {
                            record.addDisallow(value);
                        }
                        seenAction = true;
                        continue;
                    }

                    value = RobotsData.directiveValue(trimmedLine, loweredLine, "allow");
                    if (value != null) {
                        if (record == null) {
                            description = "Allow without User-agent";
                            Logging.connectors.warn((Object)("Web: Bad robots.txt file format from '" + hostName + "': " + description));
                            foundErrors = true;
                            continue;
                        }
                        if (value.length() > 0) {
                            record.addAllow(value);
                        }
                        seenAction = true;
                        continue;
                    }

                    if (loweredLine.startsWith("crawl-delay") || trimmedLine.length() == 0) {
                        continue;
                    }

                    String problemLine = RobotsManager.makeReadable(line);
                    description = "Unknown robots.txt line: '" + problemLine + "'";
                    Logging.connectors.warn((Object)("Web: Unknown robots.txt line from '" + hostName + "': '" + problemLine + "'"));
                    if (line.indexOf("<html") != -1 || line.indexOf("<HTML") != -1) {
                        // The "robots.txt" is really an HTML page; stop parsing.
                        robotsWasHtml = true;
                        parseCompleted = true;
                        break;
                    }
                    foundErrors = true;
                }
                if (record != null) {
                    this.records.add(record);
                }
                parseCompleted = true;
            }
            finally {
                String status;
                if (parseCompleted) {
                    if (robotsWasHtml) {
                        status = "HTML";
                        description = "Robots file contained HTML, skipped";
                    } else if (foundErrors) {
                        status = "ERRORS";
                    } else {
                        status = "SUCCESS";
                        description = null;
                    }
                } else {
                    status = "INCOMPLETE";
                    description = "Parsing was interrupted";
                }
                activities.recordActivity(Long.valueOf(startParseTime), "robots parse", null, hostName, status, description, null);
            }
        }

        /**
         * Extracts the value of a directive line, or returns null when the line does not
         * start with the directive. An optional colon directly after the directive name
         * is skipped; the remainder is trimmed.
         */
        private static String directiveValue(String trimmedLine, String loweredLine, String directive) {
            if (!loweredLine.startsWith(directive)) {
                return null;
            }
            int valueStart = directive.length();
            if (loweredLine.startsWith(":", valueStart)) {
                ++valueStart;
            }
            return trimmedLine.substring(valueStart).trim();
        }
    }
}

