/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.protocol.http.api;

import crawlercommons.robots.BaseRobotRules;
import java.io.IOException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.protocol.http.api.HttpRobotRulesParser;
import org.apache.nutch.util.DeflateUtils;
import org.apache.nutch.util.GZIPUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Shared base class for HTTP protocol implementations.
 *
 * <p>It holds the configuration-driven settings (proxy, timeout, content
 * limit, agent string, accept headers), translates the HTTP status code of a
 * {@link Response} into a {@link ProtocolOutput}, and offers helpers for
 * decoding gzip/deflate encoded bodies. Subclasses provide the actual network
 * access by implementing {@link #getResponse(URL, CrawlDatum, boolean)}.
 */
public abstract class HttpBase implements Protocol {
    private static final Logger LOGGER = LoggerFactory.getLogger(HttpBase.class);

    /** Metadata key under which the measured response time is stored on the datum. */
    public static final Text RESPONSE_TIME = new Text("_rs_");

    /** Not used inside this class; presumably an I/O buffer size for subclasses (confirm). */
    public static final int BUFFER_SIZE = 8192;

    /** Substituted when a response carries no content at all. */
    private static final byte[] EMPTY_CONTENT = new byte[0];

    /** Usage text printed by {@link #main(HttpBase, String[])} on bad arguments. */
    private static final String USAGE = "Usage: Http [-verbose] [-timeout N] url";

    /** Robots rules parser; created once in the constructor. */
    private final HttpRobotRulesParser robots;

    /** Logger used by instance methods; the one given to the constructor, or the class logger. */
    private final Logger logger;

    /** Value of "http.proxy.host"; may be null when no proxy is configured. */
    protected String proxyHost = null;

    /** Value of "http.proxy.port" (default 8080). */
    protected int proxyPort = 8080;

    /** True when {@link #proxyHost} is non-null and non-empty. */
    protected boolean useProxy = false;

    /**
     * Value of "http.timeout" (default 10000). {@code main} multiplies its
     * {@code -timeout N} argument by 1000 before storing it here, so the unit
     * is presumably milliseconds (confirm against the subclasses).
     */
    protected int timeout = 10000;

    /**
     * Value of "http.content.limit" (default 65536). A negative value is
     * treated as "no limit" by the decompression helpers of this class.
     */
    protected int maxContent = 65536;

    /** Agent string built by {@code getAgentString}; replaced in {@link #setConf(Configuration)}. */
    protected String userAgent = HttpBase.getAgentString("NutchCVS", null, "Nutch", "http://nutch.apache.org/bot.html", "agent@nutch.apache.org");

    /** Value of "http.accept.language"; the initializer is the fallback. */
    protected String acceptLanguage = "en-us,en-gb,en;q=0.7,*;q=0.3";

    /** Value of "http.accept"; the initializer is the fallback. */
    protected String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";

    /** Configuration handed to {@link #setConf(Configuration)}; null until then. */
    private Configuration conf = null;

    /** Value of "http.useHttp11" (default false). */
    protected boolean useHttp11 = false;

    /** Value of "http.store.responsetime" (default true); enables {@link #RESPONSE_TIME} recording. */
    protected boolean responseTime = true;

    /** Never assigned in this class beyond its default of -1. */
    protected long maxCrawlDelay = -1L;

    /** Creates an instance that logs through the class logger. */
    public HttpBase() {
        this(null);
    }

    /**
     * Creates an instance with an optional custom logger.
     *
     * @param logger logger for instance-level messages; when null the class
     *               logger is used instead
     */
    public HttpBase(Logger logger) {
        this.logger = logger != null ? logger : LOGGER;
        this.robots = new HttpRobotRulesParser();
    }

    /**
     * Reads all "http.*" settings from the configuration, forwards the
     * configuration to the robots parser and logs the resulting values.
     *
     * @param conf configuration to read from; must not be null
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        this.proxyHost = conf.get("http.proxy.host");
        this.proxyPort = conf.getInt("http.proxy.port", 8080);
        this.useProxy = this.proxyHost != null && this.proxyHost.length() > 0;
        this.timeout = conf.getInt("http.timeout", 10000);
        this.maxContent = conf.getInt("http.content.limit", 65536);
        this.userAgent = HttpBase.getAgentString(conf.get("http.agent.name"), conf.get("http.agent.version"), conf.get("http.agent.description"), conf.get("http.agent.url"), conf.get("http.agent.email"));
        this.acceptLanguage = conf.get("http.accept.language", this.acceptLanguage);
        this.accept = conf.get("http.accept", this.accept);
        this.useHttp11 = conf.getBoolean("http.useHttp11", false);
        this.responseTime = conf.getBoolean("http.store.responsetime", true);
        this.robots.setConf(conf);
        this.logConf();
    }

    /** @return the configuration last passed to {@link #setConf(Configuration)}, or null */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Fetches the given URL through {@link #getResponse(URL, CrawlDatum, boolean)}
     * and wraps the result, together with a status derived from the HTTP
     * response code, into a {@link ProtocolOutput}.
     *
     * <p>When response-time recording is enabled, the elapsed wall-clock time
     * of the request is stored on the datum's metadata under
     * {@link #RESPONSE_TIME}. Any {@link Throwable} raised along the way is
     * logged and converted into a content-less output whose status wraps it.
     *
     * <p>NOTE(review): the numeric status codes below are passed through
     * unchanged from the original; they presumably correspond to the
     * GONE (11), MOVED (12), TEMP_MOVED (13), NOTFOUND (14), EXCEPTION (16),
     * ACCESS_DENIED (17) and NOTMODIFIED (21) constants of ProtocolStatus.
     * Confirm against that class before replacing them with named constants.
     *
     * @param url   URL to fetch
     * @param datum crawl datum passed to the fetch and updated with the response time
     * @return the fetched content plus status; never null
     */
    public ProtocolOutput getProtocolOutput(Text url, CrawlDatum datum) {
        String urlString = url.toString();
        try {
            URL u = new URL(urlString);
            long startTime = System.currentTimeMillis();
            Response response = this.getResponse(u, datum, false);
            if (this.responseTime) {
                int elapsedTime = (int) (System.currentTimeMillis() - startTime);
                datum.getMetaData().put(RESPONSE_TIME, new IntWritable(elapsedTime));
            }

            int code = response.getCode();
            byte[] content = response.getContent();
            Content c = new Content(u.toString(), u.toString(), content == null ? EMPTY_CONTENT : content, response.getHeader("Content-Type"), response.getHeaders(), this.conf);

            // The (Object) casts keep constructor overload selection identical
            // to the original code.
            if (code == 200) {
                return new ProtocolOutput(c);
            }
            if (code == 410) {
                return new ProtocolOutput(c, new ProtocolStatus(11, (Object) ("Http: " + code + " url=" + url)));
            }
            if (code >= 300 && code < 400) {
                String location = response.getHeader("Location");
                if (location == null) {
                    // Fall back to the lower-case spelling of the header name.
                    location = response.getHeader("location");
                }
                if (location == null) {
                    location = "";
                }
                // Resolve a possibly relative target against the requested URL.
                u = new URL(u, location);
                return new ProtocolOutput(c, new ProtocolStatus(HttpBase.redirectStatusCode(code), (Object) u));
            }
            if (code == 400) {
                if (this.logger.isTraceEnabled()) {
                    this.logger.trace("400 Bad request: " + u);
                }
                return new ProtocolOutput(c, new ProtocolStatus(11, (Object) u));
            }
            if (code == 401) {
                if (this.logger.isTraceEnabled()) {
                    this.logger.trace("401 Authentication Required");
                }
                return new ProtocolOutput(c, new ProtocolStatus(17, (Object) ("Authentication required: " + urlString)));
            }
            if (code == 404) {
                return new ProtocolOutput(c, new ProtocolStatus(14, (Object) u));
            }
            // 410 needs no second check here: it already returned above.
            return new ProtocolOutput(c, new ProtocolStatus(16, (Object) ("Http code=" + code + ", url=" + u)));
        }
        catch (Throwable e) {
            this.logger.error("Failed to get protocol output", e);
            return new ProtocolOutput(null, new ProtocolStatus(e));
        }
    }

    /**
     * Maps a 3xx HTTP code to the numeric protocol status used for it:
     * 302, 303 and 307 give 13, 304 gives 21, every other code gives 12.
     */
    private static int redirectStatusCode(int code) {
        switch (code) {
            case 302:
            case 303:
            case 307:
                return 13;
            case 304:
                return 21;
            default:
                return 12;
        }
    }

    /** @return the configured proxy host, possibly null */
    public String getProxyHost() {
        return this.proxyHost;
    }

    /** @return the configured proxy port */
    public int getProxyPort() {
        return this.proxyPort;
    }

    /** @return true when a non-empty proxy host is configured */
    public boolean useProxy() {
        return this.useProxy;
    }

    /** @return the configured timeout value */
    public int getTimeout() {
        return this.timeout;
    }

    /** @return the configured content limit; negative means no limit for the decoders here */
    public int getMaxContent() {
        return this.maxContent;
    }

    /** @return the agent string built from the agent settings */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** @return the configured Accept-Language value */
    public String getAcceptLanguage() {
        return this.acceptLanguage;
    }

    /** @return the configured Accept value */
    public String getAccept() {
        return this.accept;
    }

    /** @return the value of the "http.useHttp11" setting */
    public boolean getUseHttp11() {
        return this.useHttp11;
    }

    /**
     * Builds the agent string in the form
     * {@code name[/version][ (description; url; email)]}.
     *
     * <p>An error is logged when the name is null or blank. A null name is
     * left out rather than rendered as the text "null", and only non-empty
     * detail fields are listed, so no dangling "; " separator is produced
     * when a field is present but empty.
     *
     * @param agentName    agent name; an error is logged when missing
     * @param agentVersion optional version, appended after a "/"
     * @param agentDesc    optional description
     * @param agentURL     optional URL
     * @param agentEmail   optional e-mail address
     * @return the assembled agent string
     */
    private static String getAgentString(String agentName, String agentVersion, String agentDesc, String agentURL, String agentEmail) {
        if ((agentName == null || agentName.trim().length() == 0) && LOGGER.isErrorEnabled()) {
            LOGGER.error("No User-Agent string set (http.agent.name)!");
        }
        StringBuilder buf = new StringBuilder();
        if (agentName != null) {
            buf.append(agentName);
        }
        if (agentVersion != null) {
            buf.append("/");
            buf.append(agentVersion);
        }
        StringBuilder details = new StringBuilder();
        HttpBase.appendAgentDetail(details, agentDesc);
        HttpBase.appendAgentDetail(details, agentURL);
        HttpBase.appendAgentDetail(details, agentEmail);
        if (details.length() > 0) {
            buf.append(" (").append(details).append(")");
        }
        return buf.toString();
    }

    /** Appends a non-empty value to the detail list, preceded by "; " when it is not the first entry. */
    private static void appendAgentDetail(StringBuilder details, String value) {
        if (value == null || value.length() == 0) {
            return;
        }
        if (details.length() > 0) {
            details.append("; ");
        }
        details.append(value);
    }

    /** Logs the effective HTTP settings at info level. */
    protected void logConf() {
        if (this.logger.isInfoEnabled()) {
            this.logger.info("http.proxy.host = " + this.proxyHost);
            this.logger.info("http.proxy.port = " + this.proxyPort);
            this.logger.info("http.timeout = " + this.timeout);
            this.logger.info("http.content.limit = " + this.maxContent);
            this.logger.info("http.agent = " + this.userAgent);
            this.logger.info("http.accept.language = " + this.acceptLanguage);
            this.logger.info("http.accept = " + this.accept);
        }
    }

    /**
     * Decompresses a gzip encoded body on a best-effort basis, limited to the
     * configured content limit unless that limit is negative.
     *
     * @param compressed gzip encoded bytes
     * @param url        origin of the bytes; used for trace logging only
     * @return the decompressed bytes
     * @throws IOException when the decompression helper returns null
     */
    public byte[] processGzipEncoded(byte[] compressed, URL url) throws IOException {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("uncompressing....");
        }
        byte[] content;
        if (this.getMaxContent() >= 0) {
            content = GZIPUtils.unzipBestEffort(compressed, this.getMaxContent());
        } else {
            content = GZIPUtils.unzipBestEffort(compressed);
        }
        if (content == null) {
            throw new IOException("unzipBestEffort returned null");
        }
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("fetched " + compressed.length + " bytes of compressed content (expanded to " + content.length + " bytes) from " + url);
        }
        return content;
    }

    /**
     * Inflates a deflate encoded body on a best-effort basis, limited to the
     * configured content limit. A negative limit means "no limit", matching
     * {@link #processGzipEncoded(byte[], URL)}; it is replaced by
     * {@link Integer#MAX_VALUE} instead of being handed to the inflater as a
     * negative size.
     *
     * @param compressed deflate encoded bytes
     * @param url        origin of the bytes; used for trace logging only
     * @return the inflated bytes
     * @throws IOException when the inflate helper returns null
     */
    public byte[] processDeflateEncoded(byte[] compressed, URL url) throws IOException {
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("inflating....");
        }
        int limit = this.getMaxContent();
        byte[] content = DeflateUtils.inflateBestEffort(compressed, limit >= 0 ? limit : Integer.MAX_VALUE);
        if (content == null) {
            throw new IOException("inflateBestEffort returned null");
        }
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("fetched " + compressed.length + " bytes of compressed content (expanded to " + content.length + " bytes) from " + url);
        }
        return content;
    }

    /**
     * Command-line driver shared by subclasses: fetches a single URL with the
     * given protocol instance and prints status, content type, content length
     * and the content itself.
     *
     * <p>Accepted arguments: {@code [-verbose] [-timeout N] url}, with the URL
     * last. {@code -verbose} is accepted but has no effect here. The usage
     * text is printed and the JVM exits with -1 when no arguments are given,
     * when {@code -timeout} has no value, when an unknown argument precedes
     * the URL, or when no URL is given.
     *
     * @param http protocol instance to use; its timeout is overwritten by {@code -timeout}
     * @param args command-line arguments
     * @throws Exception on a non-numeric timeout value or any failure while printing
     */
    protected static void main(HttpBase http, String[] args) throws Exception {
        if (args.length == 0) {
            HttpBase.exitWithUsage();
            return;
        }
        String url = null;
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (arg.equals("-timeout")) {
                if (i + 1 >= args.length) {
                    // "-timeout" was the last argument: its value is missing.
                    HttpBase.exitWithUsage();
                    return;
                }
                http.timeout = Integer.parseInt(args[++i]) * 1000;
            } else if (arg.equals("-verbose")) {
                // Accepted so existing command lines keep working; nothing to do.
            } else if (i != args.length - 1) {
                HttpBase.exitWithUsage();
                return;
            } else {
                url = arg;
            }
        }
        if (url == null) {
            // Only flags were given.
            HttpBase.exitWithUsage();
            return;
        }
        ProtocolOutput out = http.getProtocolOutput(new Text(url), new CrawlDatum());
        Content content = out.getContent();
        System.out.println("Status: " + out.getStatus());
        if (content != null) {
            System.out.println("Content Type: " + content.getContentType());
            System.out.println("Content Length: " + content.getMetadata().get("Content-Length"));
            System.out.println("Content:");
            // NOTE(review): decodes with the platform default charset, as before.
            String text = new String(content.getContent());
            System.out.println(text);
        }
    }

    /** Prints the usage text to stderr and terminates the JVM with exit code -1. */
    private static void exitWithUsage() {
        System.err.println(USAGE);
        System.exit(-1);
    }

    /**
     * Performs the actual request. Implemented by concrete protocol plugins.
     *
     * @param var1 URL to request
     * @param var2 crawl datum associated with the URL
     * @param var3 flag forwarded to the implementation; {@code getProtocolOutput}
     *             always passes false (meaning not visible here; presumably
     *             "follow redirects", confirm in the subclasses)
     * @return the response of the request
     * @throws ProtocolException on protocol-level failures
     * @throws IOException       on I/O failures
     */
    protected abstract Response getResponse(URL var1, CrawlDatum var2, boolean var3) throws ProtocolException, IOException;

    /**
     * Returns the robots rules for the given URL by delegating to the robots
     * parser of this instance.
     *
     * @param url   URL whose robots rules are wanted
     * @param datum not used by this implementation
     * @return the robots rules supplied by the parser
     */
    public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) {
        return this.robots.getRobotRulesSet(this, url);
    }
}

