/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.fetcher;

import java.io.IOException;
import java.net.MalformedURLException;
import java.text.SimpleDateFormat;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapRunnable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.fetcher.FetcherOutputFormat;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class OldFetcher
extends Configured
implements Tool,
MapRunnable<WritableComparable<?>, Writable, Text, NutchWritable> {
    /** Logger used by the fetcher and by its worker threads. */
    public static final Logger LOG = LoggerFactory.getLogger(OldFetcher.class);
    // NOTE(review): FetcherThread.run compares a content redirect's refresh time
    // against the literal 5; presumably that literal is this constant inlined by
    // the compiler — confirm before changing either.
    public static final int PERM_REFRESH_TIME = 5;
    /** Redirect-type label passed to handleRedirect for redirects reported by the parse status. */
    public static final String CONTENT_REDIR = "content";
    /** Redirect-type label passed to handleRedirect for redirects reported by the protocol status. */
    public static final String PROTOCOL_REDIR = "protocol";
    // Record source, sink and reporter handed to run(RecordReader, OutputCollector, Reporter);
    // the worker threads read records from `input` and emit through `output`.
    private RecordReader<WritableComparable<?>, Writable> input;
    private OutputCollector<Text, NutchWritable> output;
    private Reporter reporter;
    // Value of "nutch.segment.name" captured in configure(); written into content/parse metadata.
    private String segmentName;
    // Number of FetcherThreads currently inside run(); modified under synchronized(this).
    private int activeThreads;
    // Value of "http.redirect.max" (default 3) read in run().
    private int maxRedirect;
    // Construction time in milliseconds; base for the rates computed in reportStatus().
    private long start;
    // Time a worker last picked up a record; run() aborts when this is older than the timeout.
    private long lastRequestStart;
    // Running totals: `bytes` and `pages` are bumped by updateStatus(), `errors` by logError().
    private long bytes;
    private int pages;
    private int errors;
    // Flags read in configure() from "fetcher.store.content" and "fetcher.parse".
    private boolean storingContent;
    private boolean parsing;

    /**
     * Counts one more fetched page and adds its size to the byte total.
     * Both counters are updated while holding this fetcher's monitor.
     *
     * @param bytesInPage size of the fetched page content in bytes
     * @throws IOException declared for callers; the body itself never throws it
     */
    private void updateStatus(int bytesInPage) throws IOException {
        synchronized (this) {
            this.pages += 1;
            this.bytes = this.bytes + (long) bytesInPage;
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Publishes a one-line progress summary (pages, errors, pages/s, kb/s) to
     * the reporter.
     *
     * <p>The counters are snapshotted under this fetcher's monitor; the
     * reporter is called outside the lock so a slow reporter cannot stall the
     * worker threads.
     *
     * @throws IOException if the reporter call fails
     */
    private void reportStatus() throws IOException {
        String status;
        synchronized (this) {
            // Clamp to one second: with zero (or negative, after a clock step)
            // elapsed time the float divisions below would produce Infinity/NaN
            // and a nonsensical rate in the status line.
            long elapsed = Math.max(1L, (System.currentTimeMillis() - this.start) / 1000L);
            // Pages per second, rounded to one decimal place.
            double pagesPerSecond = (double)Math.round((float)this.pages * 10.0f / (float)elapsed) / 10.0;
            // Kilobits per second, rounded to an integer.
            int kilobitsPerSecond = Math.round((float)this.bytes * 8.0f / 1024.0f / (float)elapsed);
            status = this.pages + " pages, " + this.errors + " errors, " + pagesPerSecond + " pages/s, " + kilobitsPerSecond + " kb/s, ";
        }
        this.reporter.setStatus(status);
    }

    public OldFetcher() {
        this.lastRequestStart = this.start = System.currentTimeMillis();
    }

    public OldFetcher(Configuration conf) {
        this.lastRequestStart = this.start = System.currentTimeMillis();
        this.setConf(conf);
    }

    /**
     * Adopts the job configuration and caches the settings the worker threads
     * need: the segment name and the store-content / parse flags.
     *
     * @param job job configuration supplied by the framework
     */
    public void configure(JobConf job) {
        setConf(job);
        segmentName = job.get("nutch.segment.name");
        storingContent = isStoringContent(job);
        parsing = isParsing(job);
    }

    /** Intentionally a no-op: this class holds nothing that needs releasing here. */
    public void close() {
    }

    /**
     * Tells whether fetched content should be parsed during the fetch.
     *
     * @param conf configuration to consult
     * @return the value of {@code fetcher.parse}, {@code true} when unset
     */
    public static boolean isParsing(Configuration conf) {
        final boolean parseWhileFetching = conf.getBoolean("fetcher.parse", true);
        return parseWhileFetching;
    }

    /**
     * Tells whether fetched content should be written to the output.
     *
     * @param conf configuration to consult
     * @return the value of {@code fetcher.store.content}, {@code true} when unset
     */
    public static boolean isStoringContent(Configuration conf) {
        final boolean keepContent = conf.getBoolean("fetcher.store.content", true);
        return keepContent;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Map-runner entry point: starts the fetcher threads and then supervises
     * them until they have all finished or appear to be hung.
     *
     * <p>Once per second the progress is reported. The method returns when no
     * fetcher thread is active any more, or early (with a warning) when no
     * thread has picked up a new record for longer than half of
     * {@code mapred.task.timeout} (default 600000 ms).
     *
     * @param input    source of the records to fetch, shared by all threads
     * @param output   collector that receives the fetch results
     * @param reporter receives the periodic status line
     * @throws IOException if reporting the status fails
     */
    public void run(RecordReader<WritableComparable<?>, Writable> input, OutputCollector<Text, NutchWritable> output, Reporter reporter) throws IOException {
        this.input = input;
        this.output = output;
        this.reporter = reporter;
        this.maxRedirect = this.getConf().getInt("http.redirect.max", 3);
        int threadCount = this.getConf().getInt("fetcher.threads.fetch", 10);
        if (LOG.isInfoEnabled()) {
            LOG.info("OldFetcher: threads: " + threadCount);
        }
        for (int i = 0; i < threadCount; ++i) {
            new FetcherThread(this.getConf()).start();
        }
        long timeout = this.getConf().getInt("mapred.task.timeout", 600000) / 2;
        boolean threadsRunning;
        do {
            try {
                Thread.sleep(1000L);
            }
            catch (InterruptedException ignored) {
                // Deliberately ignored: an early wake-up only makes this
                // supervision round happen sooner.
            }
            this.reportStatus();
            synchronized (this) {
                if (System.currentTimeMillis() - this.lastRequestStart > timeout) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Aborting with " + this.activeThreads + " hung threads.");
                    }
                    return;
                }
                // Read the counter under the same monitor the worker threads
                // use to modify it, so the loop never acts on a stale value.
                threadsRunning = this.activeThreads > 0;
            }
        } while (threadsRunning);
    }

    /**
     * Configures and runs the fetch job for one segment, blocking until the
     * job has finished.
     *
     * <p>The job reads {@code crawl_generate} under the segment, uses this
     * class as its map runner and writes its output into the segment
     * directory. Speculative execution is switched off.
     *
     * @param segment segment directory to fetch
     * @param threads value stored under {@code fetcher.threads.fetch} for the job
     * @throws IOException if the job fails
     */
    public void fetch(Path segment, int threads) throws IOException {
        final SimpleDateFormat timestamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        final long began = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("OldFetcher: starting at " + timestamp.format(began));
            LOG.info("OldFetcher: segment: " + segment);
        }

        final NutchJob fetchJob = new NutchJob(getConf());
        fetchJob.setJobName("fetch " + segment);
        fetchJob.setInt("fetcher.threads.fetch", threads);
        fetchJob.set("nutch.segment.name", segment.getName());
        fetchJob.setSpeculativeExecution(false);

        // Input side: the generated fetch list, read through the nested InputFormat.
        final Path fetchList = new Path(segment, "crawl_generate");
        FileInputFormat.addInputPath(fetchJob, fetchList);
        fetchJob.setInputFormat(InputFormat.class);
        fetchJob.setMapRunnerClass(OldFetcher.class);

        // Output side: everything goes back into the segment directory.
        FileOutputFormat.setOutputPath(fetchJob, segment);
        fetchJob.setOutputFormat(FetcherOutputFormat.class);
        fetchJob.setOutputKeyClass(Text.class);
        fetchJob.setOutputValueClass(NutchWritable.class);

        JobClient.runJob(fetchJob);

        final long finished = System.currentTimeMillis();
        LOG.info("OldFetcher: finished at " + timestamp.format(finished) + ", elapsed: " + TimingUtil.elapsedTime(began, finished));
    }

    /**
     * Command-line entry point: runs the fetcher through {@code ToolRunner}
     * and exits the JVM with the tool's return code.
     *
     * @param args command-line arguments, see {@link #run(String[])}
     * @throws Exception if the tool run itself throws
     */
    public static void main(String[] args) throws Exception {
        final Configuration nutchConf = NutchConfiguration.create();
        final int exitCode = ToolRunner.run(nutchConf, new OldFetcher(), args);
        System.exit(exitCode);
    }

    /**
     * Parses the command line and fetches the given segment.
     *
     * <p>Usage: {@code OldFetcher <segment> [-threads n] [-noParsing]}.
     * Unrecognised options are ignored. A {@code -threads} option without a
     * value, or with a value that is not an integer, prints the usage line and
     * fails instead of escaping as an unchecked exception.
     *
     * @param args command-line arguments; the first one is the segment path
     * @return 0 on success, -1 on a usage error or when the fetch fails
     * @throws Exception declared by the {@code Tool} contract
     */
    public int run(String[] args) throws Exception {
        String usage = "Usage: OldFetcher <segment> [-threads n] [-noParsing]";
        if (args.length < 1) {
            System.err.println(usage);
            return -1;
        }
        Path segment = new Path(args[0]);
        int threads = this.getConf().getInt("fetcher.threads.fetch", 10);
        boolean parsing = true;
        for (int i = 1; i < args.length; ++i) {
            if ("-threads".equals(args[i])) {
                // The option needs a value; reject a dangling "-threads".
                if (i + 1 >= args.length) {
                    System.err.println(usage);
                    return -1;
                }
                try {
                    threads = Integer.parseInt(args[++i]);
                }
                catch (NumberFormatException e) {
                    System.err.println(usage);
                    return -1;
                }
            } else if ("-noParsing".equals(args[i])) {
                parsing = false;
            }
        }
        this.getConf().setInt("fetcher.threads.fetch", threads);
        // Only override the configured value when parsing was explicitly disabled.
        if (!parsing) {
            this.getConf().setBoolean("fetcher.parse", parsing);
        }
        try {
            this.fetch(segment, threads);
            return 0;
        }
        catch (Exception e) {
            LOG.error("OldFetcher: " + StringUtils.stringifyException((Throwable)e));
            return -1;
        }
    }

    private class FetcherThread
    extends Thread {
        // Configuration handed to the constructor; also used to build an empty parse in output().
        private Configuration conf;
        // Per-thread helpers, each created from `conf` in the constructor.
        private URLFilters urlFilters;
        private ScoringFilters scfilters;
        private ParseUtil parseUtil;
        private URLNormalizers normalizers;
        private ProtocolFactory protocolFactory;
        // Set by handleRedirect() when the redirect target should be fetched immediately.
        private boolean redirecting;
        // Number of redirects followed so far for the record being processed.
        private int redirectCount;
        // Representative URL for the record being processed; updated in handleRedirect().
        private String reprUrl;

        /**
         * Creates a daemon worker named "FetcherThread" and builds its private
         * filter, scoring, parsing, protocol and normalizer helpers from the
         * given configuration.
         *
         * @param conf configuration used to construct every helper
         */
        public FetcherThread(Configuration conf) {
            // Thread attributes first, exactly as before.
            setDaemon(true);
            setName("FetcherThread");

            this.conf = conf;

            // Helper construction order is kept unchanged.
            urlFilters = new URLFilters(conf);
            scfilters = new ScoringFilters(conf);
            parseUtil = new ParseUtil(conf);
            protocolFactory = new ProtocolFactory(conf);
            normalizers = new URLNormalizers(conf, "fetcher");
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        /**
         * Worker loop: pulls records from the shared input until it is
         * exhausted (or reading fails), fetches each URL, follows redirects up
         * to the configured maximum and emits the results through
         * {@code output(...)}.
         *
         * <p>The active-thread counter of the enclosing fetcher is incremented
         * on entry and always decremented on exit. Any failure while handling a
         * single record is logged and recorded with status 34, after which the
         * loop carries on with the next record.
         *
         * <p>NOTE(review): the numeric case labels and status arguments below
         * are compile-time constants that appear here as literals. The names
         * given in the comments are presumed from Nutch's
         * {@code ProtocolStatus}, {@code ParseStatus} and {@code CrawlDatum} —
         * confirm against the Nutch version in use.
         */
        @Override
        public void run() {
            synchronized (OldFetcher.this) {
                OldFetcher.this.activeThreads++;
            }
            try {
                Text key = new Text();
                CrawlDatum datum = new CrawlDatum();
                while (true) {
                    try {
                        if (!OldFetcher.this.input.next(key, datum)) {
                            // End of input: leave the loop so the thread can
                            // terminate instead of re-processing the last record.
                            break;
                        }
                    }
                    catch (IOException e) {
                        if (LOG.isErrorEnabled()) {
                            LOG.error("fetcher caught:" + e.toString());
                        }
                        break;
                    }
                    // Tell the supervising loop that this thread is still making progress.
                    synchronized (OldFetcher.this) {
                        OldFetcher.this.lastRequestStart = System.currentTimeMillis();
                    }
                    // Working copy: `url` changes while redirects are followed.
                    Text url = new Text(key);
                    Text reprUrlWritable = (Text)datum.getMetaData().get(Nutch.WRITABLE_REPR_URL_KEY);
                    this.reprUrl = reprUrlWritable == null ? key.toString() : reprUrlWritable.toString();
                    try {
                        if (LOG.isInfoEnabled()) {
                            LOG.info("fetching " + url);
                        }
                        this.redirectCount = 0;
                        do {
                            if (LOG.isDebugEnabled()) {
                                LOG.debug("redirectCount=" + this.redirectCount);
                            }
                            this.redirecting = false;
                            Protocol protocol = this.protocolFactory.getProtocol(url.toString());
                            ProtocolOutput protocolOutput = protocol.getProtocolOutput(url, datum);
                            ProtocolStatus status = protocolOutput.getStatus();
                            Content content = protocolOutput.getContent();
                            ParseStatus pstatus = null;
                            String urlString = url.toString();
                            if (this.reprUrl != null && !this.reprUrl.equals(urlString)) {
                                datum.getMetaData().put(Nutch.WRITABLE_REPR_URL_KEY, new Text(this.reprUrl));
                            }
                            switch (status.getCode()) {
                                case 1: { // presumably SUCCESS
                                    pstatus = this.output(url, datum, content, status, 33);
                                    OldFetcher.this.updateStatus(content.getContent().length);
                                    // Minor code 100 is presumably "redirect found while parsing".
                                    if (pstatus == null || !pstatus.isSuccess() || pstatus.getMinorCode() != 100) break;
                                    String newUrl = pstatus.getMessage();
                                    int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
                                    url = this.handleRedirect(url, datum, urlString, newUrl, refreshTime < 5, OldFetcher.CONTENT_REDIR);
                                    break;
                                }
                                case 12: // presumably MOVED
                                case 13: { // presumably TEMP_MOVED
                                    boolean temp;
                                    int code;
                                    if (status.getCode() == 12) {
                                        code = 36;
                                        temp = false;
                                    } else {
                                        code = 35;
                                        temp = true;
                                    }
                                    this.output(url, datum, content, status, code);
                                    String newUrl = status.getMessage();
                                    url = this.handleRedirect(url, datum, urlString, newUrl, temp, OldFetcher.PROTOCOL_REDIR);
                                    break;
                                }
                                case 16: { // presumably EXCEPTION
                                    this.logError(url, status.getMessage());
                                }
                                // Intentional fall-through: after logging, handled like the cases below.
                                case 15:
                                case 22:
                                case 23: {
                                    this.output(url, datum, null, status, 34);
                                    break;
                                }
                                case 11:
                                case 14:
                                case 17:
                                case 18: {
                                    this.output(url, datum, null, status, 37);
                                    break;
                                }
                                case 21: {
                                    this.output(url, datum, null, status, 38);
                                    break;
                                }
                                default: {
                                    if (LOG.isWarnEnabled()) {
                                        LOG.warn("Unknown ProtocolStatus: " + status.getCode());
                                    }
                                    this.output(url, datum, null, status, 37);
                                }
                            }
                            if (this.redirecting && this.redirectCount >= OldFetcher.this.maxRedirect) {
                                if (LOG.isInfoEnabled()) {
                                    LOG.info(" - redirect count exceeded " + url);
                                }
                                this.output(url, datum, null, status, 37);
                            }
                        } while (this.redirecting && this.redirectCount < OldFetcher.this.maxRedirect);
                    }
                    catch (Throwable t) {
                        // One bad record must not kill the thread: record the failure and go on.
                        this.logError(url, t.toString());
                        this.output(url, datum, null, null, 34);
                    }
                }
            }
            catch (Throwable e) {
                if (LOG.isErrorEnabled()) {
                    LOG.error("fetcher caught:" + e.toString());
                }
            }
            finally {
                synchronized (OldFetcher.this) {
                    OldFetcher.this.activeThreads--;
                }
            }
        }

        /**
         * Decides what to do with a redirect target.
         *
         * <p>The target is normalized and filtered first. If it is filtered out
         * or equals the current URL, the redirect is skipped. Otherwise the
         * representative URL is re-chosen and, when redirects may be followed
         * ({@code maxRedirect > 0}), the thread is flagged as redirecting and
         * the target is returned for immediate fetching. When redirects may not
         * be followed, a fresh datum for the target is emitted with status 67
         * so that it can be fetched later.
         *
         * @param url       current URL (not read; kept for the signature)
         * @param datum     current datum (not read; kept for the signature)
         * @param urlString string form of the current URL
         * @param newUrl    raw redirect target
         * @param temp      whether the redirect is temporary
         * @param redirType label used in debug messages
         * @return the target to fetch now, or {@code null} when nothing is to
         *         be fetched immediately
         * @throws MalformedURLException if normalizing the target fails
         * @throws URLFilterException    if filtering the target fails
         */
        private Text handleRedirect(Text url, CrawlDatum datum, String urlString, String newUrl, boolean temp, String redirType) throws MalformedURLException, URLFilterException {
            String candidate = this.normalizers.normalize(newUrl, "fetcher");
            candidate = this.urlFilters.filter(candidate);

            // Guard: nothing to follow when filtered out or pointing at ourselves.
            if (candidate == null || candidate.equals(urlString)) {
                if (LOG.isDebugEnabled()) {
                    String reason = candidate != null ? "to same url" : "filtered";
                    LOG.debug(" - " + redirType + " redirect skipped: " + reason);
                }
                return null;
            }

            this.reprUrl = URLUtil.chooseRepr(this.reprUrl, candidate, temp);
            Text target = new Text(candidate);

            if (OldFetcher.this.maxRedirect > 0) {
                this.redirecting = true;
                this.redirectCount += 1;
                if (LOG.isDebugEnabled()) {
                    LOG.debug(" - " + redirType + " redirect to " + target + " (fetching now)");
                }
                return target;
            }

            // Redirects are not followed in this run: emit the target for a later fetch.
            CrawlDatum linked = new CrawlDatum();
            if (this.reprUrl != null) {
                linked.getMetaData().put((Writable)Nutch.WRITABLE_REPR_URL_KEY, (Writable)new Text(this.reprUrl));
            }
            this.output(target, linked, null, null, 67);
            if (LOG.isDebugEnabled()) {
                LOG.debug(" - " + redirType + " redirect to " + target + " (fetching later)");
            }
            return null;
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        /**
         * Logs a failed fetch at info level and bumps the enclosing fetcher's
         * error counter while holding its monitor.
         *
         * @param url     URL whose fetch failed
         * @param message description of the failure
         */
        private void logError(Text url, String message) {
            if (LOG.isInfoEnabled()) {
                LOG.info("fetch of " + url + " failed with: " + message);
            }
            synchronized (OldFetcher.this) {
                OldFetcher.this.errors += 1;
            }
        }

        /**
         * Records the outcome of one fetch attempt and emits it to the
         * collector.
         *
         * <p>Steps, in order:
         * <ol>
         *   <li>stamp the datum with the status, the fetch time and, when
         *       given, the protocol status;</li>
         *   <li>when content is present: tag it with the segment name, let the
         *       scoring filters see it, parse it if parsing is enabled and the
         *       status is 33, and store the status in the content metadata;</li>
         *   <li>emit the datum, the content (if storing is enabled) and one
         *       record per parse entry.</li>
         * </ol>
         * Scoring, parsing and collector failures are logged, never propagated.
         *
         * @param key     URL the results are emitted under
         * @param datum   crawl datum to update and emit
         * @param content fetched content, or {@code null} when there is none
         * @param pstatus protocol status to attach to the datum, may be {@code null}
         * @param status  status code stored in the datum
         * @return the parse status of the entry for the content's own URL, or
         *         {@code null} when nothing was parsed or no such entry exists
         */
        private ParseStatus output(Text key, CrawlDatum datum, Content content, ProtocolStatus pstatus, int status) {
            datum.setStatus(status);
            datum.setFetchTime(System.currentTimeMillis());
            if (pstatus != null) {
                datum.getMetaData().put(Nutch.WRITABLE_PROTO_STATUS_KEY, pstatus);
            }

            ParseResult parseResult = null;
            if (content != null) {
                Metadata metadata = content.getMetadata();
                metadata.set("nutch.segment.name", OldFetcher.this.segmentName);
                try {
                    this.scfilters.passScoreBeforeParsing(key, datum, content);
                }
                catch (Exception e) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
                    }
                }
                if (OldFetcher.this.parsing && status == 33) {
                    try {
                        parseResult = this.parseUtil.parse(content);
                    }
                    catch (Exception e) {
                        LOG.warn("Error parsing: " + key + ": " + StringUtils.stringifyException((Throwable)e));
                    }
                    if (parseResult == null) {
                        // Parsing failed: still give the datum a signature, computed over an empty parse.
                        byte[] signature = SignatureFactory.getSignature(OldFetcher.this.getConf()).calculate(content, new ParseStatus().getEmptyParse(this.conf));
                        datum.setSignature(signature);
                    }
                }
                content.getMetadata().add("_fst_", Integer.toString(status));
            }

            try {
                OldFetcher.this.output.collect(key, new NutchWritable(datum));
                if (content != null && OldFetcher.this.storingContent) {
                    OldFetcher.this.output.collect(key, new NutchWritable(content));
                }
                if (parseResult != null) {
                    for (Map.Entry<Text, Parse> entry : parseResult) {
                        Text url = entry.getKey();
                        Parse parse = entry.getValue();
                        ParseStatus parseStatus = parse.getData().getStatus();
                        if (!parseStatus.isSuccess()) {
                            LOG.warn("Error parsing: " + key + ": " + parseStatus);
                            parse = parseStatus.getEmptyParse(OldFetcher.this.getConf());
                        }
                        byte[] signature = SignatureFactory.getSignature(OldFetcher.this.getConf()).calculate(content, parse);
                        parse.getData().getContentMeta().set("nutch.segment.name", OldFetcher.this.segmentName);
                        parse.getData().getContentMeta().set("nutch.content.digest", StringUtil.toHexString(signature));
                        parse.getData().getContentMeta().set("_ftk_", Long.toString(datum.getFetchTime()));
                        // Only the entry for the fetched URL itself determines the datum's signature.
                        if (url.equals(key)) {
                            datum.setSignature(signature);
                        }
                        try {
                            this.scfilters.passScoreAfterParsing(url, content, parse);
                        }
                        catch (Exception e) {
                            if (LOG.isWarnEnabled()) {
                                LOG.warn("Couldn't pass score, url " + key + " (" + e + ")");
                            }
                        }
                        OldFetcher.this.output.collect(url, new NutchWritable(new ParseImpl(new ParseText(parse.getText()), parse.getData(), parse.isCanonical())));
                    }
                }
            }
            catch (IOException e) {
                if (LOG.isErrorEnabled()) {
                    LOG.error("fetcher caught:" + e.toString());
                }
            }

            // parseResult is only ever set when content is non-null, so content is safe to use here.
            if (parseResult != null && !parseResult.isEmpty()) {
                Parse p = parseResult.get(content.getUrl());
                if (p != null) {
                    return p.getData().getStatus();
                }
            }
            return null;
        }
    }

    /**
     * Sequence-file input format that never splits a file: every input file
     * becomes exactly one split covering the file from offset 0 to its full
     * length.
     */
    public static class InputFormat
    extends SequenceFileInputFormat<WritableComparable<?>, Writable> {
        /**
         * Builds one whole-file split per input file.
         *
         * @param job     job whose input files are listed
         * @param nSplits requested number of splits; not used
         * @return one split per listed file, in listing order
         * @throws IOException if listing the input files fails
         */
        public InputSplit[] getSplits(JobConf job, int nSplits) throws IOException {
            FileStatus[] inputs = this.listStatus(job);
            InputSplit[] wholeFiles = new InputSplit[inputs.length];
            int slot = 0;
            for (FileStatus file : inputs) {
                // No preferred hosts are supplied for the split.
                wholeFiles[slot++] = new FileSplit(file.getPath(), 0L, file.getLen(), (String[])null);
            }
            return wholeFiles;
        }
    }
}

