/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.tools;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.crawl.CrawlDbReader;
import org.apache.nutch.crawl.Generator;
import org.apache.nutch.crawl.Injector;
import org.apache.nutch.crawl.LinkDb;
import org.apache.nutch.fetcher.Fetcher;
import org.apache.nutch.parse.ParseSegment;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;

/**
 * Command-line tool that runs a synthetic crawl and reports how long each
 * stage took.
 *
 * <p>A run creates a seed list of generated URLs, injects it into a fresh
 * crawldb and then performs up to {@code depth} cycles of
 * generate / fetch / parse / update / invert, timing every step. The
 * collected numbers are returned as a {@link BenchmarkResults}.
 */
public class Benchmark extends Configured implements Tool {
    private static final Log LOG = LogFactory.getLog(Benchmark.class);

    /**
     * Program entry point: runs the tool through {@link ToolRunner} with a
     * configuration from {@code NutchConfiguration.create()} and exits the JVM
     * with the code returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, see {@link #run(String[])}
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        int res = ToolRunner.run(conf, new Benchmark(), args);
        System.exit(res);
    }

    /**
     * Formats the current time as {@code yyyyMMddHHmmss}.
     *
     * <p>Not referenced anywhere else in this class; kept as-is.
     *
     * @return the formatted timestamp
     */
    private static String getDate() {
        return new SimpleDateFormat("yyyyMMddHHmmss").format(new Date(System.currentTimeMillis()));
    }

    /**
     * Writes a file named {@code seeds} into {@code seedsDir} containing
     * {@code count} URLs of the form {@code http://www.test-N.com/}, one per
     * line (CRLF terminated), with N running from 0 to {@code count - 1}.
     *
     * @param fs       file system to create the file on
     * @param seedsDir directory that receives the {@code seeds} file
     * @param count    number of URLs to write; nothing is written if it is
     *                 zero or negative
     * @throws Exception if the file cannot be created or written
     */
    private void createSeeds(FileSystem fs, Path seedsDir, int count) throws Exception {
        FSDataOutputStream os = fs.create(new Path(seedsDir, "seeds"));
        try {
            for (int i = 0; i < count; ++i) {
                String url = "http://www.test-" + i + ".com/\r\n";
                // Fixed encoding instead of the platform default, so the seed
                // file has the same bytes on every machine.
                os.write(url.getBytes("UTF-8"));
            }
            os.flush();
        } finally {
            // Close even when a write fails, otherwise the stream leaks.
            os.close();
        }
    }

    /**
     * Prints the command-line help to {@code System.err}.
     *
     * @param defaultPlugins the plugin list reported as the built-in default
     */
    private static void printUsage(String defaultPlugins) {
        System.err.println("Usage: Benchmark [-seeds NN] [-depth NN] [-threads NN] [-keep] [-maxPerHost NN] [-plugins <regex>]");
        System.err.println("\t-seeds NN\tcreate NN unique hosts in a seed list (default: 1)");
        System.err.println("\t-depth NN\tperform NN crawl cycles (default: 10)");
        System.err.println("\t-threads NN\tuse NN threads per Fetcher task (default: 10)");
        System.err.println("\t-keep\tkeep segment data (default: delete after updatedb)");
        System.err.println("\t-plugins <regex>\toverride 'plugin.includes'.");
        System.err.println("\tNOTE: if not specified, this is reset to: " + defaultPlugins);
        System.err.println("\tNOTE: if 'default' is specified then a value set in nutch-default/nutch-site is used.");
        System.err.println("\t-maxPerHost NN\tmax. # of URLs per host in a fetchlist");
    }

    /**
     * Parses the command line, runs the benchmark and prints the results to
     * {@code System.out}.
     *
     * <p>Recognised options are {@code -seeds NN}, {@code -depth NN},
     * {@code -threads NN}, {@code -keep}, {@code -plugins <regex>} and
     * {@code -maxPerHost NN} (the latter matched case-insensitively). With no
     * arguments at all only the usage text is printed.
     *
     * @param args command-line arguments
     * @return {@code 0} on success; {@code -1} if no arguments were given or
     *         an argument is unknown, lacks its value, or has a non-numeric
     *         value where a number is required
     * @throws Exception if the benchmark itself fails
     */
    public int run(String[] args) throws Exception {
        String plugins = "protocol-http|parse-tika|scoring-opic|urlfilter-regex|urlnormalizer-pass";
        int seeds = 1;
        int depth = 10;
        int threads = 10;
        boolean delete = true;
        // No command-line option sets topN; it is always passed as "unlimited".
        long topN = Long.MAX_VALUE;
        int maxPerHost = Integer.MAX_VALUE;

        if (args == null || args.length == 0) {
            printUsage(plugins);
            return -1;
        }

        for (int i = 0; i < args.length; ++i) {
            String option = args[i];
            if (option.equals("-keep")) {
                delete = false;
                continue;
            }

            boolean isPlugins = option.equals("-plugins");
            boolean isNumeric = option.equals("-seeds")
                    || option.equals("-threads")
                    || option.equals("-depth")
                    || option.equalsIgnoreCase("-maxPerHost");
            if (!isPlugins && !isNumeric) {
                LOG.fatal("Invalid argument: '" + option + "'");
                return -1;
            }

            // Every remaining option consumes the next argument; make sure it
            // exists instead of running off the end of the array.
            if (i + 1 >= args.length) {
                LOG.fatal("Missing value for argument: '" + option + "'");
                return -1;
            }
            String value = args[++i];

            if (isPlugins) {
                plugins = value;
                continue;
            }

            int number;
            try {
                number = Integer.parseInt(value);
            } catch (NumberFormatException e) {
                LOG.fatal("Invalid numeric value for argument '" + option + "': '" + value + "'");
                return -1;
            }

            if (option.equals("-seeds")) {
                seeds = number;
            } else if (option.equals("-threads")) {
                threads = number;
            } else if (option.equals("-depth")) {
                depth = number;
            } else {
                maxPerHost = number;
            }
        }

        BenchmarkResults res = this.benchmark(seeds, depth, threads, maxPerHost, topN, delete, plugins);
        System.out.println(res);
        return 0;
    }

    /**
     * Runs one complete benchmark crawl and returns the collected timings.
     *
     * <p>The configuration returned by {@code getConf()} is modified in place:
     * the HTTP proxy is set to {@code localhost:8181}, the agent name to
     * {@code test}, {@code plugin.includes} to {@code plugins} (unless that is
     * the literal {@code "default"}), and the per-host generate limit to
     * {@code maxPerHost}. NOTE(review): a proxy is presumably expected to be
     * listening on that port - confirm against the test setup.
     *
     * <p>All data is written below a new {@code bench-<millis>} directory
     * under {@code hadoop.tmp.dir}; that directory is not removed afterwards.
     *
     * @param seeds      number of seed URLs to generate
     * @param depth      maximum number of crawl cycles
     * @param threads    thread count handed to the fetcher
     * @param maxPerHost value stored as {@code generate.max.count}
     * @param topN       topN value handed to the generator
     * @param delete     whether each cycle's segments are deleted once the
     *                   cycle has finished
     * @param plugins    value for {@code plugin.includes}, or {@code "default"}
     *                   to leave the configured value untouched
     * @return the timings per stage and cycle, plus the run parameters
     * @throws Exception if any file system operation or crawl step fails
     */
    public BenchmarkResults benchmark(int seeds, int depth, int threads, int maxPerHost, long topN, boolean delete, String plugins) throws Exception {
        Configuration conf = this.getConf();
        conf.set("http.proxy.host", "localhost");
        conf.setInt("http.proxy.port", 8181);
        conf.set("http.agent.name", "test");
        conf.set("http.robots.agents", "test,*");
        if (!plugins.equals("default")) {
            conf.set("plugin.includes", plugins);
        }
        conf.setInt("generate.max.count", maxPerHost);
        conf.set("generate.count.mode", "host");

        // Held as a plain Configuration so the calls below resolve to the same
        // overloads the original explicit casts selected.
        Configuration job = new NutchJob(conf);
        FileSystem fs = FileSystem.get(job);

        Path dir = new Path(conf.get("hadoop.tmp.dir"), "bench-" + System.currentTimeMillis());
        fs.mkdirs(dir);
        Path rootUrlDir = new Path(dir, "seed");
        fs.mkdirs(rootUrlDir);
        this.createSeeds(fs, rootUrlDir, seeds);

        if (LOG.isInfoEnabled()) {
            LOG.info("crawl started in: " + dir);
            LOG.info("rootUrlDir = " + rootUrlDir);
            LOG.info("threads = " + threads);
            LOG.info("depth = " + depth);
        }

        BenchmarkResults res = new BenchmarkResults();
        res.delete = delete;
        res.depth = depth;
        res.plugins = plugins;
        res.seeds = seeds;
        res.threads = threads;
        res.topN = topN;

        Path crawlDb = new Path(dir + "/crawldb");
        Path linkDb = new Path(dir + "/linkdb");
        Path segments = new Path(dir + "/segments");

        // Temporarily holds the start time; turned into a duration after the loop.
        res.elapsed = System.currentTimeMillis();

        Injector injector = new Injector(conf);
        Generator generator = new Generator(conf);
        Fetcher fetcher = new Fetcher(conf);
        ParseSegment parseSegment = new ParseSegment(conf);
        CrawlDb crawlDbTool = new CrawlDb(conf);
        LinkDb linkDbTool = new LinkDb(conf);

        long start = System.currentTimeMillis();
        injector.inject(crawlDb, rootUrlDir);
        long delta = System.currentTimeMillis() - start;
        res.addTiming("inject", "0", delta);

        // Declared outside the loop: its final value tells whether the very
        // first cycle already had nothing to do.
        int i;
        for (i = 0; i < depth; ++i) {
            String run = String.valueOf(i);

            start = System.currentTimeMillis();
            Path[] segs = generator.generate(crawlDb, segments, -1, topN, System.currentTimeMillis());
            delta = System.currentTimeMillis() - start;
            res.addTiming("generate", run, delta);
            if (segs == null) {
                LOG.info("Stopping at depth=" + i + " - no more URLs to fetch.");
                break;
            }

            start = System.currentTimeMillis();
            fetcher.fetch(segs[0], threads);
            delta = System.currentTimeMillis() - start;
            res.addTiming("fetch", run, delta);

            // A separate parse step runs only when Fetcher.isParsing reports false.
            if (!Fetcher.isParsing(job)) {
                start = System.currentTimeMillis();
                parseSegment.parse(segs[0]);
                delta = System.currentTimeMillis() - start;
                res.addTiming("parse", run, delta);
            }

            start = System.currentTimeMillis();
            crawlDbTool.update(crawlDb, segs, true, true);
            delta = System.currentTimeMillis() - start;
            res.addTiming("update", run, delta);

            start = System.currentTimeMillis();
            linkDbTool.invert(linkDb, segs, true, true, false);
            delta = System.currentTimeMillis() - start;
            res.addTiming("invert", run, delta);

            if (delete) {
                for (Path p : segs) {
                    fs.delete(p, true);
                }
            }
        }
        if (i == 0) {
            LOG.warn("No URLs to fetch - check your seed list and URL filters.");
        }
        if (LOG.isInfoEnabled()) {
            LOG.info("crawl finished: " + dir);
        }
        res.elapsed = System.currentTimeMillis() - res.elapsed;

        // Runs after the elapsed time is fixed, so it is not part of the total.
        CrawlDbReader dbreader = new CrawlDbReader();
        dbreader.processStatJob(crawlDb.toString(), conf, false);
        return res;
    }

    /**
     * Holds the parameters of one benchmark run and the time (in
     * milliseconds) recorded for every stage / run combination.
     */
    public static final class BenchmarkResults {
        /** Timings keyed by stage name, then by run label. */
        Map<String, Map<String, Long>> timings = new HashMap<String, Map<String, Long>>();
        /** Run labels in the order they were first recorded. */
        List<String> runs = new ArrayList<String>();
        /** Stage names in the order they were first recorded. */
        List<String> stages = new ArrayList<String>();
        int seeds;
        int depth;
        int threads;
        boolean delete;
        long topN;
        /** Total elapsed time in milliseconds. */
        long elapsed;
        String plugins;

        /**
         * Records a timing, replacing any value stored earlier for the same
         * stage and run.
         *
         * @param stage  stage name, e.g. {@code "fetch"}
         * @param run    run label
         * @param timing duration in milliseconds
         */
        public void addTiming(String stage, String run, long timing) {
            if (!this.runs.contains(run)) {
                this.runs.add(run);
            }
            if (!this.stages.contains(stage)) {
                this.stages.add(stage);
            }
            Map<String, Long> perRun = this.timings.get(stage);
            if (perRun == null) {
                perRun = new HashMap<String, Long>();
                this.timings.put(stage, perRun);
            }
            perRun.put(run, timing);
        }

        /**
         * Renders the run parameters followed by the timings, grouped by stage
         * and listed per run, in first-recorded order.
         *
         * @return the multi-line report
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("* Plugins:\t").append(this.plugins).append("\n");
            sb.append("* Seeds:\t").append(this.seeds).append("\n");
            sb.append("* Depth:\t").append(this.depth).append("\n");
            sb.append("* Threads:\t").append(this.threads).append("\n");
            sb.append("* TopN:\t").append(this.topN).append("\n");
            sb.append("* Delete:\t").append(this.delete).append("\n");
            sb.append("* TOTAL ELAPSED:\t").append(this.elapsed).append("\n");
            for (String stage : this.stages) {
                Map<String, Long> perRun = this.timings.get(stage);
                if (perRun == null) {
                    continue;
                }
                sb.append("- stage: ").append(stage).append("\n");
                for (String run : this.runs) {
                    Long time = perRun.get(run);
                    // Not every stage is recorded for every run.
                    if (time == null) {
                        continue;
                    }
                    sb.append("\trun ").append(run).append("\t").append(time).append("\n");
                }
            }
            return sb.toString();
        }

        /**
         * @return the live list of stage names, in first-recorded order
         */
        public List<String> getStages() {
            return this.stages;
        }

        /**
         * @return the live list of run labels, in first-recorded order
         */
        public List<String> getRuns() {
            return this.runs;
        }
    }
}

