/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class NodeDumper
extends Configured
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(NodeDumper.class);

    public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff, NameType nameType, AggrType aggrType, boolean asSequenceFile) throws Exception {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("NodeDumper: starting at " + sdf.format(start));
        Path nodeDb = new Path(webGraphDb, "nodes");
        Configuration conf = this.getConf();
        NutchJob dumper = new NutchJob(conf);
        dumper.setJobName("NodeDumper: " + webGraphDb);
        FileInputFormat.addInputPath((JobConf)dumper, (Path)nodeDb);
        dumper.setInputFormat(SequenceFileInputFormat.class);
        if (nameType == null) {
            dumper.setMapperClass(Sorter.class);
            dumper.setReducerClass(Sorter.class);
            dumper.setMapOutputKeyClass(FloatWritable.class);
            dumper.setMapOutputValueClass(Text.class);
        } else {
            dumper.setMapperClass(Dumper.class);
            dumper.setReducerClass(Dumper.class);
            dumper.setMapOutputKeyClass(Text.class);
            dumper.setMapOutputValueClass(FloatWritable.class);
        }
        dumper.setOutputKeyClass(Text.class);
        dumper.setOutputValueClass(FloatWritable.class);
        FileOutputFormat.setOutputPath((JobConf)dumper, (Path)output);
        if (asSequenceFile) {
            dumper.setOutputFormat(SequenceFileOutputFormat.class);
        } else {
            dumper.setOutputFormat(TextOutputFormat.class);
        }
        dumper.setNumReduceTasks(1);
        dumper.setBoolean("inlinks", type == DumpType.INLINKS);
        dumper.setBoolean("outlinks", type == DumpType.OUTLINKS);
        dumper.setBoolean("scores", type == DumpType.SCORES);
        dumper.setBoolean("host", nameType == NameType.HOST);
        dumper.setBoolean("domain", nameType == NameType.DOMAIN);
        dumper.setBoolean("sum", aggrType == AggrType.SUM);
        dumper.setBoolean("max", aggrType == AggrType.MAX);
        dumper.setLong("topn", topN);
        if (asEff) {
            dumper.set("mapred.textoutputformat.separator", "=");
        }
        try {
            LOG.info("NodeDumper: running");
            JobClient.runJob((JobConf)dumper);
        }
        catch (IOException e) {
            LOG.error(StringUtils.stringifyException((Throwable)e));
            throw e;
        }
        long end = System.currentTimeMillis();
        LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new NodeDumper(), (String[])args);
        System.exit(res);
    }

    public int run(String[] args) throws Exception {
        Options options = new Options();
        OptionBuilder.withArgName((String)"help");
        OptionBuilder.withDescription((String)"show this help message");
        Option helpOpts = OptionBuilder.create((String)"help");
        options.addOption(helpOpts);
        OptionBuilder.withArgName((String)"webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the web graph database to use");
        Option webGraphDbOpts = OptionBuilder.create((String)"webgraphdb");
        options.addOption(webGraphDbOpts);
        OptionBuilder.withArgName((String)"inlinks");
        OptionBuilder.withDescription((String)"show highest inlinks");
        Option inlinkOpts = OptionBuilder.create((String)"inlinks");
        options.addOption(inlinkOpts);
        OptionBuilder.withArgName((String)"outlinks");
        OptionBuilder.withDescription((String)"show highest outlinks");
        Option outlinkOpts = OptionBuilder.create((String)"outlinks");
        options.addOption(outlinkOpts);
        OptionBuilder.withArgName((String)"scores");
        OptionBuilder.withDescription((String)"show highest scores");
        Option scoreOpts = OptionBuilder.create((String)"scores");
        options.addOption(scoreOpts);
        OptionBuilder.withArgName((String)"topn");
        OptionBuilder.hasOptionalArg();
        OptionBuilder.withDescription((String)"show topN scores");
        Option topNOpts = OptionBuilder.create((String)"topn");
        options.addOption(topNOpts);
        OptionBuilder.withArgName((String)"output");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the output directory to use");
        Option outputOpts = OptionBuilder.create((String)"output");
        options.addOption(outputOpts);
        OptionBuilder.withArgName((String)"asEff");
        OptionBuilder.withDescription((String)"Solr ExternalFileField compatible output format");
        Option effOpts = OptionBuilder.create((String)"asEff");
        options.addOption(effOpts);
        OptionBuilder.hasArgs((int)2);
        OptionBuilder.withDescription((String)"group <host|domain> <sum|max>");
        Option groupOpts = OptionBuilder.create((String)"group");
        options.addOption(groupOpts);
        OptionBuilder.withArgName((String)"asSequenceFile");
        OptionBuilder.withDescription((String)"whether to output as a sequencefile");
        Option sequenceFileOpts = OptionBuilder.create((String)"asSequenceFile");
        options.addOption(sequenceFileOpts);
        GnuParser parser = new GnuParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("help") || !line.hasOption("webgraphdb")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("NodeDumper", options);
                return -1;
            }
            String webGraphDb = line.getOptionValue("webgraphdb");
            boolean inlinks = line.hasOption("inlinks");
            boolean outlinks = line.hasOption("outlinks");
            long topN = line.hasOption("topn") ? Long.parseLong(line.getOptionValue("topn")) : Long.MAX_VALUE;
            String output = line.getOptionValue("output");
            DumpType type = inlinks ? DumpType.INLINKS : (outlinks ? DumpType.OUTLINKS : DumpType.SCORES);
            NameType nameType = null;
            AggrType aggrType = null;
            String[] group = line.getOptionValues("group");
            if (group != null && group.length == 2) {
                NameType nameType2 = group[0].equals("host") ? NameType.HOST : (nameType = group[0].equals("domain") ? NameType.DOMAIN : null);
                aggrType = group[1].equals("sum") ? AggrType.SUM : (group[1].equals("sum") ? AggrType.MAX : null);
            }
            boolean asEff = line.hasOption("asEff");
            boolean asSequenceFile = line.hasOption("asSequenceFile");
            this.dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff, nameType, aggrType, asSequenceFile);
            return 0;
        }
        catch (Exception e) {
            LOG.error("NodeDumper: " + StringUtils.stringifyException((Throwable)e));
            return -2;
        }
    }

    public static class Dumper
    extends Configured
    implements Mapper<Text, Node, Text, FloatWritable>,
    Reducer<Text, FloatWritable, Text, FloatWritable> {
        private JobConf conf;
        private boolean inlinks = false;
        private boolean outlinks = false;
        private boolean scores = false;
        private long topn = Long.MAX_VALUE;
        private boolean host = false;
        private boolean domain = false;
        private boolean sum = false;
        private boolean max = false;

        public void configure(JobConf conf) {
            this.conf = conf;
            this.inlinks = conf.getBoolean("inlinks", false);
            this.outlinks = conf.getBoolean("outlinks", false);
            this.scores = conf.getBoolean("scores", true);
            this.topn = conf.getLong("topn", Long.MAX_VALUE);
            this.host = conf.getBoolean("host", false);
            this.domain = conf.getBoolean("domain", false);
            this.sum = conf.getBoolean("sum", false);
            this.max = conf.getBoolean("max", false);
        }

        public void close() {
        }

        public void map(Text key, Node node, OutputCollector<Text, FloatWritable> output, Reporter reporter) throws IOException {
            float number = 0.0f;
            number = this.inlinks ? (float)node.getNumInlinks() : (this.outlinks ? (float)node.getNumOutlinks() : node.getInlinkScore());
            if (this.host) {
                key.set(URLUtil.getHost(key.toString()));
            } else {
                key.set(URLUtil.getDomainName(key.toString()));
            }
            output.collect((Object)key, (Object)new FloatWritable(number));
        }

        public void reduce(Text key, Iterator<FloatWritable> values, OutputCollector<Text, FloatWritable> output, Reporter reporter) throws IOException {
            float sumOrMax = 0.0f;
            float val = 0.0f;
            for (long numCollected = 0L; values.hasNext() && numCollected < this.topn; ++numCollected) {
                val = values.next().get();
                if (this.sum) {
                    sumOrMax += val;
                    continue;
                }
                if (!(sumOrMax < val)) continue;
                sumOrMax = val;
            }
            output.collect((Object)key, (Object)new FloatWritable(sumOrMax));
        }
    }

    public static class Sorter
    extends Configured
    implements Mapper<Text, Node, FloatWritable, Text>,
    Reducer<FloatWritable, Text, Text, FloatWritable> {
        private JobConf conf;
        private boolean inlinks = false;
        private boolean outlinks = false;
        private boolean scores = false;
        private long topn = Long.MAX_VALUE;

        public void configure(JobConf conf) {
            this.conf = conf;
            this.inlinks = conf.getBoolean("inlinks", false);
            this.outlinks = conf.getBoolean("outlinks", false);
            this.scores = conf.getBoolean("scores", true);
            this.topn = conf.getLong("topn", Long.MAX_VALUE);
        }

        public void close() {
        }

        public void map(Text key, Node node, OutputCollector<FloatWritable, Text> output, Reporter reporter) throws IOException {
            float number = 0.0f;
            number = this.inlinks ? (float)node.getNumInlinks() : (this.outlinks ? (float)node.getNumOutlinks() : node.getInlinkScore());
            output.collect((Object)new FloatWritable(-number), (Object)key);
        }

        public void reduce(FloatWritable key, Iterator<Text> values, OutputCollector<Text, FloatWritable> output, Reporter reporter) throws IOException {
            float val = key.get();
            FloatWritable number = new FloatWritable(val == 0.0f ? 0.0f : -val);
            for (long numCollected = 0L; values.hasNext() && numCollected < this.topn; ++numCollected) {
                Text url = (Text)WritableUtils.clone((Writable)((Writable)values.next()), (Configuration)this.conf);
                output.collect((Object)url, (Object)number);
            }
        }
    }

    private static enum NameType {
        HOST,
        DOMAIN;

    }

    private static enum AggrType {
        SUM,
        MAX;

    }

    private static enum DumpType {
        INLINKS,
        OUTLINKS,
        SCORES;

    }
}

