/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Random;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.scoring.webgraph.LinkDatum;
import org.apache.nutch.scoring.webgraph.Loops;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.FSUtils;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LinkRank
extends Configured
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(LinkRank.class);
    private static final String NUM_NODES = "_num_nodes_";

    /**
     * Runs the counter job over the webgraph "nodes" database and returns the
     * number of records it counted.
     *
     * <p>The job uses {@link Counter} as mapper, combiner and reducer with a
     * single reduce task and writes its result as text to a temporary directory
     * named {@code _num_nodes_} under {@code webGraphDb}. The first line of
     * {@code part-00000} in that directory is read back and the temporary
     * directory is removed once the read has been attempted, whether or not it
     * succeeded.
     *
     * @param fs the file system used to read and delete the temporary output
     * @param webGraphDb the root directory of the web graph database
     * @return the count parsed from the second whitespace-separated field of the
     *         result line
     * @throws IOException if the job fails, if the result file is empty, or if
     *         the result line is malformed or does not fit into an {@code int}
     */
    private int runCounter(FileSystem fs, Path webGraphDb) throws IOException {
        Path numLinksPath = new Path(webGraphDb, NUM_NODES);
        Path nodeDb = new Path(webGraphDb, "nodes");
        JobConf counter = new NutchJob(this.getConf());
        counter.setJobName("LinkRank Counter");
        FileInputFormat.addInputPath(counter, nodeDb);
        FileOutputFormat.setOutputPath(counter, numLinksPath);
        counter.setInputFormat(SequenceFileInputFormat.class);
        counter.setMapperClass(Counter.class);
        counter.setCombinerClass(Counter.class);
        counter.setReducerClass(Counter.class);
        counter.setMapOutputKeyClass(Text.class);
        counter.setMapOutputValueClass(LongWritable.class);
        counter.setOutputKeyClass(Text.class);
        counter.setOutputValueClass(LongWritable.class);
        // A single reducer guarantees the total ends up in one part file.
        counter.setNumReduceTasks(1);
        counter.setOutputFormat(TextOutputFormat.class);
        counter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
        LOG.info("Starting link counter job");
        try {
            JobClient.runJob(counter);
        }
        catch (IOException e) {
            LOG.error(StringUtils.stringifyException(e));
            throw e;
        }
        LOG.info("Finished link counter job");

        LOG.info("Reading numlinks temp file");
        String numLinksLine;
        try {
            FSDataInputStream readLinks = fs.open(new Path(numLinksPath, "part-00000"));
            try {
                // TextOutputFormat writes UTF-8, so decode explicitly instead of
                // relying on the platform default charset.
                BufferedReader buffer = new BufferedReader(new InputStreamReader((InputStream)readLinks, "UTF-8"));
                numLinksLine = buffer.readLine();
            }
            finally {
                // Closing the underlying stream also releases the reader; doing it
                // in finally avoids leaking the stream when readLine() fails.
                readLinks.close();
            }
        }
        finally {
            // The temp directory is no longer needed once the read was attempted.
            LOG.info("Deleting numlinks temp file");
            fs.delete(numLinksPath, true);
        }

        if (numLinksLine == null || numLinksLine.length() == 0) {
            throw new IOException("No links to process, is the webgraph empty?");
        }

        // Expected layout: "<key><whitespace><count>".
        String[] fields = numLinksLine.split("\\s+");
        if (fields.length < 2) {
            throw new IOException("Malformed counter output line: '" + numLinksLine + "'");
        }
        try {
            return Integer.parseInt(fields[1]);
        }
        catch (NumberFormatException e) {
            // The job counts with longs; surface overflow or garbage as an
            // IOException instead of an unchecked exception.
            throw new IOException("Invalid node count '" + fields[1] + "' in counter output", e);
        }
    }

    /**
     * Configures and runs the "LinkAnalysis Initializer" job.
     *
     * <p>The job reads sequence files from {@code nodeDb}, maps them with
     * {@link Initializer} and writes {@code Text}/{@code Node} pairs to
     * {@code output} using {@code MapFileOutputFormat}. No reducer class is set
     * here.
     *
     * @param nodeDb the node database used as job input
     * @param output the directory the job writes to
     * @throws IOException if the job fails; the exception is logged and rethrown
     */
    private void runInitializer(Path nodeDb, Path output) throws IOException {
        JobConf job = new NutchJob(getConf());
        job.setJobName("LinkAnalysis Initializer");

        // Input side.
        job.setInputFormat(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);

        // Map side.
        job.setMapperClass(Initializer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Node.class);

        // Output side.
        job.setOutputFormat(MapFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Node.class);
        FileOutputFormat.setOutputPath(job, output);
        job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        LOG.info("Starting initialization job");
        try {
            JobClient.runJob(job);
        }
        catch (IOException failure) {
            LOG.error(StringUtils.stringifyException(failure));
            throw failure;
        }
        LOG.info("Finished initialization job.");
    }

    /**
     * Configures and runs the "LinkAnalysis Inverter" job.
     *
     * <p>Inputs are {@code nodeDb}, {@code outlinkDb} and, when it is not
     * {@code null}, {@code loopDb}. {@link Inverter} serves as both mapper and
     * reducer; the result is written to {@code output} as a sequence file of
     * {@code Text}/{@code LinkDatum} pairs.
     *
     * @param nodeDb the node database
     * @param outlinkDb the outlink database
     * @param loopDb the loops database, or {@code null} to run without it
     * @param output the directory the job writes to
     * @throws IOException if the job fails; the exception is logged and rethrown
     */
    private void runInverter(Path nodeDb, Path outlinkDb, Path loopDb, Path output) throws IOException {
        JobConf job = new NutchJob(getConf());
        job.setJobName("LinkAnalysis Inverter");

        // Input side: the loops database is optional.
        job.setInputFormat(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);
        FileInputFormat.addInputPath(job, outlinkDb);
        boolean useLoops = loopDb != null;
        if (useLoops) {
            FileInputFormat.addInputPath(job, loopDb);
        }

        // Map and reduce are both handled by Inverter.
        job.setMapperClass(Inverter.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setReducerClass(Inverter.class);

        // Output side.
        job.setOutputFormat(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LinkDatum.class);
        FileOutputFormat.setOutputPath(job, output);
        job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        LOG.info("Starting inverter job");
        try {
            JobClient.runJob(job);
        }
        catch (IOException failure) {
            LOG.error(StringUtils.stringifyException(failure));
            throw failure;
        }
        LOG.info("Finished inverter job.");
    }

    /**
     * Configures and runs one "LinkAnalysis Analyzer" job.
     *
     * <p>The one-based iteration number is stored under
     * {@code link.analyze.iteration} and {@code rankOne} under
     * {@code link.analyze.rank.one} so that {@link Analyzer} can read them.
     * Inputs are {@code nodeDb} and {@code inverted}; the result is written to
     * {@code output} as {@code Text}/{@code Node} pairs using
     * {@code MapFileOutputFormat}.
     *
     * @param nodeDb the node database holding the current scores
     * @param inverted the output of the inverter job
     * @param output the directory the job writes to
     * @param iteration the zero-based index of the current iteration
     * @param numIterations the total number of iterations (used in the job name)
     * @param rankOne the value passed to the job as {@code link.analyze.rank.one}
     * @throws IOException if the job fails; the exception is logged and rethrown
     */
    private void runAnalysis(Path nodeDb, Path inverted, Path output, int iteration, int numIterations, float rankOne) throws IOException {
        int round = iteration + 1;

        JobConf job = new NutchJob(getConf());
        job.setJobName("LinkAnalysis Analyzer, iteration " + round + " of " + numIterations);

        // Parameters consumed by Analyzer.configure().
        job.set("link.analyze.iteration", Integer.toString(round));
        job.set("link.analyze.rank.one", Float.toString(rankOne));

        // Input side.
        job.setInputFormat(SequenceFileInputFormat.class);
        FileInputFormat.addInputPath(job, nodeDb);
        FileInputFormat.addInputPath(job, inverted);

        // Map and reduce are both handled by Analyzer.
        job.setMapperClass(Analyzer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ObjectWritable.class);
        job.setReducerClass(Analyzer.class);

        // Output side.
        job.setOutputFormat(MapFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Node.class);
        FileOutputFormat.setOutputPath(job, output);
        job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);

        LOG.info("Starting analysis job");
        try {
            JobClient.runJob(job);
        }
        catch (IOException failure) {
            LOG.error(StringUtils.stringifyException(failure));
            throw failure;
        }
        LOG.info("Finished analysis job.");
    }

    /**
     * Creates a LinkRank without passing a configuration to the superclass.
     * NOTE(review): presumably the configuration is supplied later (for example
     * by ToolRunner, as in {@code main}) - confirm against Configured/ToolRunner.
     */
    public LinkRank() {
    }

    /**
     * Creates a LinkRank and hands the given configuration to the superclass
     * constructor.
     *
     * @param conf the configuration passed to {@code Configured}
     */
    public LinkRank(Configuration conf) {
        super(conf);
    }

    /**
     * Does nothing; the body is intentionally empty.
     */
    public void close() {
    }

    /**
     * Runs the complete link analysis over the given web graph database.
     *
     * <p>Steps, in order:
     * <ol>
     * <li>ensure the {@code linkrank} working directory exists under
     *     {@code webGraphDb};</li>
     * <li>run the counter job and the initializer job;</li>
     * <li>for each of {@code link.analyze.num.iterations} iterations (default
     *     10), run the inverter and analysis jobs into a randomly named
     *     temporary directory and install it over the working directory via
     *     {@code FSUtils.replace};</li>
     * <li>install the working node database over the webgraph's {@code nodes}
     *     database via {@code FSUtils.replace} and delete the working
     *     directory.</li>
     * </ol>
     * The {@code loops} database is only used as inverter input when it exists.
     *
     * @param webGraphDb the root directory of the web graph database
     * @throws IOException if a file system operation or one of the jobs fails
     */
    public void analyze(Path webGraphDb) throws IOException {
        SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("Analysis: starting at " + timestampFormat.format(start));

        Configuration conf = getConf();
        FileSystem fs = FileSystem.get(conf);

        // Working directory for the intermediate scores.
        Path linkRank = new Path(webGraphDb, "linkrank");
        if (!fs.exists(linkRank)) {
            fs.mkdirs(linkRank);
        }
        Path nodeDb = new Path(linkRank, "nodes");

        // Inputs taken from the web graph itself; the loops database is optional.
        Path wgOutlinkDb = new Path(webGraphDb, "outlinks/current");
        Path wgNodeDb = new Path(webGraphDb, "nodes");
        Path loopsCandidate = new Path(webGraphDb, "loops");
        Path loopDb = fs.exists(loopsCandidate) ? loopsCandidate : null;

        int numLinks = runCounter(fs, webGraphDb);
        runInitializer(wgNodeDb, nodeDb);
        float rankOneScore = 1.0f / (float)numLinks;
        LOG.info("Analysis: Number of links: " + numLinks);
        LOG.info("Analysis: Rank One: " + rankOneScore);

        int numIterations = conf.getInt("link.analyze.num.iterations", 10);
        for (int round = 1; round <= numIterations; ++round) {
            String progress = round + " of " + numIterations;
            LOG.info("Analysis: Starting iteration " + progress);

            // Each iteration writes into its own randomly suffixed directory.
            int suffix = new Random().nextInt(Integer.MAX_VALUE);
            Path tempRank = new Path(linkRank + "-" + suffix);
            fs.mkdirs(tempRank);
            Path tempInverted = new Path(tempRank, "inverted");
            Path tempNodeDb = new Path(tempRank, "nodes");

            runInverter(nodeDb, wgOutlinkDb, loopDb, tempInverted);
            // runAnalysis expects the zero-based iteration index.
            runAnalysis(nodeDb, tempInverted, tempNodeDb, round - 1, numIterations, rankOneScore);

            LOG.info("Analysis: Installing new link scores");
            FSUtils.replace(fs, linkRank, tempRank, true);
            LOG.info("Analysis: finished iteration " + progress);
        }

        LOG.info("Analysis: Installing web graph nodes");
        FSUtils.replace(fs, wgNodeDb, nodeDb, true);
        fs.delete(linkRank, true);

        long end = System.currentTimeMillis();
        LOG.info("Analysis: finished at " + timestampFormat.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    }

    /**
     * Command-line entry point: runs a new {@code LinkRank} through
     * {@code ToolRunner} with a configuration from
     * {@code NutchConfiguration.create()} and exits the JVM with the returned
     * status code.
     *
     * @param args the command-line arguments forwarded to {@link #run(String[])}
     * @throws Exception if {@code ToolRunner.run} throws
     */
    public static void main(String[] args) throws Exception {
        Configuration nutchConf = NutchConfiguration.create();
        LinkRank tool = new LinkRank();
        System.exit(ToolRunner.run(nutchConf, tool, args));
    }

    /**
     * Parses the command line and runs the analysis.
     *
     * <p>Recognised options are {@code -help} and {@code -webgraphdb <path>}.
     *
     * @param args the command-line arguments
     * @return {@code 0} after a successful analysis, {@code -1} when usage help
     *         was printed (help requested or {@code webgraphdb} missing), and
     *         {@code -2} when parsing or the analysis threw an exception
     * @throws Exception declared by the signature; exceptions raised inside the
     *         try block are logged and converted to {@code -2}
     */
    public int run(String[] args) throws Exception {
        Options cliOptions = new Options();

        // -help
        OptionBuilder.withArgName("help");
        OptionBuilder.withDescription("show this help message");
        cliOptions.addOption(OptionBuilder.create("help"));

        // -webgraphdb <path>
        OptionBuilder.withArgName("webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("the web graph db to use");
        cliOptions.addOption(OptionBuilder.create("webgraphdb"));

        GnuParser cliParser = new GnuParser();
        try {
            CommandLine parsed = cliParser.parse(cliOptions, args);
            boolean helpRequested = parsed.hasOption("help");
            if (!helpRequested && parsed.hasOption("webgraphdb")) {
                analyze(new Path(parsed.getOptionValue("webgraphdb")));
                return 0;
            }
            // Either help was asked for or the mandatory option is missing.
            new HelpFormatter().printHelp("LinkRank", cliOptions);
            return -1;
        }
        catch (Exception failure) {
            LOG.error("LinkAnalysis: " + StringUtils.stringifyException(failure));
            return -2;
        }
    }

    private static class Analyzer
    implements Mapper<Text, Writable, Text, ObjectWritable>,
    Reducer<Text, ObjectWritable, Text, Node> {
        private JobConf conf;
        private float dampingFactor = 0.85f;
        private float rankOne = 0.0f;
        private int itNum = 0;
        private boolean limitPages = true;
        private boolean limitDomains = true;

        private Analyzer() {
        }

        public void configure(JobConf conf) {
            try {
                this.conf = conf;
                this.dampingFactor = conf.getFloat("link.analyze.damping.factor", 0.85f);
                this.rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
                this.itNum = conf.getInt("link.analyze.iteration", 0);
                this.limitPages = conf.getBoolean("link.ignore.limit.page", true);
                this.limitDomains = conf.getBoolean("link.ignore.limit.domain", true);
            }
            catch (Exception e) {
                LOG.error(StringUtils.stringifyException((Throwable)e));
                throw new IllegalArgumentException(e);
            }
        }

        public void map(Text key, Writable value, OutputCollector<Text, ObjectWritable> output, Reporter reporter) throws IOException {
            ObjectWritable objWrite = new ObjectWritable();
            objWrite.set((Object)WritableUtils.clone((Writable)value, (Configuration)this.conf));
            output.collect((Object)key, (Object)objWrite);
        }

        public void reduce(Text key, Iterator<ObjectWritable> values, OutputCollector<Text, Node> output, Reporter reporter) throws IOException {
            String url = key.toString();
            HashSet<String> domains = new HashSet<String>();
            HashSet<String> pages = new HashSet<String>();
            Node node = null;
            int numInlinks = 0;
            float totalInlinkScore = this.rankOne;
            while (values.hasNext()) {
                ObjectWritable next = values.next();
                Object value = next.get();
                if (value instanceof Node) {
                    node = (Node)value;
                    continue;
                }
                if (!(value instanceof LinkDatum)) continue;
                LinkDatum linkDatum = (LinkDatum)value;
                float scoreFromInlink = linkDatum.getScore();
                String inlinkUrl = linkDatum.getUrl();
                String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
                String inLinkPage = URLUtil.getPage(inlinkUrl);
                if (this.limitPages && pages.contains(inLinkPage) || this.limitDomains && domains.contains(inLinkDomain)) {
                    LOG.debug(url + ": ignoring " + scoreFromInlink + " from " + inlinkUrl + ", duplicate page or domain");
                    continue;
                }
                ++numInlinks;
                domains.add(inLinkDomain);
                pages.add(inLinkPage);
                LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl + ", total: " + (totalInlinkScore += scoreFromInlink));
            }
            float linkRankScore = 1.0f - this.dampingFactor + this.dampingFactor * totalInlinkScore;
            LOG.debug(url + ": score: " + linkRankScore + " num inlinks: " + numInlinks + " iteration: " + this.itNum);
            Node outNode = (Node)WritableUtils.clone((Writable)node, (Configuration)this.conf);
            outNode.setInlinkScore(linkRankScore);
            output.collect((Object)key, (Object)outNode);
        }

        public void close() throws IOException {
        }
    }

    /**
     * Mapper and reducer of the inverter job.
     *
     * <p>The mapper wraps every input value in an {@code ObjectWritable} under
     * its original key. The reducer gathers, per source URL, the {@code Node},
     * its {@code LinkDatum} outlinks and an optional {@code Loops.LoopSet}, and
     * emits each outlink keyed by its target URL, with the link's URL replaced
     * by the source URL and its score set to the node's outlink score. Targets
     * contained in the loop set are skipped.
     */
    private static class Inverter
    implements Mapper<Text, Writable, Text, ObjectWritable>,
    Reducer<Text, ObjectWritable, Text, LinkDatum> {
        private JobConf conf;

        private Inverter() {
        }

        /** Stores the job configuration; it is needed for cloning in reduce. */
        public void configure(JobConf conf) {
            this.conf = conf;
        }

        /**
         * Emits the value, wrapped in an {@code ObjectWritable}, under the
         * unchanged key.
         */
        public void map(Text key, Writable value, OutputCollector<Text, ObjectWritable> output, Reporter reporter) throws IOException {
            ObjectWritable objWrite = new ObjectWritable();
            objWrite.set(value);
            output.collect(key, objWrite);
        }

        /**
         * Inverts the outlinks of one source URL.
         *
         * <p>If no {@code Node} was received for the key, a warning is logged
         * and nothing is emitted. Nothing is emitted either when the node
         * reports no outlinks.
         *
         * @throws IOException if output collection fails
         */
        public void reduce(Text key, Iterator<ObjectWritable> values, OutputCollector<Text, LinkDatum> output, Reporter reporter) throws IOException {
            String fromUrl = key.toString();
            ArrayList<LinkDatum> outlinks = new ArrayList<LinkDatum>();
            Node node = null;
            Loops.LoopSet loops = null;

            while (values.hasNext()) {
                Object obj = values.next().get();
                if (obj instanceof Node) {
                    node = (Node)obj;
                } else if (obj instanceof LinkDatum) {
                    outlinks.add(WritableUtils.clone((LinkDatum)obj, this.conf));
                } else if (obj instanceof Loops.LoopSet) {
                    loops = (Loops.LoopSet)obj;
                }
            }

            if (node == null) {
                // Without a node there is no score to distribute. Previously only
                // the LoopSet case was guarded and every other case failed with a
                // NullPointerException below.
                if (loops != null) {
                    LOG.warn("LoopSet without Node object received for " + key.toString() + " . You should either not use Loops as input of the LinkRank program or rerun the Loops program over the WebGraph.");
                } else {
                    LOG.warn("No Node object received for " + fromUrl + ", skipping " + outlinks.size() + " outlink(s)");
                }
                return;
            }

            int numOutlinks = node.getNumOutlinks();
            float inlinkScore = node.getInlinkScore();
            float outlinkScore = node.getOutlinkScore();
            LOG.debug(fromUrl + ": num outlinks " + numOutlinks);
            if (numOutlinks <= 0) {
                return;
            }

            Set<String> loopSet = loops != null ? loops.getLoopSet() : null;
            for (int i = 0; i < outlinks.size(); ++i) {
                LinkDatum outlink = outlinks.get(i);
                String toUrl = outlink.getUrl();
                if (loopSet != null && loopSet.contains(toUrl)) {
                    LOG.debug(fromUrl + ": Skipping inverting inlink from loop " + toUrl);
                    continue;
                }
                // Turn the outlink into an inlink of its target: the datum now
                // names the source URL and carries the source's outlink score.
                outlink.setUrl(fromUrl);
                outlink.setScore(outlinkScore);
                output.collect(new Text(toUrl), outlink);
                LOG.debug(toUrl + ": inverting inlink from " + fromUrl + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks + " inlinkscore: " + outlinkScore);
            }
        }

        /** Nothing to release. */
        public void close() {
        }
    }

    /**
     * Mapper of the initializer job: emits a copy of every node with its inlink
     * score set to the value of {@code link.analyze.initial.score} (default
     * {@code 1.0}).
     */
    private static class Initializer
    implements Mapper<Text, Node, Text, Node> {
        private JobConf conf;
        private float initialScore = 1.0f;

        private Initializer() {
        }

        /** Stores the job configuration and reads the initial score from it. */
        public void configure(JobConf conf) {
            this.conf = conf;
            this.initialScore = conf.getFloat("link.analyze.initial.score", 1.0f);
        }

        /**
         * Clones the incoming node, sets the initial inlink score on the clone
         * and emits it under a fresh {@code Text} holding the same URL.
         */
        public void map(Text key, Node node, OutputCollector<Text, Node> output, Reporter reporter) throws IOException {
            Node seeded = WritableUtils.clone(node, this.conf);
            seeded.setInlinkScore(this.initialScore);

            Text urlKey = new Text(key.toString());
            output.collect(urlKey, seeded);
        }

        /** Nothing to release. */
        public void close() {
        }
    }

    /**
     * Mapper and reducer of the counter job: the mapper emits a one for every
     * input record under a single fixed key, and the reducer sums the values it
     * receives and emits the total under that same fixed key.
     */
    private static class Counter
    implements Mapper<Text, Node, Text, LongWritable>,
    Reducer<Text, LongWritable, Text, LongWritable> {
        // Single key shared by all emitted records.
        private static final Text COUNT_KEY = new Text("_num_nodes_");
        // Value emitted by the mapper for every input record.
        private static final LongWritable ONE = new LongWritable(1L);

        private Counter() {
        }

        /** No configuration is needed. */
        public void configure(JobConf conf) {
        }

        /** Emits {@code (fixed key, 1)} regardless of the input key and value. */
        public void map(Text key, Node value, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
            output.collect(COUNT_KEY, ONE);
        }

        /** Adds up all values and emits the sum under the fixed key. */
        public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, LongWritable> output, Reporter reporter) throws IOException {
            long sum = 0L;
            while (values.hasNext()) {
                LongWritable partial = values.next();
                sum += partial.get();
            }
            LongWritable total = new LongWritable(sum);
            output.collect(COUNT_KEY, total);
        }

        /** Nothing to release. */
        public void close() {
        }
    }
}

