/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.scoring.webgraph;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.Random;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ObjectWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.scoring.webgraph.Node;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ScoreUpdater
extends Configured
implements Tool,
Mapper<Text, Writable, Text, ObjectWritable>,
Reducer<Text, ObjectWritable, Text, CrawlDatum> {
    public static final Logger LOG = LoggerFactory.getLogger(ScoreUpdater.class);
    private JobConf conf;
    private float clearScore = 0.0f;

    public void configure(JobConf conf) {
        this.conf = conf;
        this.clearScore = conf.getFloat("link.score.updater.clear.score", 0.0f);
    }

    public void map(Text key, Writable value, OutputCollector<Text, ObjectWritable> output, Reporter reporter) throws IOException {
        ObjectWritable objWrite = new ObjectWritable();
        objWrite.set((Object)value);
        output.collect((Object)key, (Object)objWrite);
    }

    public void reduce(Text key, Iterator<ObjectWritable> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
        String url = key.toString();
        Node node = null;
        CrawlDatum datum = null;
        while (values.hasNext()) {
            ObjectWritable next = values.next();
            Object value = next.get();
            if (value instanceof Node) {
                node = (Node)value;
                continue;
            }
            if (!(value instanceof CrawlDatum)) continue;
            datum = (CrawlDatum)value;
        }
        if (datum != null) {
            if (node != null) {
                float inlinkScore = node.getInlinkScore();
                datum.setScore(inlinkScore);
                LOG.debug(url + ": setting to score " + inlinkScore);
            } else {
                datum.setScore(this.clearScore);
                LOG.debug(url + ": setting to clear score of " + this.clearScore);
            }
            output.collect((Object)key, (Object)datum);
        } else {
            LOG.debug(url + ": no datum");
        }
    }

    public void close() {
    }

    public void update(Path crawlDb, Path webGraphDb) throws IOException {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("ScoreUpdater: starting at " + sdf.format(start));
        Configuration conf = this.getConf();
        FileSystem fs = FileSystem.get((Configuration)conf);
        LOG.info("Running crawldb update " + crawlDb);
        Path nodeDb = new Path(webGraphDb, "nodes");
        Path crawlDbCurrent = new Path(crawlDb, "current");
        Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
        NutchJob updater = new NutchJob(conf);
        updater.setJobName("Update CrawlDb from WebGraph");
        FileInputFormat.addInputPath((JobConf)updater, (Path)crawlDbCurrent);
        FileInputFormat.addInputPath((JobConf)updater, (Path)nodeDb);
        FileOutputFormat.setOutputPath((JobConf)updater, (Path)newCrawlDb);
        updater.setInputFormat(SequenceFileInputFormat.class);
        updater.setMapperClass(ScoreUpdater.class);
        updater.setReducerClass(ScoreUpdater.class);
        updater.setMapOutputKeyClass(Text.class);
        updater.setMapOutputValueClass(ObjectWritable.class);
        updater.setOutputKeyClass(Text.class);
        updater.setOutputValueClass(CrawlDatum.class);
        updater.setOutputFormat(MapFileOutputFormat.class);
        try {
            JobClient.runJob((JobConf)updater);
        }
        catch (IOException e) {
            LOG.error(StringUtils.stringifyException((Throwable)e));
            if (fs.exists(newCrawlDb)) {
                fs.delete(newCrawlDb, true);
            }
            throw e;
        }
        LOG.info("ScoreUpdater: installing new crawldb " + crawlDb);
        CrawlDb.install(updater, crawlDb);
        long end = System.currentTimeMillis();
        LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run((Configuration)NutchConfiguration.create(), (Tool)new ScoreUpdater(), (String[])args);
        System.exit(res);
    }

    public int run(String[] args) throws Exception {
        Options options = new Options();
        OptionBuilder.withArgName((String)"help");
        OptionBuilder.withDescription((String)"show this help message");
        Option helpOpts = OptionBuilder.create((String)"help");
        options.addOption(helpOpts);
        OptionBuilder.withArgName((String)"crawldb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the crawldb to use");
        Option crawlDbOpts = OptionBuilder.create((String)"crawldb");
        options.addOption(crawlDbOpts);
        OptionBuilder.withArgName((String)"webgraphdb");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription((String)"the webgraphdb to use");
        Option webGraphOpts = OptionBuilder.create((String)"webgraphdb");
        options.addOption(webGraphOpts);
        GnuParser parser = new GnuParser();
        try {
            CommandLine line = parser.parse(options, args);
            if (line.hasOption("help") || !line.hasOption("webgraphdb") || !line.hasOption("crawldb")) {
                HelpFormatter formatter = new HelpFormatter();
                formatter.printHelp("ScoreUpdater", options);
                return -1;
            }
            String crawlDb = line.getOptionValue("crawldb");
            String webGraphDb = line.getOptionValue("webgraphdb");
            this.update(new Path(crawlDb), new Path(webGraphDb));
            return 0;
        }
        catch (Exception e) {
            LOG.error("ScoreUpdater: " + StringUtils.stringifyException((Throwable)e));
            return -1;
        }
    }
}

