/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.indexer;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.indexer.IndexWriters;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CleaningJob
implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(CleaningJob.class);
    private Configuration conf;

    /**
     * Returns the configuration held by this tool.
     *
     * @return the configuration last passed to {@link #setConf(Configuration)},
     *         or {@code null} if none has been set yet
     */
    @Override
    public Configuration getConf() {
        return conf;
    }

    /**
     * Stores the configuration this tool should use; it is handed back by
     * {@link #getConf()}.
     *
     * @param conf the configuration to keep
     */
    @Override
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Configures and runs the cleaning job over the {@code current} directory
     * of the given crawl DB, blocking until the job has finished.
     *
     * <p>The job maps with {@link DBFilter}, reduces with
     * {@link DeleterReducer} and uses {@link NullOutputFormat}, so it writes
     * no job output of its own.
     *
     * @param crawldb  path of the crawl DB; its {@code current} child is the job input
     * @param noCommit value stored in the job under the {@code noCommit} key,
     *                 which {@link DeleterReducer} reads back
     * @throws IOException if running the job fails
     */
    public void delete(String crawldb, boolean noCommit) throws IOException {
        final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        final long startMillis = System.currentTimeMillis();
        LOG.info("CleaningJob: starting at " + timestampFormat.format(startMillis));

        final NutchJob cleaningJob = new NutchJob(getConf());
        cleaningJob.setJobName("CleaningJob");

        // Input side: the crawl DB's "current" directory, read as sequence files.
        final Path currentDb = new Path(crawldb, "current");
        FileInputFormat.addInputPath(cleaningJob, currentDb);
        cleaningJob.setInputFormat(SequenceFileInputFormat.class);

        // Map side.
        cleaningJob.setMapperClass(DBFilter.class);
        cleaningJob.setMapOutputKeyClass(ByteWritable.class);
        cleaningJob.setMapOutputValueClass(Text.class);

        // Reduce side: nothing is emitted as job output.
        cleaningJob.setReducerClass(DeleterReducer.class);
        cleaningJob.setOutputFormat(NullOutputFormat.class);

        // Flags carried to the tasks through the job configuration.
        cleaningJob.setBoolean("noCommit", noCommit);
        cleaningJob.setBoolean("indexer.delete", true);

        JobClient.runJob(cleaningJob);

        final long endMillis = System.currentTimeMillis();
        LOG.info("CleaningJob: finished at " + timestampFormat.format(endMillis) + ", elapsed: " + TimingUtil.elapsedTime(startMillis, endMillis));
    }

    /**
     * Command-line entry point of the tool.
     *
     * <p>Expects the crawl DB path as the first argument. The commit is only
     * suppressed when exactly two arguments are given and the second one is
     * {@code -noCommit}.
     *
     * @param args command-line arguments: {@code <crawldb> [-noCommit]}
     * @return {@code 0} on success, {@code 1} when the crawl DB argument is
     *         missing (usage and the index writer description are printed),
     *         {@code -1} when the cleaning job threw an exception
     * @throws IOException declared by the signature; exceptions raised by
     *         {@link #delete(String, boolean)} are caught and reported here
     */
    @Override
    public int run(String[] args) throws IOException {
        if (args.length >= 1) {
            final boolean skipCommit = args.length == 2 && args[1].equals("-noCommit");
            try {
                delete(args[0], skipCommit);
                return 0;
            } catch (Exception failure) {
                // Report the full stack trace both to the log and to stderr.
                final String trace = StringUtils.stringifyException(failure);
                LOG.error("CleaningJob: " + trace);
                System.err.println("ERROR CleaningJob: " + trace);
                return -1;
            }
        }

        // No crawl DB given: print usage followed by the index writer description.
        final String usage = "Usage: CleaningJob <crawldb> [-noCommit]";
        LOG.error("Missing crawldb. " + usage);
        System.err.println(usage);
        final IndexWriters indexWriters = new IndexWriters(getConf());
        System.err.println(indexWriters.describe());
        return 1;
    }

    /**
     * Runs a {@link CleaningJob} through {@link ToolRunner} with a
     * configuration from {@link NutchConfiguration#create()} and exits the JVM
     * with the tool's return code.
     *
     * @param args command-line arguments forwarded to the tool
     * @throws Exception if {@link ToolRunner} propagates a failure
     */
    public static void main(String[] args) throws Exception {
        final Configuration nutchConf = NutchConfiguration.create();
        final Tool cleaner = new CleaningJob();
        System.exit(ToolRunner.run(nutchConf, cleaner, args));
    }

    public static class DeleterReducer
    implements Reducer<ByteWritable, Text, Text, ByteWritable> {
        private static final int NUM_MAX_DELETE_REQUEST = 1000;
        private int numDeletes = 0;
        private int totalDeleted = 0;
        private boolean noCommit = false;
        IndexWriters writers = null;

        public void configure(JobConf job) {
            this.writers = new IndexWriters((Configuration)job);
            try {
                this.writers.open(job, "Deletion");
            }
            catch (IOException e) {
                throw new RuntimeException(e);
            }
            this.noCommit = job.getBoolean("noCommit", false);
        }

        public void close() throws IOException {
            this.writers.close();
            if (this.totalDeleted > 0 && !this.noCommit) {
                this.writers.commit();
            }
            LOG.info("CleaningJob: deleted a total of " + this.totalDeleted + " documents");
        }

        public void reduce(ByteWritable key, Iterator<Text> values, OutputCollector<Text, ByteWritable> output, Reporter reporter) throws IOException {
            while (values.hasNext()) {
                Text document = values.next();
                this.writers.delete(document.toString());
                ++this.totalDeleted;
                reporter.incrCounter("CleaningJobStatus", "Deleted documents", 1L);
            }
        }
    }

    public static class DBFilter
    implements Mapper<Text, CrawlDatum, ByteWritable, Text> {
        private ByteWritable OUT = new ByteWritable(3);

        public void configure(JobConf arg0) {
        }

        public void close() throws IOException {
        }

        public void map(Text key, CrawlDatum value, OutputCollector<ByteWritable, Text> output, Reporter reporter) throws IOException {
            if (value.getStatus() == 3 || value.getStatus() == 7) {
                output.collect((Object)this.OUT, (Object)key);
            }
        }
    }
}

