/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.CrawlDb;
import org.apache.nutch.crawl.FetchSchedule;
import org.apache.nutch.crawl.FetchScheduleFactory;
import org.apache.nutch.crawl.URLPartitioner;
import org.apache.nutch.metadata.Nutch;
import org.apache.nutch.net.URLFilterException;
import org.apache.nutch.net.URLFilters;
import org.apache.nutch.net.URLNormalizers;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
import org.apache.nutch.util.LockUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.TimingUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Generator
extends Configured
implements Tool {
    /** Logger shared by the driver code and the nested map/reduce classes of this file. */
    public static final Logger LOG = LoggerFactory.getLogger(Generator.class);
    /** Boolean key (read with default false): when true, generate() runs an extra job that merges the new segments' crawl_generate data with the crawldb and installs the result. */
    public static final String GENERATE_UPDATE_CRAWLDB = "generate.update.crawldb";
    /** Float key (read with default NaN): Selector.map drops entries whose sort value is below this threshold. */
    public static final String GENERATOR_MIN_SCORE = "generate.min.score";
    /** Int key (read with default -1 = disabled): Selector.map drops entries whose fetch interval is greater than this value. */
    public static final String GENERATOR_MIN_INTERVAL = "generate.min.interval";
    /** String key (read with default null = disabled): Selector.map keeps only entries whose status name equals this value, ignoring case. */
    public static final String GENERATOR_RESTRICT_STATUS = "generate.restrict.status";
    /** Boolean key (read with default true): whether Selector.map runs the URL filters. */
    public static final String GENERATOR_FILTER = "generate.filter";
    /** Boolean key (read with default true): whether Selector normalizes URLs before extracting the host/domain. */
    public static final String GENERATOR_NORMALISE = "generate.normalise";
    /** Int key (read with default -1): per host/domain URL cap used by Selector.reduce; the cap is only applied when the value is greater than 0. */
    public static final String GENERATOR_MAX_COUNT = "generate.max.count";
    /** String key selecting how URLs are grouped for the cap; Selector only checks for the "domain" value. */
    public static final String GENERATOR_COUNT_MODE = "generate.count.mode";
    /** Value of GENERATOR_COUNT_MODE that makes Selector count by domain name instead of host. */
    public static final String GENERATOR_COUNT_VALUE_DOMAIN = "domain";
    /** Not referenced in the code visible here; presumably the "count by host" value of GENERATOR_COUNT_MODE - TODO confirm. */
    public static final String GENERATOR_COUNT_VALUE_HOST = "host";
    /** Long key (read with default Long.MAX_VALUE): total number of URLs to select; Selector divides it by the number of reduce tasks. */
    public static final String GENERATOR_TOP_N = "generate.topN";
    /** Long key: the "current time" in milliseconds used when asking the fetch schedule whether a URL is due. */
    public static final String GENERATOR_CUR_TIME = "generate.curTime";
    /** Long key (read with default 7): Selector multiplies it by 24*3600*1000, i.e. a number of days during which an already generated URL is not selected again. */
    public static final String GENERATOR_DELAY = "crawl.gen.delay";
    /** Int key (read with default 1): maximum number of segments (fetchlist-N outputs) Selector.reduce distributes URLs over. */
    public static final String GENERATOR_MAX_NUM_SEGMENTS = "generate.max.num.segments";
    /** Only inspected by generate() to log that the setting is ignored in favour of partition.url.mode. */
    public static final String GENERATE_MAX_PER_HOST_BY_IP = "generate.max.per.host.by.ip";
    /** Formatter for segment directory names (second resolution); used only by the synchronized generateSegmentName(). */
    private static SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmss");

    /**
     * Creates a generator without a configuration. A configuration has to be
     * supplied through {@code setConf} before any of the {@code generate}
     * methods is used (main() hands one to ToolRunner together with the tool;
     * presumably ToolRunner applies it - TODO confirm).
     */
    public Generator() {
        super();
    }

    /**
     * Creates a generator bound to the given configuration.
     *
     * @param conf configuration handed to {@code setConf}
     */
    public Generator(Configuration conf) {
        super();
        setConf(conf);
    }

    /**
     * Convenience overload: reads the filter/normalise switches from the
     * configuration (both default to true), never forces the crawldb lock and
     * generates at most one segment.
     *
     * @param dbDir crawldb directory
     * @param segments directory under which new segments are created
     * @param numLists number of fetch lists, or -1 to use the number of map tasks
     * @param topN maximum number of URLs to select
     * @param curTime time in milliseconds used to decide which URLs are due
     * @return the generated segment paths, or null when nothing was generated
     * @throws IOException if one of the underlying jobs fails
     */
    public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime) throws IOException {
        NutchJob settings = new NutchJob(getConf());
        boolean applyFilters = settings.getBoolean(GENERATOR_FILTER, true);
        boolean applyNormalizers = settings.getBoolean(GENERATOR_NORMALISE, true);
        boolean forceLock = false;
        int segmentLimit = 1;
        return generate(dbDir, segments, numLists, topN, curTime, applyFilters, applyNormalizers, forceLock, segmentLimit);
    }

    /**
     * Convenience overload with URL normalization always enabled and a single
     * output segment.
     *
     * @param dbDir crawldb directory
     * @param segments directory under which new segments are created
     * @param numLists number of fetch lists, or -1 to use the number of map tasks
     * @param topN maximum number of URLs to select
     * @param curTime time in milliseconds used to decide which URLs are due
     * @param filter whether URL filters are applied
     * @param force passed on to the crawldb lock creation
     * @return the generated segment paths, or null when nothing was generated
     * @throws IOException if one of the underlying jobs fails
     */
    public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter, boolean force) throws IOException {
        final boolean normalise = true;
        final int segmentLimit = 1;
        return generate(dbDir, segments, numLists, topN, curTime, filter, normalise, force, segmentLimit);
    }

    /**
     * Selects URLs that are due for fetching from the crawldb and writes them
     * into one or more new segments.
     *
     * <p>Steps: lock the crawldb, run the selection job (output goes to a
     * temporary directory, one "fetchlist-N" sub-directory per segment),
     * partition every fetch list into a new segment, and - when
     * {@link #GENERATE_UPDATE_CRAWLDB} is enabled - run a job that merges the
     * generated entries back into the crawldb. The lock file and the temporary
     * directory are released on every exit path, including failures.
     *
     * @param dbDir crawldb directory; its "current" sub-directory is the job input
     * @param segments directory under which new segments are created
     * @param numLists number of fetch lists (reduce tasks of the partition job),
     *        or -1 to use the job's number of map tasks; forced to 1 when the
     *        job tracker is "local"
     * @param topN maximum number of URLs to select; Long.MAX_VALUE means no limit
     * @param curTime time in milliseconds used to decide which URLs are due
     * @param filter whether URL filters are applied
     * @param norm whether URL normalizers are applied
     * @param force passed on to the crawldb lock creation
     * @param maxNumSegments maximum number of segments to generate
     * @return the generated segment paths, or null when no URL was selected or
     *         partitioning the selected URLs failed
     * @throws IOException if the selection job or the crawldb update job fails
     */
    public Path[] generate(Path dbDir, Path segments, int numLists, long topN, long curTime, boolean filter, boolean norm, boolean force, int maxNumSegments) throws IOException {
        Path tempDir = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());
        Path lock = new Path(dbDir, ".locked");
        FileSystem fs = FileSystem.get(getConf());
        LockUtil.createLockFile(fs, lock, force);

        SimpleDateFormat logDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        long start = System.currentTimeMillis();
        LOG.info("Generator: starting at " + logDateFormat.format(start));
        LOG.info("Generator: Selecting best-scoring urls due for fetch.");
        LOG.info("Generator: filtering: " + filter);
        LOG.info("Generator: normalizing: " + norm);
        if (topN != Long.MAX_VALUE) {
            LOG.info("Generator: topN: " + topN);
        }
        if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))) {
            LOG.info("Generator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead");
        }

        // Job 1: select the candidate URLs, sorted by decreasing score.
        NutchJob job = new NutchJob(getConf());
        job.setJobName("generate: select from " + dbDir);
        if (numLists == -1) {
            numLists = job.getNumMapTasks();
        }
        if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
            LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
            numLists = 1;
        }
        job.setLong(GENERATOR_CUR_TIME, curTime);
        // The same generate time is handed to the update job below so it can
        // recognise the entries produced by this run.
        long generateTime = System.currentTimeMillis();
        job.setLong("_ngt_", generateTime);
        job.setLong(GENERATOR_TOP_N, topN);
        job.setBoolean(GENERATOR_FILTER, filter);
        job.setBoolean(GENERATOR_NORMALISE, norm);
        job.setInt(GENERATOR_MAX_NUM_SEGMENTS, maxNumSegments);
        FileInputFormat.addInputPath(job, new Path(dbDir, "current"));
        job.setInputFormat(SequenceFileInputFormat.class);
        job.setMapperClass(Selector.class);
        job.setPartitionerClass(Selector.class);
        job.setReducerClass(Selector.class);
        FileOutputFormat.setOutputPath(job, tempDir);
        job.setOutputKeyClass(FloatWritable.class);
        job.setOutputKeyComparatorClass(DecreasingFloatComparator.class);
        job.setOutputValueClass(SelectorEntry.class);
        job.setOutputFormat(GeneratorOutputFormat.class);

        FileStatus[] status;
        try {
            JobClient.runJob(job);
            status = fs.listStatus(tempDir);
        }
        catch (IOException e) {
            // Do not leave the crawldb locked when the selection job fails.
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            throw e;
        }

        // Job 2 (once per fetch list): partition the selected URLs into a segment.
        ArrayList<Path> generatedSegments = new ArrayList<Path>();
        try {
            for (FileStatus stat : status) {
                Path subfetchlist = stat.getPath();
                if (!subfetchlist.getName().startsWith("fetchlist-")) {
                    continue;
                }
                generatedSegments.add(partitionSegment(fs, segments, subfetchlist, numLists));
            }
        }
        catch (Exception e) {
            LOG.warn("Generator: exception while partitioning segments, exiting ...", e);
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            return null;
        }

        if (generatedSegments.size() == 0) {
            LOG.warn("Generator: 0 records selected for fetching, exiting ...");
            LockUtil.removeLockFile(fs, lock);
            fs.delete(tempDir, true);
            return null;
        }

        // Job 3 (optional): record the generate time of the selected URLs in the crawldb.
        if (getConf().getBoolean(GENERATE_UPDATE_CRAWLDB, false)) {
            Path tempDir2 = new Path(getConf().get("mapred.temp.dir", ".") + "/generate-temp-" + UUID.randomUUID().toString());
            job = new NutchJob(getConf());
            job.setJobName("generate: updatedb " + dbDir);
            job.setLong("_ngt_", generateTime);
            for (Path segmpaths : generatedSegments) {
                FileInputFormat.addInputPath(job, new Path(segmpaths, "crawl_generate"));
            }
            FileInputFormat.addInputPath(job, new Path(dbDir, "current"));
            job.setInputFormat(SequenceFileInputFormat.class);
            job.setMapperClass(CrawlDbUpdater.class);
            job.setReducerClass(CrawlDbUpdater.class);
            job.setOutputFormat(MapFileOutputFormat.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(CrawlDatum.class);
            FileOutputFormat.setOutputPath(job, tempDir2);
            try {
                JobClient.runJob(job);
                CrawlDb.install(job, dbDir);
            }
            catch (IOException e) {
                LockUtil.removeLockFile(fs, lock);
                fs.delete(tempDir, true);
                fs.delete(tempDir2, true);
                throw e;
            }
            fs.delete(tempDir2, true);
        }

        LockUtil.removeLockFile(fs, lock);
        fs.delete(tempDir, true);
        long end = System.currentTimeMillis();
        LOG.info("Generator: finished at " + logDateFormat.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
        return generatedSegments.toArray(new Path[generatedSegments.size()]);
    }

    /**
     * Runs the job that turns one selected fetch list into a new segment: the
     * entries are re-keyed by URL, distributed with {@link URLPartitioner} over
     * {@code numLists} reduce tasks and written to the segment's
     * "crawl_generate" directory.
     *
     * @param fs file system handle (not used by the body)
     * @param segmentsDir directory under which the new segment is created
     * @param inputDir fetch list directory produced by the selection job
     * @param numLists number of reduce tasks, i.e. output partitions
     * @return path of the newly created segment
     * @throws IOException if the job fails
     */
    private Path partitionSegment(FileSystem fs, Path segmentsDir, Path inputDir, int numLists) throws IOException {
        LOG.info("Generator: Partitioning selected urls for politeness.");
        Path newSegment = new Path(segmentsDir, generateSegmentName());
        Path generateDir = new Path(newSegment, "crawl_generate");
        LOG.info("Generator: segment: " + newSegment);

        NutchJob partitionJob = new NutchJob(getConf());
        partitionJob.setJobName("generate: partition " + newSegment);
        // A fresh random seed per job is handed to the URL partitioner.
        partitionJob.setInt("partition.url.seed", new Random().nextInt());

        // Input side: selector output, re-keyed by URL.
        FileInputFormat.addInputPath(partitionJob, inputDir);
        partitionJob.setInputFormat(SequenceFileInputFormat.class);
        partitionJob.setMapperClass(SelectorInverseMapper.class);
        partitionJob.setMapOutputKeyClass(Text.class);
        partitionJob.setMapOutputValueClass(SelectorEntry.class);

        // Shuffle and reduce side.
        partitionJob.setPartitionerClass(URLPartitioner.class);
        partitionJob.setReducerClass(PartitionReducer.class);
        partitionJob.setNumReduceTasks(numLists);

        // Output side: <url, datum> pairs ordered by the hash of the URL.
        FileOutputFormat.setOutputPath(partitionJob, generateDir);
        partitionJob.setOutputFormat(SequenceFileOutputFormat.class);
        partitionJob.setOutputKeyClass(Text.class);
        partitionJob.setOutputValueClass(CrawlDatum.class);
        partitionJob.setOutputKeyComparatorClass(HashComparator.class);

        JobClient.runJob(partitionJob);
        return newSegment;
    }

    /**
     * Builds a segment directory name from the current time, formatted as
     * {@code yyyyMMddHHmmss}.
     *
     * <p>The method sleeps for one second before reading the clock so that two
     * consecutive calls yield different names, and it is synchronized because
     * the shared {@link SimpleDateFormat} must not be used by several threads
     * at once.
     *
     * <p>If the thread is interrupted while sleeping, the interrupt status is
     * restored for the caller and a name is still returned; in that case the
     * one-second spacing is not guaranteed.
     *
     * @return the generated segment name
     */
    public static synchronized String generateSegmentName() {
        try {
            Thread.sleep(1000L);
        }
        catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
        return sdf.format(new Date(System.currentTimeMillis()));
    }

    /**
     * Command line entry point: runs the generator through ToolRunner with a
     * freshly created Nutch configuration and exits the JVM with the tool's
     * return code.
     *
     * @param args command line arguments, see {@link #run(String[])}
     * @throws Exception whatever the tool run throws
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        Generator generator = new Generator();
        System.exit(ToolRunner.run(conf, generator, args));
    }

    /**
     * Parses the command line and runs the generator.
     *
     * <p>Expected arguments: {@code <crawldb> <segments_dir>} followed by any
     * of {@code -force}, {@code -topN N}, {@code -numFetchers n},
     * {@code -adddays days}, {@code -noFilter}, {@code -noNorm} and
     * {@code -maxNumSegments n}. Unrecognised arguments are ignored. Every
     * option that takes a value consumes the following argument; when that
     * value is missing the usage text is printed and -1 is returned.
     *
     * @param args command line arguments
     * @return 0 on success, -1 on a usage error, when no segment was generated
     *         or when generation failed
     * @throws Exception if a numeric option value cannot be parsed
     */
    public int run(String[] args) throws Exception {
        final String usage = "Usage: Generator <crawldb> <segments_dir> [-force] [-topN N] [-numFetchers numFetchers] [-adddays numDays] [-noFilter] [-noNorm][-maxNumSegments num]";
        if (args.length < 2) {
            System.out.println(usage);
            return -1;
        }
        Path dbDir = new Path(args[0]);
        Path segmentsDir = new Path(args[1]);
        long curTime = System.currentTimeMillis();
        long topN = Long.MAX_VALUE;
        int numFetchers = -1;
        boolean filter = true;
        boolean norm = true;
        boolean force = false;
        int maxNumSegments = 1;

        for (int i = 2; i < args.length; ++i) {
            String option = args[i];
            boolean takesValue = "-topN".equals(option)
                || "-numFetchers".equals(option)
                || "-adddays".equals(option)
                || "-maxNumSegments".equals(option);
            if (takesValue) {
                if (i + 1 >= args.length) {
                    System.out.println("Generator: missing value for option " + option);
                    System.out.println(usage);
                    return -1;
                }
                // Consume the value so it is never mistaken for an option.
                String optionValue = args[++i];
                if ("-topN".equals(option)) {
                    topN = Long.parseLong(optionValue);
                } else if ("-numFetchers".equals(option)) {
                    numFetchers = Integer.parseInt(optionValue);
                } else if ("-adddays".equals(option)) {
                    // Shift the reference time into the future by whole days.
                    long numDays = Integer.parseInt(optionValue);
                    curTime += numDays * 1000L * 60L * 60L * 24L;
                } else {
                    maxNumSegments = Integer.parseInt(optionValue);
                }
            } else if ("-noFilter".equals(option)) {
                filter = false;
            } else if ("-noNorm".equals(option)) {
                norm = false;
            } else if ("-force".equals(option)) {
                force = true;
            }
        }

        try {
            Path[] segs = generate(dbDir, segmentsDir, numFetchers, topN, curTime, filter, norm, force, maxNumSegments);
            if (segs == null) {
                return -1;
            }
        }
        catch (Exception e) {
            LOG.error("Generator: " + StringUtils.stringifyException(e));
            return -1;
        }
        return 0;
    }

    /**
     * Map/reduce pair of the optional "updatedb" job: it merges the entries of
     * the freshly generated segments with the current crawldb entries so that
     * the crawldb records the generate time of this run.
     */
    public static class CrawlDbUpdater
    extends MapReduceBase
    implements Mapper<Text, CrawlDatum, Text, CrawlDatum>,
    Reducer<Text, CrawlDatum, Text, CrawlDatum> {
        /** Generate time of the current run, read from the "_ngt_" job property. */
        long generateTime;
        /** Reused output value holding the datum to write for the current key. */
        private CrawlDatum orig = new CrawlDatum();
        /** Reused holder for the generate time found for the current key (0 = none). */
        private LongWritable genTime = new LongWritable(0L);

        /** Reads the generate time of this run from the job configuration (0 if absent). */
        public void configure(JobConf job) {
            generateTime = job.getLong("_ngt_", 0L);
        }

        /** Identity map: passes every record through unchanged. */
        public void map(Text key, CrawlDatum value, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
            output.collect(key, value);
        }

        /**
         * Emits one datum per URL. Values that carry the generate time of this
         * run only contribute that time stamp; every other value replaces the
         * datum to emit. If a time stamp of this run is still pending after
         * the last value, it is stored in the emitted datum's metadata.
         */
        public void reduce(Text key, Iterator<CrawlDatum> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
            genTime.set(0L);
            while (values.hasNext()) {
                CrawlDatum datum = values.next();
                boolean stamped = datum.getMetaData().containsKey(Nutch.WRITABLE_GENERATE_TIME_KEY);
                if (!stamped) {
                    orig.set(datum);
                    continue;
                }
                LongWritable stamp = (LongWritable) datum.getMetaData().get(Nutch.WRITABLE_GENERATE_TIME_KEY);
                genTime.set(stamp.get());
                if (genTime.get() != generateTime) {
                    // Stamp of a different run: treat the value as the datum to keep.
                    orig.set(datum);
                    genTime.set(0L);
                }
            }
            boolean stampPending = genTime.get() != 0L;
            if (stampPending) {
                orig.getMetaData().put(Nutch.WRITABLE_GENERATE_TIME_KEY, genTime);
            }
            output.collect(key, orig);
        }
    }

    /**
     * Orders {@link Text} keys by a hash of their bytes rather than by their
     * content.
     */
    public static class HashComparator
    extends WritableComparator {
        public HashComparator() {
            super(Text.class);
        }

        /** Compares two deserialized Text keys by the hash of their valid bytes. */
        public int compare(WritableComparable a, WritableComparable b) {
            Text left = (Text) a;
            Text right = (Text) b;
            int leftHash = HashComparator.hash(left.getBytes(), 0, left.getLength());
            int rightHash = HashComparator.hash(right.getBytes(), 0, right.getLength());
            return HashComparator.order(leftHash, rightHash);
        }

        /** Compares two byte ranges by their hash. */
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            int leftHash = HashComparator.hash(b1, s1, l1);
            int rightHash = HashComparator.hash(b2, s2, l2);
            return HashComparator.order(leftHash, rightHash);
        }

        /** Returns -1, 0 or 1 for first less than, equal to or greater than second. */
        private static int order(int first, int second) {
            if (first < second) {
                return -1;
            }
            if (first == second) {
                return 0;
            }
            return 1;
        }

        /**
         * Polynomial (base 31, seed 1) hash of {@code length} bytes starting at
         * {@code start}, folding the bytes in from the last one to the first.
         */
        private static int hash(byte[] bytes, int start, int length) {
            int result = 1;
            int index = start + length;
            while (index > start) {
                --index;
                result = 31 * result + bytes[index];
            }
            return result;
        }
    }

    /**
     * Reducer of the partition job: unwraps every {@link SelectorEntry} and
     * emits its URL together with its crawl datum.
     */
    public static class PartitionReducer
    extends MapReduceBase
    implements Reducer<Text, SelectorEntry, Text, CrawlDatum> {
        /** Writes {@code <entry.url, entry.datum>} for each grouped entry; the grouping key itself is not emitted. */
        public void reduce(Text key, Iterator<SelectorEntry> values, OutputCollector<Text, CrawlDatum> output, Reporter reporter) throws IOException {
            for (; values.hasNext(); ) {
                SelectorEntry selected = values.next();
                Text url = selected.url;
                CrawlDatum datum = selected.datum;
                output.collect(url, datum);
            }
        }
    }

    /**
     * Mapper of the partition job: drops the score key written by the selector
     * and re-keys each entry by its URL.
     */
    public static class SelectorInverseMapper
    extends MapReduceBase
    implements Mapper<FloatWritable, SelectorEntry, Text, SelectorEntry> {
        /** Emits {@code <value.url, value>}; the incoming score key is ignored. */
        public void map(FloatWritable key, SelectorEntry value, OutputCollector<Text, SelectorEntry> output, Reporter reporter) throws IOException {
            Text url = value.url;
            output.collect(url, value);
        }
    }

    /**
     * Raw comparator that reverses the order of the inherited float
     * comparator by swapping its two operands.
     */
    public static class DecreasingFloatComparator
    extends FloatWritable.Comparator {
        /** Delegates to the parent comparator with the two byte ranges exchanged. */
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            int reversed = super.compare(b2, s2, l2, b1, s1, l1);
            return reversed;
        }
    }

    /**
     * Output format of the selection job: routes every entry into a
     * sub-directory named after the segment number assigned to it.
     */
    public static class GeneratorOutputFormat
    extends MultipleSequenceFileOutputFormat<FloatWritable, SelectorEntry> {
        /** Returns {@code "fetchlist-<segnum>/<name>"} for the given entry. */
        protected String generateFileNameForKeyValue(FloatWritable key, SelectorEntry value, String name) {
            String segmentNumber = value.segnum.toString();
            String directory = "fetchlist-" + segmentNumber;
            return directory + "/" + name;
        }
    }

    /**
     * Mapper, partitioner and reducer of the selection job.
     *
     * <p>The map phase keeps the URLs that pass the filters and are due for
     * fetching and emits them keyed by their sort value. The partitioner
     * distributes entries by URL. The reduce phase takes the entries in the
     * job's key order, stops at the per-reducer share of topN for each segment
     * and, when {@link Generator#GENERATOR_MAX_COUNT} is positive, limits the
     * number of URLs per host or domain within one segment.
     */
    public static class Selector
    implements Mapper<Text, CrawlDatum, FloatWritable, SelectorEntry>,
    Partitioner<FloatWritable, Writable>,
    Reducer<FloatWritable, SelectorEntry, FloatWritable, SelectorEntry> {
        /** Generate time stored in the metadata of every selected datum. */
        private LongWritable genTime = new LongWritable(System.currentTimeMillis());
        /** Reference time used to decide whether a URL is due. */
        private long curTime;
        /** Number of URLs one reducer may put into one segment (topN / number of reducers). */
        private long limit;
        /** URLs emitted by this reducer into the segment currently being filled. */
        private long count;
        /** Per host/domain state: [0] = segment currently used (1-based), [1] = URLs counted in it. */
        private HashMap<String, int[]> hostCounts = new HashMap<String, int[]>();
        /** URLs emitted per segment by this reducer, indexed by segment number - 1. */
        private int[] segCounts;
        /** Maximum URLs per host/domain and segment; values not greater than 0 disable the cap. */
        private int maxCount;
        /** True to count per domain name, false to count per host. */
        private boolean byDomain = false;
        private Partitioner<Text, Writable> partitioner = new URLPartitioner();
        private URLFilters filters;
        private URLNormalizers normalizers;
        private ScoringFilters scfilters;
        /** Reused map output value. */
        private SelectorEntry entry = new SelectorEntry();
        /** Reused map output key. */
        private FloatWritable sortValue = new FloatWritable();
        private boolean filter;
        private boolean normalise;
        /** Time in milliseconds during which an already generated URL is not selected again. */
        private long genDelay;
        private FetchSchedule schedule;
        /** Minimum sort value; NaN disables the check. */
        private float scoreThreshold = 0.0f;
        /** Maximum fetch interval; -1 disables the check. */
        private int intervalThreshold = -1;
        /** Status name a datum must have to be selected; null disables the check. */
        private String restrictStatus = null;
        private int maxNumSegments = 1;
        /** Segment (1-based) filled when no per-host cap is active. */
        int currentsegmentnum = 1;

        /** Reads all selection settings from the job configuration. */
        public void configure(JobConf job) {
            curTime = job.getLong(Generator.GENERATOR_CUR_TIME, System.currentTimeMillis());
            limit = job.getLong(Generator.GENERATOR_TOP_N, Long.MAX_VALUE) / (long) job.getNumReduceTasks();
            maxCount = job.getInt(Generator.GENERATOR_MAX_COUNT, -1);
            if (maxCount == -1) {
                byDomain = false;
            }
            if (Generator.GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(Generator.GENERATOR_COUNT_MODE))) {
                byDomain = true;
            }
            filters = new URLFilters(job);
            normalise = job.getBoolean(Generator.GENERATOR_NORMALISE, true);
            if (normalise) {
                normalizers = new URLNormalizers(job, "generate_host_count");
            }
            scfilters = new ScoringFilters(job);
            partitioner.configure(job);
            filter = job.getBoolean(Generator.GENERATOR_FILTER, true);
            // The delay is configured in days and kept in milliseconds.
            genDelay = job.getLong(Generator.GENERATOR_DELAY, 7L) * 3600L * 24L * 1000L;
            long time = job.getLong("_ngt_", 0L);
            if (time > 0L) {
                genTime.set(time);
            }
            schedule = FetchScheduleFactory.getFetchSchedule(job);
            scoreThreshold = job.getFloat(Generator.GENERATOR_MIN_SCORE, Float.NaN);
            intervalThreshold = job.getInt(Generator.GENERATOR_MIN_INTERVAL, -1);
            restrictStatus = job.get(Generator.GENERATOR_RESTRICT_STATUS, null);
            maxNumSegments = job.getInt(Generator.GENERATOR_MAX_NUM_SEGMENTS, 1);
            segCounts = new int[maxNumSegments];
        }

        public void close() {
        }

        /**
         * Emits the entry keyed by its sort value unless it is rejected by the
         * URL filters, is not due according to the fetch schedule, was
         * generated less than the configured delay ago, or fails the optional
         * status, score or interval restriction. A failing URL filter or
         * scoring filter is logged and does not reject the entry.
         */
        public void map(Text key, CrawlDatum value, OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter) throws IOException {
            Text url = key;
            if (filter) {
                try {
                    if (filters.filter(url.toString()) == null) {
                        return;
                    }
                }
                catch (URLFilterException e) {
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Couldn't filter url: " + url + " (" + e.getMessage() + ")");
                    }
                }
            }

            CrawlDatum crawlDatum = value;
            if (!schedule.shouldFetch(url, crawlDatum, curTime)) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("-shouldFetch rejected '" + url + "', fetchTime=" + crawlDatum.getFetchTime() + ", curTime=" + curTime);
                }
                return;
            }

            // Skip URLs that were put into a fetch list recently.
            LongWritable oldGenTime = (LongWritable) crawlDatum.getMetaData().get(Nutch.WRITABLE_GENERATE_TIME_KEY);
            if (oldGenTime != null && oldGenTime.get() + genDelay > curTime) {
                return;
            }

            float sort = 1.0f;
            try {
                sort = scfilters.generatorSortValue(key, crawlDatum, sort);
            }
            catch (ScoringFilterException sfe) {
                if (LOG.isWarnEnabled()) {
                    LOG.warn("Couldn't filter generatorSortValue for " + key + ": " + sfe);
                }
            }

            if (restrictStatus != null && !restrictStatus.equalsIgnoreCase(CrawlDatum.getStatusName(crawlDatum.getStatus()))) {
                return;
            }
            // NaN never equals anything, so the "unset" marker needs isNaN.
            if (!Float.isNaN(scoreThreshold) && sort < scoreThreshold) {
                return;
            }
            if (intervalThreshold != -1 && crawlDatum.getFetchInterval() > intervalThreshold) {
                return;
            }

            sortValue.set(sort);
            crawlDatum.getMetaData().put(Nutch.WRITABLE_GENERATE_TIME_KEY, genTime);
            entry.datum = crawlDatum;
            entry.url = key;
            output.collect(sortValue, entry);
        }

        /** Partitions by the URL carried in the value rather than by the score key. */
        public int getPartition(FloatWritable key, Writable value, int numReduceTasks) {
            return partitioner.getPartition(((SelectorEntry) value).url, key, numReduceTasks);
        }

        /**
         * Assigns a segment number to each entry and emits it, until this
         * reducer's share of topN is used up in every allowed segment. With a
         * positive per-host cap, a host or domain gets at most that many URLs
         * per segment; surplus URLs move to the next segment or, when no
         * segment is left, are dropped. URLs that cannot be parsed are counted
         * under Generator/MALFORMED_URL and skipped.
         */
        public void reduce(FloatWritable key, Iterator<SelectorEntry> values, OutputCollector<FloatWritable, SelectorEntry> output, Reporter reporter) throws IOException {
            while (values.hasNext()) {
                if (count == limit) {
                    // Current segment is full: move on or stop when none is left.
                    if (currentsegmentnum >= maxNumSegments) {
                        break;
                    }
                    count = 0L;
                    ++currentsegmentnum;
                }

                SelectorEntry selected = values.next();
                String urlString = selected.url.toString();
                String hostordomain;
                try {
                    if (normalise && normalizers != null) {
                        urlString = normalizers.normalize(urlString, "generate_host_count");
                    }
                    URL u = new URL(urlString);
                    hostordomain = byDomain ? URLUtil.getDomainName(u) : u.getHost();
                }
                catch (Exception e) {
                    LOG.warn("Malformed URL: '" + urlString + "', skipping (" + StringUtils.stringifyException(e) + ")");
                    reporter.getCounter("Generator", "MALFORMED_URL").increment(1L);
                    continue;
                }
                hostordomain = hostordomain.toLowerCase();

                if (maxCount > 0) {
                    int[] hostCount = hostCounts.get(hostordomain);
                    if (hostCount == null) {
                        hostCount = new int[]{1, 0};
                        hostCounts.put(hostordomain, hostCount);
                    }
                    // Count the current URL for the host's current segment.
                    hostCount[1]++;

                    // Skip segments that already hold this reducer's share of topN.
                    // The current URL becomes the first one counted in the new segment.
                    while ((long) segCounts[hostCount[0] - 1] >= limit && hostCount[0] < maxNumSegments) {
                        hostCount[0]++;
                        hostCount[1] = 1;
                    }

                    // More than maxCount URLs for this host in its segment:
                    // try the next segment, otherwise drop the URL.
                    if (hostCount[1] > maxCount) {
                        if (hostCount[0] < maxNumSegments) {
                            hostCount[0]++;
                            hostCount[1] = 1;
                        } else {
                            // Log only once, for the first surplus URL.
                            if (hostCount[1] == maxCount + 1 && LOG.isInfoEnabled()) {
                                LOG.info("Host or domain " + hostordomain + " has more than " + maxCount + " URLs for all " + maxNumSegments + " segments. Additional URLs won't be included in the fetchlist.");
                            }
                            continue;
                        }
                    }
                    selected.segnum = new IntWritable(hostCount[0]);
                    segCounts[hostCount[0] - 1]++;
                } else {
                    selected.segnum = new IntWritable(currentsegmentnum);
                    segCounts[currentsegmentnum - 1]++;
                }

                output.collect(key, selected);
                ++count;
            }
        }
    }

    /**
     * Value type passed between the generator jobs: a URL, its crawl datum
     * and the number of the segment the URL was assigned to. The fields are
     * serialized in exactly that order.
     */
    public static class SelectorEntry
    implements Writable {
        public Text url = new Text();
        public CrawlDatum datum = new CrawlDatum();
        /** Segment number assigned by the selector's reducer; initially 0. */
        public IntWritable segnum = new IntWritable(0);

        /** Reads url, datum and segnum, in that order. */
        public void readFields(DataInput in) throws IOException {
            url.readFields(in);
            datum.readFields(in);
            segnum.readFields(in);
        }

        /** Writes url, datum and segnum, in that order. */
        public void write(DataOutput out) throws IOException {
            url.write(out);
            datum.write(out);
            segnum.write(out);
        }

        /** Returns a one-line description of the three fields. */
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("url=").append(url.toString());
            text.append(", datum=").append(datum.toString());
            text.append(", segnum=").append(segnum.toString());
            return text.toString();
        }
    }
}

