/*
 * Decompiled with CFR 0.152.
 */
package org.archive.crawler.postprocessor;

import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.Scoper;
import org.archive.modules.PostProcessor;
import org.archive.modules.ProcessorURI;
import org.archive.modules.deciderules.DecideResult;
import org.archive.modules.deciderules.DecideRuleSequence;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.Link;
import org.archive.state.Expert;
import org.archive.state.Key;
import org.archive.state.KeyManager;

/**
 * Post-processor that converts the outlinks collected on a fetched URI into
 * candidate {@link CrawlURI}s, keeping only those accepted by the scope check
 * inherited from {@link Scoper}.
 *
 * <p>Per URI the flow is: {@link #shouldProcess(ProcessorURI)} first deals with
 * a pending prerequisite or an unsuccessful fetch, and only when neither
 * applies and outlinks exist does {@link #innerProcess(ProcessorURI)} build,
 * scope-check and queue the candidates.
 */
public class LinksScoper
extends Scoper
implements PostProcessor {
    private static final long serialVersionUID = -3L;

    /** Class logger; never reassigned, hence final. */
    private static final Logger LOGGER = Logger.getLogger(LinksScoper.class.getName());

    /**
     * When true (the default), a link reached from a seed through a
     * {@link Hop#REFER} hop is flagged via {@link #considerAsSeed} when its
     * {@link CrawlURI} is created.
     */
    @Expert
    public static final Key<Boolean> SEED_REDIRECTS_NEW_SEEDS = Key.make(true);

    /**
     * Rules consulted for URIs that were rejected by the scope: a rejected URI
     * is logged at INFO level only when this sequence decides
     * {@link DecideResult#ACCEPT} for it.
     */
    @Expert
    public static final Key<DecideRuleSequence> LOG_REJECTS_RULES = Key.make(DecideRuleSequence.class, DecideRuleSequence.class);

    /**
     * Number of hops from a seed within which discovered links receive the
     * preferred scheduling directive; see {@link #getSchedulingFor}. The
     * default of -1 disables the preference, and 0 prefers every link.
     */
    @Expert
    public static final Key<Integer> PREFERENCE_DEPTH_HOPS = Key.make(-1);

    /**
     * Decides whether {@link #innerProcess(ProcessorURI)} should run for the
     * given URI. Note that this method has side effects: a pending prerequisite
     * is handled here, and the outlinks of an unsuccessful fetch are discarded.
     *
     * @param puri the URI being post-processed
     * @return true only for a {@link CrawlURI} without a prerequisite, whose
     *         fetch status lies in [200, 400) and which has at least one outlink
     */
    protected boolean shouldProcess(ProcessorURI puri) {
        if (!(puri instanceof CrawlURI)) {
            return false;
        }
        CrawlURI curi = (CrawlURI)puri;

        // A prerequisite takes precedence; outlinks are not examined at all.
        if (curi.hasPrerequisiteUri()) {
            this.handlePrerequisite(curi);
            return false;
        }

        // Links found on anything outside the 2xx/3xx range are dropped.
        int fetchStatus = curi.getFetchStatus();
        if (fetchStatus < 200 || fetchStatus >= 400) {
            curi.getOutLinks().clear();
            return false;
        }

        return !curi.getOutLinks().isEmpty();
    }

    /**
     * Builds a candidate {@link CrawlURI} for every outlink of the given URI,
     * adds the in-scope ones to its out-candidates and finally clears the
     * outlinks. A link whose URI cannot be built is reported through the
     * logger module and skipped.
     *
     * @param puri the URI being post-processed; must be a {@link CrawlURI}
     */
    protected void innerProcess(ProcessorURI puri) {
        CrawlURI curi = (CrawlURI)puri;
        boolean redirectsNewSeeds = curi.get((Object)this, SEED_REDIRECTS_NEW_SEEDS);
        int preferenceDepthHops = curi.get((Object)this, PREFERENCE_DEPTH_HOPS);

        // NOTE(review): handlePrerequisite also guards createCrawlURI against
        // NumberFormatException; here such an exception would abort the loop
        // and propagate to the caller. Confirm whether that is intended.
        for (Link wref : curi.getOutLinks()) {
            try {
                int directive = this.getSchedulingFor(curi, wref, preferenceDepthHops);
                boolean asSeed = this.considerAsSeed(curi, wref, redirectsNewSeeds);
                CrawlURI caURI = curi.createCrawlURI(curi.getBaseURI(), wref, directive, asSeed);
                if (this.isInScope(caURI)) {
                    curi.getOutCandidates().add(caURI);
                }
            }
            catch (URIException e) {
                this.loggerModule.logUriError(e, curi.getUURI(), wref.getDestination().toString());
            }
        }
        curi.getOutLinks().clear();
    }

    /**
     * Replaces the prerequisite link of the given URI with a force-fetched
     * {@link CrawlURI} whose scheduling directive is one less than that of the
     * given URI (floored at 0). If that prerequisite is out of scope, the
     * given URI's fetch status is set to -63 instead.
     *
     * <p>Failures to build the prerequisite URI are logged at INFO level to
     * the URI-errors logger and otherwise ignored.
     *
     * @param curi the URI whose prerequisite is to be prepared
     */
    protected void handlePrerequisite(CrawlURI curi) {
        try {
            CrawlURI caUri = curi.createCrawlURI(curi.getBaseURI(), (Link)curi.getPrerequisiteUri());

            // Promote the prerequisite by one step; the directive cannot go below 0.
            int prereqPriority = curi.getSchedulingDirective() - 1;
            if (prereqPriority < 0) {
                prereqPriority = 0;
                LOGGER.severe("Unable to promote prerequisite " + caUri + " above " + curi);
            }
            caUri.setSchedulingDirective(prereqPriority);
            caUri.setForceFetch(true);

            if (this.isInScope(caUri)) {
                curi.setPrerequisiteUri(caUri);
            } else {
                // NOTE(review): -63 is presumably the "prerequisite unschedulable"
                // failure status; confirm against the fetch status constants.
                curi.setFetchStatus(-63);
            }
        }
        catch (URIException ex) {
            this.logPrerequisiteFailure(curi, ex);
        }
        catch (NumberFormatException e) {
            this.logPrerequisiteFailure(curi, e);
        }
    }

    /**
     * Logs, at INFO level on the URI-errors logger, that the prerequisite of
     * the given URI could not be turned into a {@link CrawlURI}.
     *
     * @param curi the URI whose prerequisite failed
     * @param cause the exception raised while building the prerequisite
     */
    private void logPrerequisiteFailure(CrawlURI curi, Exception cause) {
        Object[] array = new Object[]{curi, curi.getPrerequisiteUri()};
        this.loggerModule.getUriErrors().log(Level.INFO, cause.getMessage(), array);
    }

    /**
     * Runs the inherited out-of-scope handling and then, if INFO logging is
     * enabled, logs the rejected URI when {@link #LOG_REJECTS_RULES} decides
     * {@link DecideResult#ACCEPT} for it.
     *
     * @param caUri the candidate that was found to be out of scope
     */
    protected void outOfScope(CrawlURI caUri) {
        super.outOfScope(caUri);
        // Skip the rule evaluation entirely when nothing would be logged.
        if (!LOGGER.isLoggable(Level.INFO)) {
            return;
        }
        DecideRuleSequence seq = caUri.get((Object)this, LOG_REJECTS_RULES);
        if (seq.decisionFor((ProcessorURI)caUri) == DecideResult.ACCEPT) {
            LOGGER.info(caUri.getUURI().toString());
        }
    }

    /**
     * Tells whether a link should be flagged as a seed when its
     * {@link CrawlURI} is created. Only the hop type identifies the link as a
     * redirect here; the fetch status of {@code curi} is not consulted.
     *
     * @param curi the URI the link was found on
     * @param wref the link under consideration
     * @param redirectsNewSeeds value of {@link #SEED_REDIRECTS_NEW_SEEDS}
     * @return true when the setting is enabled, {@code curi} is a seed and the
     *         link's hop type is {@link Hop#REFER}
     */
    private boolean considerAsSeed(CrawlURI curi, Link wref, boolean redirectsNewSeeds) {
        return redirectsNewSeeds && curi.isSeed() && wref.getHopType() == Hop.REFER;
    }

    /**
     * Chooses the scheduling directive for a link found on the given URI.
     * Smaller directives rank higher (see {@link #handlePrerequisite}, which
     * promotes by subtracting one).
     *
     * <ul>
     * <li>{@link Hop#REFER} links get 1 when {@code preferenceDepthHops} is
     *     non-negative and 2 otherwise.</li>
     * <li>Any other link gets 1 when {@code preferenceDepthHops} is 0, or when
     *     it is positive and the link's path length (that of {@code curi} plus
     *     one) does not exceed it.</li>
     * <li>Everything else gets 3.</li>
     * </ul>
     *
     * @param curi the URI the link was found on
     * @param wref the link to schedule
     * @param preferenceDepthHops value of {@link #PREFERENCE_DEPTH_HOPS}
     * @return the scheduling directive: 1, 2 or 3
     */
    protected int getSchedulingFor(CrawlURI curi, Link wref, int preferenceDepthHops) {
        Hop c = wref.getHopType();
        if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.finest(curi + " with path=" + curi.getPathFromSeed() + " isSeed=" + curi.isSeed() + " with fetchStatus=" + curi.getFetchStatus() + " -> " + wref.getDestination() + " type " + c + " with context=" + wref.getContext());
        }

        if (c == Hop.REFER) {
            return preferenceDepthHops >= 0 ? 1 : 2;
        }
        if (preferenceDepthHops == 0) {
            return 1;
        }
        if (preferenceDepthHops > 0 && curi.getPathFromSeed().length() + 1 <= preferenceDepthHops) {
            return 1;
        }
        return 3;
    }

    // Must stay after the Key declarations: static initializers run in
    // textual order and the keys have to exist when they are registered.
    static {
        KeyManager.addKeys(LinksScoper.class);
    }
}

