/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.util.ArrayList;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.ProcessorURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.Link;
import org.archive.modules.extractor.LinkContext;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.state.Key;
import org.archive.state.KeyManager;

/**
 * Extractor that derives additional ("implied") URIs from the outlinks that
 * have already been collected on a URI.
 *
 * <p>Every existing outlink whose destination fully matches
 * {@link #TRIGGER_REGEXP} is rewritten with {@link #BUILD_PATTERN}; the result
 * is added to the outlinks as a speculative link. Optionally (see
 * {@link #REMOVE_TRIGGER_URIS}) the outlink that triggered the rewrite is
 * removed again.
 */
public class ExtractorImpliedURI
extends Extractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorImpliedURI.class.getName());

    /**
     * Pattern an outlink destination has to match in its entirety (it is
     * tested with {@link Matcher#matches()}) for an implied URI to be built.
     * Created here with {@code ^(.*)$}, presumably the default value.
     */
    public static final Key<Pattern> TRIGGER_REGEXP = Key.make((Pattern)Pattern.compile("^(.*)$"));

    /**
     * Replacement string handed to {@link Matcher#replaceFirst(String)} to
     * build the implied URI, so it may refer to capture groups of the trigger
     * pattern ({@code $1}, ...). Created here with the empty string,
     * presumably the default value.
     */
    public static final Key<String> BUILD_PATTERN = Key.make((String)"");

    /**
     * Whether the outlink that triggered an implied URI is removed from the
     * outlinks once the implied URI has been added. Created here with
     * {@code false}, presumably the default value.
     */
    public static final Key<Boolean> REMOVE_TRIGGER_URIS = Key.make((boolean)false);

    /**
     * Implied links added, minus trigger links that were successfully removed
     * afterwards; shown by {@link #report()}.
     */
    final AtomicLong linksExtracted = new AtomicLong();

    /**
     * Accepts every URI.
     *
     * @param uri the URI under consideration (not inspected)
     * @return always {@code true}
     */
    protected boolean shouldProcess(ProcessorURI uri) {
        return true;
    }

    /**
     * Adds an implied outlink for every current outlink of {@code curi} whose
     * destination matches the configured trigger pattern.
     *
     * <p>An implied URI that cannot be turned into a {@link UURI} is logged at
     * FINE level and skipped. Unchecked exceptions raised by
     * {@link Matcher#replaceFirst(String)} for an invalid build pattern are
     * not caught here.
     *
     * @param curi the URI whose outlinks are examined and extended
     */
    public void extract(ProcessorURI curi) {
        // The settings are looked up with the same arguments for every link,
        // so read them once up front (assumes the lookup has no side effects).
        Pattern trigger = (Pattern)curi.get(this, TRIGGER_REGEXP);
        if (trigger == null) {
            // Without a trigger extractImplied() never yields a URI.
            return;
        }
        String build = (String)curi.get(this, BUILD_PATTERN);
        // Null-safe: an unset value is treated as "do not remove".
        boolean removeTriggerURI = Boolean.TRUE.equals(curi.get(this, REMOVE_TRIGGER_URIS));

        // Work on a snapshot: the loop adds to and removes from the live
        // outlink collection, and freshly added implied links must not be
        // examined themselves.
        ArrayList<Link> snapshot = new ArrayList<Link>(curi.getOutLinks());
        for (Link link : snapshot) {
            String implied = ExtractorImpliedURI.extractImplied(link.getDestination(), trigger, build);
            if (implied == null) {
                continue;
            }
            try {
                UURI src = curi.getUURI();
                UURI target = UURIFactory.getInstance(implied);
                Link out = new Link((CharSequence)src, (CharSequence)target, LinkContext.SPECULATIVE_MISC, Hop.SPECULATIVE);
                curi.getOutLinks().add(out);
                this.linksExtracted.incrementAndGet();
                if (removeTriggerURI) {
                    this.removeTriggerLink(curi, link);
                }
            }
            catch (URIException e) {
                LOGGER.log(Level.FINE, "bad URI", e);
            }
        }
    }

    /**
     * Removes {@code link} from the outlinks of {@code curi}, decrements the
     * extracted-links counter when the removal succeeded, and logs the outcome
     * at FINE level.
     */
    private void removeTriggerLink(ProcessorURI curi, Link link) {
        boolean removed = curi.getOutLinks().remove(link);
        if (removed) {
            this.linksExtracted.decrementAndGet();
        }
        if (!LOGGER.isLoggable(Level.FINE)) {
            // Skip building the message when nobody will see it.
            return;
        }
        if (removed) {
            LOGGER.log(Level.FINE, link.getDestination() + " has been removed from " + link.getSource() + " outlinks list.");
        } else {
            LOGGER.log(Level.FINE, "Failed to remove " + link.getDestination() + " from " + link.getSource() + " outlinks list.");
        }
    }

    /**
     * Builds the implied URI string for {@code uri}.
     *
     * @param uri     the candidate URI; {@code null} yields {@code null}
     * @param trigger pattern that must match all of {@code uri};
     *                {@code null} yields {@code null}
     * @param build   replacement string in {@link Matcher#replaceFirst(String)}
     *                syntax; {@code null} yields {@code null}
     * @return the result of replacing the match with {@code build}, or
     *         {@code null} when {@code uri} does not match or any argument is
     *         {@code null}
     */
    protected static String extractImplied(CharSequence uri, Pattern trigger, String build) {
        if (uri == null || trigger == null || build == null) {
            return null;
        }
        Matcher m = trigger.matcher(uri);
        // matches() requires the whole input to match, so replaceFirst()
        // rewrites the complete URI.
        return m.matches() ? m.replaceFirst(build) : null;
    }

    /**
     * Produces a short plain-text summary of this processor's activity.
     *
     * @return the report text, terminated by a blank line
     */
    public String report() {
        StringBuilder ret = new StringBuilder();
        ret.append("Processor: ").append(ExtractorImpliedURI.class.getName()).append("\n");
        ret.append("  Function:          Extracts links inside other URIs\n");
        ret.append("  CrawlURIs handled: ").append(this.getURICount()).append("\n");
        ret.append("  Links extracted:   ").append(this.linksExtracted.get()).append("\n\n");
        return ret.toString();
    }

    static {
        // Register the Key constants declared above with the KeyManager.
        KeyManager.addKeys(ExtractorImpliedURI.class);
    }
}

