/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.ProcessorURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.Link;
import org.archive.modules.extractor.LinkContext;
import org.archive.net.LaxURLCodec;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.state.KeyManager;
import org.archive.util.TextUtils;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Extractor that looks for links embedded inside other URIs.
 *
 * <p>For every outlink already recorded on the processed URI, the
 * destination's query string is inspected; the whole query, or any single
 * {@code key=value} parameter value, that looks like an absolute http(s) URI
 * is added to the outlinks as a speculative link.
 */
public class ExtractorURI
extends Extractor {
    private static final long serialVersionUID = 3L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorURI.class.getName());

    /**
     * Matches a complete string that starts with {@code http://} or
     * {@code https://} and contains no whitespace or angle brackets.
     */
    static final String ABS_HTTP_URI_PATTERN = "^https?://[^\\s<>]*$";

    /** Number of links this extractor has added to outlink collections. */
    private AtomicLong linksExtracted = new AtomicLong(0L);

    /**
     * This extractor places no restriction on the URIs it examines.
     *
     * @param uri the URI being considered
     * @return always {@code true}
     */
    @Override
    protected boolean shouldProcess(ProcessorURI uri) {
        return true;
    }

    /**
     * Runs {@link #extractLink(ProcessorURI, Link)} on every outlink that is
     * present on {@code curi} when this method is entered.
     *
     * @param curi the URI whose outlinks are examined (and possibly extended)
     */
    @Override
    public void extract(ProcessorURI curi) {
        // Iterate over a snapshot: extractLink() appends to curi.getOutLinks(),
        // so walking the live collection would see it change mid-iteration,
        // and links added here must not themselves be re-examined.
        List<Link> snapshot = new ArrayList<Link>(curi.getOutLinks());
        for (Link wref : snapshot) {
            this.extractLink(curi, wref);
        }
    }

    /**
     * Inspects the destination of one existing outlink and adds a speculative
     * link to {@code curi} for each http(s) URI found in its query string.
     *
     * <p>A destination that cannot be parsed as a URI is logged at
     * {@code FINE} and skipped; likewise any individual candidate that fails
     * to parse is logged and skipped without affecting the other candidates.
     *
     * @param curi the URI being processed; new links are added to its outlinks
     * @param wref the existing outlink whose destination is examined
     */
    protected void extractLink(ProcessorURI curi, Link wref) {
        UURI source;
        try {
            source = UURIFactory.getInstance(wref.getDestination().toString());
        }
        catch (URIException e) {
            LOGGER.log(Level.FINE, "bad URI", e);
            return;
        }
        if (source == null) {
            return;
        }
        for (String uri : ExtractorURI.extractQueryStringLinks(source)) {
            try {
                UURI src = curi.getUURI();
                UURI dest = UURIFactory.getInstance(uri);
                Link link = new Link((CharSequence)src, (CharSequence)dest,
                        LinkContext.SPECULATIVE_MISC, Hop.SPECULATIVE);
                this.linksExtracted.incrementAndGet();
                curi.getOutLinks().add(link);
            }
            catch (URIException e) {
                LOGGER.log(Level.FINE, "bad URI", e);
            }
        }
    }

    /**
     * Collects strings from the query part of {@code source} that match
     * {@link #ABS_HTTP_URI_PATTERN}.
     *
     * <p>Two kinds of candidate are tested: the query as returned by
     * {@code getQuery()}, and the decoded value of each {@code &}-separated
     * parameter of the raw query that splits into exactly two parts on
     * {@code =}. Parameter values that cannot be decoded are skipped.
     *
     * @param source the URI whose query string is examined
     * @return the matching strings in discovery order; empty (never
     *         {@code null}) when there is no query, the query cannot be
     *         read, or nothing matches
     */
    protected static List<String> extractQueryStringLinks(UURI source) {
        List<String> results = new ArrayList<String>();
        String decodedQuery;
        try {
            decodedQuery = source.getQuery();
        }
        catch (URIException e1) {
            return results;
        }
        if (decodedQuery == null) {
            return results;
        }
        // The matcher is borrowed from TextUtils and must be handed back
        // exactly once, and only after its last use. Recycling it before the
        // parameter loop (or not at all) would break that contract, so the
        // hand-back lives in the finally block.
        Matcher m = TextUtils.getMatcher(ABS_HTTP_URI_PATTERN, decodedQuery);
        try {
            // Case 1: the entire query string is itself an http(s) URI.
            if (m.matches()) {
                results.add(decodedQuery);
            }
            // Case 2: an individual parameter value is an http(s) URI.
            String rawQuery = new String(source.getRawQuery());
            for (String param : rawQuery.split("&")) {
                String[] keyVal = param.split("=");
                if (keyVal.length != 2) {
                    continue;
                }
                String candidate;
                try {
                    candidate = LaxURLCodec.DEFAULT.decode(keyVal[1]);
                }
                catch (DecoderException e) {
                    // Undecodable value: cannot be a usable link, move on.
                    continue;
                }
                m.reset(candidate);
                if (m.matches()) {
                    results.add(candidate);
                }
            }
        }
        finally {
            TextUtils.recycleMatcher(m);
        }
        return results;
    }

    /**
     * Builds a short plain-text summary of this processor's activity.
     *
     * @return a multi-line report containing the class name, the handled-URI
     *         count from {@code getURICount()} and the number of links
     *         extracted so far
     */
    @Override
    public String report() {
        StringBuilder ret = new StringBuilder();
        ret.append("Processor: " + ExtractorURI.class.getName() + "\n");
        ret.append("  Function:          Extracts links inside other URIs\n");
        ret.append("  CrawlURIs handled: " + this.getURICount() + "\n");
        ret.append("  Links extracted:   " + this.linksExtracted + "\n\n");
        return ret.toString();
    }

    static {
        // Registers this class with KeyManager at class-initialization time.
        // NOTE(review): presumably exposes the class's setting keys -- confirm
        // against KeyManager.addKeys.
        KeyManager.addKeys(ExtractorURI.class);
    }
}

