/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.extractor;

import java.io.IOException;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.httpclient.URIException;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.ProcessorURI;
import org.archive.modules.extractor.ContentExtractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.Link;
import org.archive.modules.extractor.LinkContext;
import org.archive.modules.extractor.UriErrorLoggerModule;
import org.archive.net.LaxURLCodec;
import org.archive.state.KeyManager;
import org.archive.util.DevUtils;
import org.archive.util.TextUtils;

/**
 * Content extractor that scans JavaScript text for quoted strings that look
 * like URIs and records each one as a speculative ({@link Hop#SPECULATIVE})
 * link in the {@link LinkContext#JS_MISC} context.
 *
 * <p>The per-instance counter {@link #numberOfCURIsHandled} and the static
 * counter {@link #numberOfLinksExtracted} are updated without any
 * synchronization; the latter is shared by every instance of this class.
 */
public class ExtractorJS
extends ContentExtractor {
    private static final long serialVersionUID = 2L;
    private static final Logger LOGGER = Logger.getLogger("org.archive.crawler.extractor.ExtractorJS");
    static final String AMP = "&";
    static final String ESCAPED_AMP = "&amp;";
    static final String WHITESPACE = "\\s";
    /**
     * Matches a quoted string: group 1 is the opening quote (single or double,
     * optionally preceded by up to eight backslashes), group 2 is up to 2083
     * non-whitespace characters, and the match ends on a repeat of group 1.
     */
    static final String JAVASCRIPT_STRING_EXTRACTOR = "(\\\\{0,8}+(?:\"|'))(\\S{0,2083}?)(?:\\1)";
    /**
     * Matches a whole string that starts with a word character or with up to
     * two dots followed by a slash, contains at least one dot or slash, ends
     * with a word character or slash, and holds no whitespace or angle
     * brackets.
     */
    static final String STRING_URI_DETECTOR = "(?:\\w|[\\.]{0,2}/)[\\S&&[^<>]]*(?:\\.|/)[\\S&&[^<>]]*(?:\\w|/)";
    /** Number of URIs passed to {@link #innerExtract(ProcessorURI)} on this instance. */
    protected long numberOfCURIsHandled = 0L;
    /** Running total of links counted by {@link #innerExtract(ProcessorURI)}, across all instances. */
    protected static long numberOfLinksExtracted = 0L;

    /**
     * Decides whether the given URI should be treated as JavaScript.
     *
     * <p>A URI without a content type is never extracted. Otherwise it is
     * extracted when the content type contains {@code javascript},
     * {@code jscript} or {@code ecmascript} (case-sensitive), when the URI
     * string ends in {@code .js} (case-insensitive), or when its via-context
     * starts with {@code script} (case-insensitive).
     *
     * @param uri the URI under consideration
     * @return {@code true} if link extraction should be attempted
     */
    protected boolean shouldExtract(ProcessorURI uri) {
        String contentType = uri.getContentType();
        if (contentType == null) {
            return false;
        }
        if (contentType.indexOf("javascript") >= 0
                || contentType.indexOf("jscript") >= 0
                || contentType.indexOf("ecmascript") >= 0) {
            return true;
        }
        // Locale.ROOT keeps the comparison stable regardless of the JVM's
        // default locale (e.g. Turkish maps 'I' to a dotless 'i', which would
        // stop "SCRIPT" from matching "script").
        if (uri.toString().toLowerCase(Locale.ROOT).endsWith(".js")) {
            return true;
        }
        LinkContext context = uri.getViaContext();
        if (context == null) {
            return false;
        }
        return context.toString().toLowerCase(Locale.ROOT).startsWith("script");
    }

    /**
     * Extracts speculative links from the recorded content of {@code curi}.
     *
     * <p>If the replay character sequence cannot be obtained, the
     * {@link IOException} (if any) is added to the URI's non-fatal failures, a
     * warning is logged and {@code false} is returned. A
     * {@link StackOverflowError} raised while scanning is reported through
     * {@link DevUtils#warnHandle} and does not prevent a {@code true} result.
     * The character sequence is always closed before returning.
     *
     * @param curi the URI whose recorded content is scanned
     * @return {@code false} if the content could not be read, {@code true} otherwise
     */
    protected boolean innerExtract(ProcessorURI curi) {
        ++this.numberOfCURIsHandled;
        ReplayCharSequence cs = null;
        try {
            cs = curi.getRecorder().getReplayCharSequence();
        }
        catch (IOException e) {
            curi.getNonFatalFailures().add(e);
        }
        if (cs == null) {
            LOGGER.warning("Failed getting ReplayCharSequence: " + curi.toString());
            return false;
        }
        try {
            numberOfLinksExtracted += ExtractorJS.considerStrings(this.uriErrors, curi, cs, true);
        }
        catch (StackOverflowError e) {
            // considerStrings recurses into nested quoted strings; deep
            // nesting is reported rather than allowed to abort processing.
            DevUtils.warnHandle(e, "ExtractorJS StackOverflowError");
        }
        finally {
            try {
                cs.close();
            }
            catch (IOException ioe) {
                LOGGER.warning(TextUtils.exceptionToString("Failed close of ReplayCharSequence.", ioe));
            }
        }
        return true;
    }

    /**
     * Scans {@code cs} for quoted strings and adds every one that looks like a
     * URI as a speculative link on {@code curi}.
     *
     * <p>Each string matched by {@link #JAVASCRIPT_STRING_EXTRACTOR} is tested
     * against {@link #STRING_URI_DETECTOR}. A full match is passed through
     * {@link #speculativeFixup(String, ProcessorURI)} and added as a link; a
     * non-match is scanned recursively for strings nested inside it. A
     * candidate is counted before the link is added, so it is included in the
     * result even when adding it fails with a {@link URIException} (which is
     * reported via {@code uriErrors}).
     *
     * @param uriErrors supplies the outlink limit and receives URI errors
     * @param curi the URI the links are attached to
     * @param cs the text to scan
     * @param handlingJSFile if {@code true} links are added relative to the
     *        via URI, otherwise relative to the base URI
     * @return the number of URI-like strings found, including nested ones
     */
    public static long considerStrings(UriErrorLoggerModule uriErrors, ProcessorURI curi, CharSequence cs, boolean handlingJSFile) {
        long foundLinks = 0L;
        Matcher strings = TextUtils.getMatcher(JAVASCRIPT_STRING_EXTRACTOR, cs);
        while (strings.find()) {
            CharSequence subsequence = cs.subSequence(strings.start(2), strings.end(2));
            Matcher uri = TextUtils.getMatcher(STRING_URI_DETECTOR, subsequence);
            if (uri.matches()) {
                String string = ExtractorJS.speculativeFixup(uri.group(), curi);
                ++foundLinks;
                try {
                    int max = uriErrors.getMaxOutlinks(curi);
                    if (handlingJSFile) {
                        Link.addRelativeToVia(curi, max, string, LinkContext.JS_MISC, Hop.SPECULATIVE);
                    } else {
                        Link.addRelativeToBase(curi, max, string, LinkContext.JS_MISC, Hop.SPECULATIVE);
                    }
                }
                catch (URIException e) {
                    uriErrors.logUriError(e, curi.getUURI(), string);
                }
            } else {
                // Not a URI itself: look for quoted strings nested inside it.
                foundLinks += ExtractorJS.considerStrings(uriErrors, curi, subsequence, handlingJSFile);
            }
            TextUtils.recycleMatcher(uri);
        }
        TextUtils.recycleMatcher(strings);
        return foundLinks;
    }

    /**
     * Applies best-effort corrections to a string that is about to be treated
     * as a URI.
     *
     * <ol>
     * <li>Every {@code &amp;amp;} is replaced with {@code &amp;}.</li>
     * <li>If the result starts (case-insensitively) with {@code http%3A} or
     *     {@code https%3A}, it is URL-decoded; a decoding failure is logged at
     *     INFO and the undecoded text is kept.</li>
     * <li>If the result looks like a scheme-less dotted host name (at least
     *     three dot-separated labels, optionally followed by a path), a scheme
     *     is prepended: the scheme of {@code puri} when the string starts with
     *     {@code puri}'s host, otherwise {@code http}. The captured last label
     *     is not validated.</li>
     * </ol>
     *
     * @param string the candidate URI text
     * @param puri the URI the candidate was found in
     * @return the corrected text
     */
    public static String speculativeFixup(String string, ProcessorURI puri) {
        String retVal = TextUtils.replaceAll(ESCAPED_AMP, string, AMP);

        Matcher m = TextUtils.getMatcher("(?i)^https?%3A.*", retVal);
        if (m.matches()) {
            try {
                retVal = LaxURLCodec.DEFAULT.decode(retVal);
            }
            catch (DecoderException e) {
                LOGGER.log(Level.INFO, "unable to decode", e);
            }
        }
        TextUtils.recycleMatcher(m);

        m = TextUtils.getMatcher("^[^\\./:\\s%]+\\.[^/:\\s%]+\\.([^\\./:\\s%]+)(/.*|)$", retVal);
        if (m.matches()) {
            String schemePlus = "http://";
            try {
                // String.startsWith(null) throws NullPointerException, so guard
                // in case the source URI reports no host (assumed possible;
                // getHost() is not visible here).
                String host = puri.getUURI().getHost();
                if (host != null && retVal.startsWith(host)) {
                    schemePlus = puri.getUURI().getScheme() + "://";
                }
            }
            catch (URIException e) {
                // Best effort: without the source host, fall back to plain http.
                LOGGER.log(Level.FINE, "unable to read source URI host; defaulting to http scheme", e);
            }
            retVal = schemePlus + retVal;
        }
        TextUtils.recycleMatcher(m);
        return retVal;
    }

    /**
     * Builds a short plain-text summary of this processor's counters.
     *
     * @return the report text
     */
    public String report() {
        StringBuilder ret = new StringBuilder();
        ret.append("Processor: org.archive.crawler.extractor.ExtractorJS\n");
        ret.append("  Function:          Link extraction on JavaScript code\n");
        ret.append("  ProcessorURIs handled: ").append(this.numberOfCURIsHandled).append("\n");
        ret.append("  Links extracted:   ").append(numberOfLinksExtracted).append("\n\n");
        return ret.toString();
    }

    static {
        KeyManager.addKeys(ExtractorJS.class);
    }
}

