/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse;

import java.net.MalformedURLException;
import java.util.ArrayList;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.Outlink;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternMatcherInput;
import org.apache.oro.text.regex.Perl5Compiler;
import org.apache.oro.text.regex.Perl5Matcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts {@link Outlink}s from plain text by scanning it for substrings
 * that look like URLs (see {@link #URL_PATTERN}).
 *
 * <p>Matching is done with the Jakarta ORO Perl5 regular-expression engine.
 * Each call compiles its own pattern and creates its own matcher.
 */
public class OutlinkExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(OutlinkExtractor.class);

    /**
     * Regular expression used to recognise a URL inside free text: a scheme
     * (a letter followed by 1-120 letters, digits, {@code +}, {@code .} or
     * {@code -}), a colon, one alphanumeric character or {@code /}, then
     * 1-333 URL characters or {@code %XX} escapes, optionally followed by a
     * {@code #fragment}.
     */
    private static final String URL_PATTERN = "([A-Za-z][A-Za-z0-9+.-]{1,120}:[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&~=-])|%[A-Fa-f0-9]{2}){1,333}(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&~=%-]{0,1000}))?)";

    /**
     * Compilation options for {@link #URL_PATTERN}. The three masks combine
     * to the same numeric value (32777) that was previously hard-coded.
     */
    private static final int URL_PATTERN_OPTIONS = Perl5Compiler.CASE_INSENSITIVE_MASK
            | Perl5Compiler.MULTILINE_MASK
            | Perl5Compiler.READ_ONLY_MASK;

    /** Maximum wall-clock time, in milliseconds, spent extracting links from one text. */
    private static final long TIME_LIMIT_MS = 60000L;

    /**
     * Extracts outlinks from the given text, giving each one an empty anchor.
     *
     * @param plainText the text to scan for URLs; may be {@code null}
     * @param conf      passed through to
     *                  {@link #getOutlinks(String, String, Configuration)}
     * @return the outlinks found, never {@code null}; empty if none were found
     */
    public static Outlink[] getOutlinks(String plainText, Configuration conf) {
        return OutlinkExtractor.getOutlinks(plainText, "", conf);
    }

    /**
     * Extracts outlinks from the given text, giving each one the supplied anchor.
     *
     * <p>Matches for which the {@link Outlink} constructor throws
     * {@link MalformedURLException} are logged and skipped. Extraction stops
     * early, keeping the links collected so far, once {@link #TIME_LIMIT_MS}
     * has elapsed. Any other exception is logged and ends the scan; it is
     * never propagated to the caller.
     *
     * @param plainText the text to scan for URLs; {@code null} yields an empty result
     * @param anchor    the anchor text handed to every created {@link Outlink}
     * @param conf      not used by this implementation
     * @return the outlinks found, never {@code null}; empty if none were found
     */
    public static Outlink[] getOutlinks(String plainText, String anchor, Configuration conf) {
        // Nothing to scan. Guarding here avoids a NullPointerException from the
        // matcher input that would otherwise be caught and logged as an error below.
        if (plainText == null) {
            return new Outlink[0];
        }

        long start = System.currentTimeMillis();
        ArrayList<Outlink> outlinks = new ArrayList<Outlink>();
        try {
            Perl5Compiler cp = new Perl5Compiler();
            Pattern pattern = cp.compile(URL_PATTERN, URL_PATTERN_OPTIONS);
            Perl5Matcher matcher = new Perl5Matcher();
            PatternMatcherInput input = new PatternMatcherInput(plainText);
            while (matcher.contains(input, pattern)) {
                // The time budget is checked once per match so that a very large
                // or adversarial text cannot keep the caller busy indefinitely.
                if (System.currentTimeMillis() - start >= TIME_LIMIT_MS) {
                    LOG.warn("Time limit exceeded for getOutLinks");
                    break;
                }
                MatchResult result = matcher.getMatch();
                String url = result.group(0);
                try {
                    outlinks.add(new Outlink(url, anchor));
                }
                catch (MalformedURLException mue) {
                    // Deliberately best-effort: one bad candidate must not abort the scan.
                    LOG.warn("Invalid url: '{}', skipping.", url);
                }
            }
        }
        catch (Exception ex) {
            // Extraction is best-effort; return whatever was collected so far.
            LOG.error("getOutlinks", (Throwable)ex);
        }
        return outlinks.toArray(new Outlink[0]);
    }

    /**
     * Placeholder for a disabled alternative implementation.
     *
     * @param plainText unused
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Deprecated
    private Outlink[] getOutlinksJakartaRegexpImpl(String plainText) {
        throw new UnsupportedOperationException("Implementation commented out. Please uncomment to use it.");
    }

    /**
     * Placeholder for a disabled alternative implementation.
     *
     * @param plainText unused
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Deprecated
    private Outlink[] getOutlinksJDK5Impl(String plainText) {
        throw new UnsupportedOperationException("Implementation commented out. Please uncomment to use it.");
    }
}

