/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.urlfilter.validator;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.URLFilter;

/**
 * URL filter that lets a URL through only when it is syntactically well formed.
 *
 * <p>A URL is accepted when all of the following hold:
 * <ul>
 *   <li>it consists solely of printable ASCII characters (0x21-0x7E);</li>
 *   <li>it has a scheme made of ASCII letters only;</li>
 *   <li>it has an authority whose host is either a dotted-quad IPv4 address
 *       (each octet &lt;= 255) or a dotted hostname with at least two labels
 *       and a 2-4 character top-level label starting with a letter, optionally
 *       followed by a numeric port no larger than 65535;</li>
 *   <li>its path is empty or starts with {@code /} and uses only the characters
 *       allowed by {@link #PATH_PATTERN}, without more {@code ..} segments
 *       than the slash count permits.</li>
 * </ul>
 *
 * <p>{@link #filter(String)} returns the URL unchanged when it is valid and
 * {@code null} otherwise.
 */
public class UrlValidator implements URLFilter {
    // Character-class building blocks; the patterns below are composed from
    // them so each character set is spelled out in exactly one place.
    private static final String ALPHA_CHARS = "a-zA-Z";
    private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";
    private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
    private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
    // NOTE(review): only letters are allowed in a scheme, so schemes containing
    // digits, '+', '-' or '.' are rejected - confirm this is intended.
    private static final String SCHEME_CHARS = ALPHA_CHARS;
    private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
    private static final String ATOM = VALID_CHARS + "+";

    /** Splits a URL into scheme, authority, path, query and fragment groups. */
    private static final Pattern URL_PATTERN = Pattern.compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?");
    // Capture-group indices into URL_PATTERN.
    private static final int PARSE_URL_SCHEME = 2;
    private static final int PARSE_URL_AUTHORITY = 4;
    private static final int PARSE_URL_PATH = 5;
    private static final int PARSE_URL_QUERY = 7;

    private static final Pattern SCHEME_PATTERN = Pattern.compile("^[" + SCHEME_CHARS + "]+");

    /** Splits an authority into host, optional ":port" and any trailing extra text. */
    private static final Pattern AUTHORITY_PATTERN = Pattern.compile("^([" + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
    // Capture-group indices into AUTHORITY_PATTERN.
    private static final int PARSE_AUTHORITY_HOST_IP = 1;
    private static final int PARSE_AUTHORITY_PORT = 2;
    private static final int PARSE_AUTHORITY_EXTRA = 3;

    private static final Pattern PATH_PATTERN = Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
    private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
    private static final Pattern LEGAL_ASCII_PATTERN = Pattern.compile("^[\\x21-\\x7E]+$");
    private static final Pattern IP_V4_DOMAIN_PATTERN = Pattern.compile("^(\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})[.](\\d{1,3})$");
    private static final Pattern DOMAIN_PATTERN = Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
    private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$");
    private static final Pattern ATOM_PATTERN = Pattern.compile("(" + ATOM + ")");
    private static final Pattern ALPHA_PATTERN = Pattern.compile("^[" + ALPHA_CHARS + "]");

    /** Largest value accepted for a single IPv4 octet. */
    private static final int MAX_IP_V4_OCTET = 255;
    /** Largest valid port number; PORT_PATTERN alone would let up to 99999 through. */
    private static final int MAX_PORT = 65535;
    // NOTE(review): the 2-4 character limit rejects longer top-level labels
    // (e.g. "museum"); kept as-is to preserve existing filtering behavior.
    private static final int MIN_TLD_LENGTH = 2;
    private static final int MAX_TLD_LENGTH = 4;

    private Configuration conf;

    /**
     * Filters a URL by syntactic validity.
     *
     * @param urlString the URL to check; may be {@code null}
     * @return {@code urlString} itself when it is valid, otherwise {@code null}
     */
    public String filter(String urlString) {
        return this.isValid(urlString) ? urlString : null;
    }

    /**
     * @return the configuration previously passed to {@link #setConf(Configuration)},
     *         or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Stores the configuration. The validation rules in this class do not read it.
     *
     * @param conf the configuration to keep
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
    }

    /**
     * Checks the whole URL: character set first, then each component in turn.
     *
     * @param value the URL to validate; {@code null} is invalid
     * @return {@code true} when every component passes its check
     */
    private boolean isValid(String value) {
        if (value == null) {
            return false;
        }
        // Cheap character-set check first, before running the structural regex.
        if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
            return false;
        }
        Matcher urlMatcher = URL_PATTERN.matcher(value);
        if (!urlMatcher.matches()) {
            return false;
        }
        return this.isValidScheme(urlMatcher.group(PARSE_URL_SCHEME))
                && this.isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY))
                && this.isValidPath(urlMatcher.group(PARSE_URL_PATH))
                && this.isValidQuery(urlMatcher.group(PARSE_URL_QUERY));
    }

    /**
     * @param scheme the scheme component without the trailing colon, or {@code null}
     * @return {@code true} when the scheme is present and consists only of ASCII letters
     */
    private boolean isValidScheme(String scheme) {
        return scheme != null && SCHEME_PATTERN.matcher(scheme).matches();
    }

    /**
     * Validates the authority component ({@code host[:port]}).
     *
     * @param authority the text between {@code //} and the path, or {@code null}
     * @return {@code true} when the host is a valid IPv4 address or hostname, the
     *         port (if any) is valid, and nothing but whitespace follows
     */
    private boolean isValidAuthority(String authority) {
        if (authority == null) {
            return false;
        }
        Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
        if (!authorityMatcher.matches()) {
            return false;
        }
        if (!this.isValidHost(authorityMatcher.group(PARSE_AUTHORITY_HOST_IP))) {
            return false;
        }
        if (!this.isValidPort(authorityMatcher.group(PARSE_AUTHORITY_PORT))) {
            return false;
        }
        // Anything left over after host and port (e.g. user-info markers) is not allowed.
        return this.isBlankOrNull(authorityMatcher.group(PARSE_AUTHORITY_EXTRA));
    }

    /**
     * Accepts a host that is either a dotted-quad IPv4 address or a hostname.
     * Text shaped like an IPv4 address is judged only as an IPv4 address; it is
     * never re-tried as a hostname.
     */
    private boolean isValidHost(String host) {
        Matcher ipV4Matcher = IP_V4_DOMAIN_PATTERN.matcher(host);
        if (ipV4Matcher.matches()) {
            return this.hasValidIpV4Octets(ipV4Matcher);
        }
        return this.isValidHostname(host);
    }

    /** Checks that each of the four captured octets is at most 255. */
    private boolean hasValidIpV4Octets(Matcher ipV4Matcher) {
        for (int group = 1; group <= 4; ++group) {
            // Each group is 1-3 ASCII digits by construction, so parseInt cannot fail.
            if (Integer.parseInt(ipV4Matcher.group(group)) > MAX_IP_V4_OCTET) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks a dotted hostname: at least two labels, and a last label that is
     * 2-4 characters long and starts with an ASCII letter.
     */
    private boolean isValidHostname(String host) {
        if (!DOMAIN_PATTERN.matcher(host).matches()) {
            return false;
        }
        // Walk the labels, remembering the last one (the top-level label).
        String topLevel = null;
        int labelCount = 0;
        Matcher atomMatcher = ATOM_PATTERN.matcher(host);
        while (atomMatcher.find()) {
            topLevel = atomMatcher.group();
            ++labelCount;
        }
        if (labelCount < 2) {
            return false;
        }
        if (topLevel.length() < MIN_TLD_LENGTH || topLevel.length() > MAX_TLD_LENGTH) {
            return false;
        }
        return ALPHA_PATTERN.matcher(topLevel).lookingAt();
    }

    /**
     * Validates the optional port part of the authority.
     *
     * @param port the port including its leading colon (e.g. {@code ":8080"}),
     *             or {@code null} when the authority has no port
     * @return {@code true} when there is no port, or when it is 1-5 digits with
     *         a value no greater than 65535
     */
    private boolean isValidPort(String port) {
        if (port == null) {
            return true;
        }
        Matcher portMatcher = PORT_PATTERN.matcher(port);
        if (!portMatcher.matches()) {
            return false;
        }
        // The pattern caps the length at five digits, so parseInt cannot overflow;
        // the explicit range check rejects 65536-99999.
        return Integer.parseInt(portMatcher.group(1)) <= MAX_PORT;
    }

    /** @return {@code true} when {@code value} is {@code null}, empty, or only whitespace */
    private boolean isBlankOrNull(String value) {
        return value == null || value.trim().length() == 0;
    }

    /**
     * Validates the path component.
     *
     * @param path the path, or {@code null}
     * @return {@code true} when the path matches {@link #PATH_PATTERN} and, if it
     *         contains {@code ..}, the number of {@code /} occurrences minus the
     *         number of {@code //} occurrences minus one exceeds the number of
     *         {@code ..} occurrences
     */
    private boolean isValidPath(String path) {
        if (path == null) {
            return false;
        }
        if (!PATH_PATTERN.matcher(path).matches()) {
            return false;
        }
        int dot2Count = this.countToken("..", path);
        if (dot2Count <= 0) {
            return true;
        }
        int slash2Count = this.countToken("//", path);
        int slashCount = this.countToken("/", path);
        return slashCount - slash2Count - 1 > dot2Count;
    }

    /**
     * @param query the query string without the leading {@code ?}, or {@code null}
     * @return {@code true} when there is no query or it matches {@link #QUERY_PATTERN}
     */
    private boolean isValidQuery(String query) {
        return query == null || QUERY_PATTERN.matcher(query).matches();
    }

    /**
     * Counts occurrences of {@code token} in {@code target}. The search resumes
     * one character after each hit, so overlapping occurrences are counted
     * (e.g. {@code "..."} contains {@code ".."} twice).
     *
     * @param token  the non-empty substring to look for
     * @param target the string to search
     * @return the number of (possibly overlapping) occurrences
     */
    private int countToken(String token, String target) {
        int count = 0;
        for (int index = target.indexOf(token); index != -1; index = target.indexOf(token, index + 1)) {
            ++count;
        }
        return count;
    }
}

