/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.ParseCallable;
import org.apache.nutch.parse.ParseException;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.ParserFactory;
import org.apache.nutch.parse.ParserNotFound;
import org.apache.nutch.protocol.Content;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Runs the parser(s) registered for a piece of fetched {@link Content} and
 * returns the first usable {@link ParseResult}.
 *
 * <p>Unless the {@code parser.timeout} setting is {@code -1}, every parser
 * invocation is executed on a background daemon thread and abandoned if it
 * does not finish within that many seconds.
 */
public class ParseUtil {
    public static final Logger LOG = LoggerFactory.getLogger(ParseUtil.class);

    /** Value of {@code parser.timeout} that disables the per-parse time limit. */
    private static final int NO_TIMEOUT = -1;

    private final ParserFactory parserFactory;
    /** Maximum time, in seconds, a single parser may run; {@link #NO_TIMEOUT} disables the limit. */
    private final int maxParseTime;
    private final ExecutorService executorService;

    /**
     * Creates a parse utility backed by a {@link ParserFactory} built from the
     * given configuration.
     *
     * @param conf configuration; {@code parser.timeout} (default 30) is read
     *             as the per-parse time limit in seconds
     */
    public ParseUtil(Configuration conf) {
        this.parserFactory = new ParserFactory(conf);
        this.maxParseTime = conf.getInt("parser.timeout", 30);
        // Daemon threads: a parser that ignores cancellation must not keep the JVM alive.
        this.executorService = Executors.newCachedThreadPool(
                new ThreadFactoryBuilder().setNameFormat("parse-%d").setDaemon(true).build());
    }

    /**
     * Parses the content with each parser the factory returns for its content
     * type and URL, in order, and returns the first result that is neither
     * {@code null} nor empty.
     *
     * @param content the content to parse
     * @return the first non-empty parse result, or an empty result carrying a
     *         failure status when no parser produced one
     * @throws ParseException if the factory has no parser for the content
     */
    public ParseResult parse(Content content) throws ParseException {
        Parser[] parsers;
        try {
            String url = content.getUrl() != null ? content.getUrl() : "";
            parsers = this.parserFactory.getParsers(content.getContentType(), url);
        } catch (ParserNotFound e) {
            LOG.warn("No suitable parser found when trying to parse content {} of type {}",
                    content.getUrl(), content.getContentType());
            // Keep the original exception as the cause so the lookup failure stays traceable.
            throw new ParseException(e.getMessage(), e);
        }

        for (Parser parser : parsers) {
            LOG.debug("Parsing [{}] with [{}]", content.getUrl(), parser);
            ParseResult parseResult = this.maxParseTime != NO_TIMEOUT
                    ? this.runParser(parser, content)
                    : parser.getParse(content);
            if (parseResult != null && !parseResult.isEmpty()) {
                return parseResult;
            }
            if (Thread.currentThread().isInterrupted()) {
                // The caller was asked to stop; do not start further parsers.
                break;
            }
        }
        return this.unsuccessfulResult(content);
    }

    /**
     * Parses the content with the single parser registered under the given
     * extension id.
     *
     * @param extId   id used to look the parser up in the factory
     * @param content the content to parse
     * @return the parser's result when it is non-null and non-empty, otherwise
     *         an empty result carrying a failure status
     * @throws ParseException if the factory has no parser with that id
     */
    public ParseResult parseByExtensionId(String extId, Content content) throws ParseException {
        Parser parser;
        try {
            parser = this.parserFactory.getParserById(extId);
        } catch (ParserNotFound e) {
            LOG.warn("No suitable parser found when trying to parse content {} of type {}",
                    content.getUrl(), content.getContentType());
            // Keep the original exception as the cause so the lookup failure stays traceable.
            throw new ParseException(e.getMessage(), e);
        }

        ParseResult parseResult = this.maxParseTime != NO_TIMEOUT
                ? this.runParser(parser, content)
                : parser.getParse(content);
        if (parseResult != null && !parseResult.isEmpty()) {
            return parseResult;
        }
        return this.unsuccessfulResult(content);
    }

    /** Logs that no parser succeeded and builds the empty failure result for the content's URL. */
    private ParseResult unsuccessfulResult(Content content) {
        LOG.warn("Unable to successfully parse content {} of type {}",
                content.getUrl(), content.getContentType());
        return new ParseStatus(new ParseException("Unable to successfully parse content"))
                .getEmptyParseResult(content.getUrl(), null);
    }

    /**
     * Runs one parser on the executor and waits at most {@code maxParseTime}
     * seconds for it to finish.
     *
     * <p>Any failure (timeout, exception raised by the task, interruption of
     * the waiting thread) is logged, the task is cancelled with interruption,
     * and {@code null} is returned. If the waiting thread itself was
     * interrupted, its interrupt flag is restored before returning.
     *
     * @param p       the parser to run
     * @param content the content to parse
     * @return the parser's result, or {@code null} if it could not be obtained
     */
    private ParseResult runParser(Parser p, Content content) {
        Future<ParseResult> task = this.executorService.submit(new ParseCallable(p, content));
        try {
            return task.get(this.maxParseTime, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            LOG.warn("Error parsing " + content.getUrl() + " with " + p, (Throwable) e);
            task.cancel(true);
            // Future.get() cleared the flag when it threw; put it back so callers can see it.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            LOG.warn("Error parsing " + content.getUrl() + " with " + p, (Throwable) e);
            task.cancel(true);
        }
        return null;
    }
}

