/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse.ext;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Hashtable;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.OutlinkExtractor;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginRepository;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.CommandRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Parser implementation that hands the raw content to an external command
 * and uses whatever the command writes to its standard output as the parsed
 * text.
 *
 * <p>The command to run is looked up by content type. The lookup table is
 * filled in {@link #setConf(Configuration)} from the extensions of the
 * {@code parse-ext} plugin; each extension supplies a {@code contentType},
 * a {@code command}, and optionally a {@code timeout} and an
 * {@code encoding} attribute.
 */
public class ExtParser
implements Parser {
    public static final Logger LOG = LoggerFactory.getLogger("org.apache.nutch.parse.ext");

    /** Initial capacity of the buffer that collects the command's standard output. */
    static final int BUFFER_SIZE = 4096;

    /** Timeout used when an extension does not declare a usable one (seconds, per the trace message below). */
    static final int TIMEOUT_DEFAULT = 30;

    /** Maps a content type to {@code {command, timeoutString, encoding}}. */
    Hashtable<String, String[]> TYPE_PARAMS_MAP = new Hashtable<>();

    private Configuration conf;

    /**
     * Runs the external command registered for the content's type and wraps
     * its standard output in a {@link ParseResult}.
     *
     * <p>An empty (failed) parse result is returned when no command is
     * registered for the content type, when the {@code Content-Length}
     * metadata disagrees with the number of bytes actually held, when the
     * command exits with a non-zero value, or when any exception is raised
     * while running the command or decoding its output.
     *
     * @param content the fetched content to parse
     * @return the parse result; the title is always the empty string and the
     *         outlinks are extracted from the command's output text
     */
    public ParseResult getParse(Content content) {
        String contentType = content.getContentType();
        // Hashtable rejects null keys, so a missing content type is treated as "no command defined".
        String[] params = contentType == null ? null : this.TYPE_PARAMS_MAP.get(contentType);
        if (params == null) {
            // NOTE(review): 2 is presumably the "failed" major status code of ParseStatus - confirm.
            return new ParseStatus(2, "No external command defined for contentType: " + contentType).getEmptyParseResult(content.getUrl(), this.getConf());
        }
        String command = params[0];
        // Entries written by setConf() are already normalized; parsing defensively keeps a
        // hand-populated map entry from throwing NumberFormatException out of this method.
        int timeout = ExtParser.parseTimeout(params[1]);
        String encoding = params[2];
        LOG.trace("Use {} with timeout={}secs", command, timeout);

        String text;
        try {
            byte[] raw = content.getContent();
            String contentLength = content.getMetadata().get("Content-Length");
            if (contentLength != null && raw.length != Integer.parseInt(contentLength)) {
                // NOTE(review): 202 is presumably the "truncated" minor status code - confirm.
                return new ParseStatus(2, 202, "Content truncated at " + raw.length + " bytes. Parser can't handle incomplete " + contentType + " file.").getEmptyParseResult(content.getUrl(), this.getConf());
            }
            ByteArrayOutputStream os = new ByteArrayOutputStream(BUFFER_SIZE);
            ByteArrayOutputStream es = new ByteArrayOutputStream(BUFFER_SIZE / 4);
            CommandRunner cr = new CommandRunner();
            // The content type is appended to the configured command line.
            cr.setCommand(command + " " + contentType);
            cr.setInputStream(new ByteArrayInputStream(raw));
            cr.setStdOutputStream(os);
            cr.setStdErrorStream(es);
            cr.setTimeout(timeout);
            cr.evaluate();
            if (cr.getExitValue() != 0) {
                return new ParseStatus(2, "External command " + command + " failed with error: " + es.toString()).getEmptyParseResult(content.getUrl(), this.getConf());
            }
            text = os.toString(encoding);
        }
        catch (Exception e) {
            // Boundary catch: any failure (I/O, bad Content-Length, unknown encoding, ...)
            // becomes a failed parse status instead of propagating to the caller.
            return new ParseStatus(e).getEmptyParseResult(content.getUrl(), this.getConf());
        }

        // The external command yields plain text only, so there is no title to report.
        String title = "";
        Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, this.getConf());
        ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, content.getMetadata());
        return ParseResult.createParseResult(content.getUrl(), new ParseImpl(text, parseData));
    }

    /**
     * Stores the configuration and (re)registers the external commands
     * declared by the {@code parse-ext} plugin's parser extensions.
     *
     * <p>Extensions without a non-empty {@code contentType} and
     * {@code command} are skipped. A missing or empty {@code encoding} falls
     * back to the platform default charset; a missing, empty or non-numeric
     * {@code timeout} falls back to {@link #TIMEOUT_DEFAULT}.
     *
     * @param conf the configuration used to locate the plugin repository
     */
    public void setConf(Configuration conf) {
        this.conf = conf;
        Extension[] extensions = PluginRepository.get(conf).getExtensionPoint("org.apache.nutch.parse.Parser").getExtensions();
        for (Extension extension : extensions) {
            // Only extensions contributed by this plugin are of interest.
            if (!extension.getDescriptor().getPluginId().equals("parse-ext")) {
                continue;
            }
            String contentType = extension.getAttribute("contentType");
            if (contentType == null || contentType.equals("")) {
                continue;
            }
            String command = extension.getAttribute("command");
            if (command == null || command.equals("")) {
                continue;
            }
            String encoding = extension.getAttribute("encoding");
            if (encoding == null || encoding.equals("")) {
                encoding = Charset.defaultCharset().name();
            }
            // Validate once here so a malformed attribute is reported at configuration
            // time rather than on every parse.
            String timeoutString = Integer.toString(ExtParser.parseTimeout(extension.getAttribute("timeout")));
            this.TYPE_PARAMS_MAP.put(contentType, new String[]{command, timeoutString, encoding});
        }
    }

    /**
     * Returns the configuration last passed to {@link #setConf(Configuration)}.
     *
     * @return the stored configuration, or {@code null} if none was set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Converts a configured timeout string to an int.
     *
     * @param value the raw attribute value; may be {@code null}
     * @return the parsed value, or {@link #TIMEOUT_DEFAULT} when the value is
     *         missing, blank or not a valid integer
     */
    private static int parseTimeout(String value) {
        if (value == null) {
            return TIMEOUT_DEFAULT;
        }
        String trimmed = value.trim();
        if (trimmed.isEmpty()) {
            return TIMEOUT_DEFAULT;
        }
        try {
            return Integer.parseInt(trimmed);
        }
        catch (NumberFormatException e) {
            LOG.warn("Invalid timeout '{}' for external parser command, using default of {} secs", value, TIMEOUT_DEFAULT);
            return TIMEOUT_DEFAULT;
        }
    }
}

