/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.parse;

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.SignatureFactory;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseSegment;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line tool that fetches a single URL, parses the fetched content and
 * reports the outcome.
 *
 * <p>Usage: {@code ParserChecker [-dumpText] [-forceAs mimeType] url}
 * <ul>
 *   <li>{@code -dumpText} additionally prints the text of every parse.</li>
 *   <li>{@code -forceAs mimeType} overrides the content type of the fetched
 *       content with {@code mimeType} before parsing.</li>
 *   <li>{@code url} must be the last argument.</li>
 * </ul>
 */
public class ParserChecker implements Tool {
    public static final Logger LOG = LoggerFactory.getLogger(ParserChecker.class);

    /** Usage line logged whenever the command line cannot be interpreted. */
    private static final String USAGE = "Usage: ParserChecker [-dumpText] [-forceAs mimeType] url";

    private Configuration conf;

    /**
     * Fetches the URL named on the command line, parses its content and
     * reports the URL, parse data and (optionally) parse text of every entry
     * of the parse result.
     *
     * @param args command-line arguments, see the class description
     * @return {@code 0} on success; {@code -1} if the arguments are invalid,
     *         the fetch is not successful, no content or content type is
     *         available, or the parser returns no result
     * @throws Exception if any of the fetching or parsing collaborators fails
     */
    public int run(String[] args) throws Exception {
        boolean dumpText = false;
        boolean force = false;
        String contentType = null;
        String url = null;

        if (args.length == 0) {
            LOG.error(USAGE);
            return -1;
        }
        for (int i = 0; i < args.length; ++i) {
            String arg = args[i];
            if (arg.equals("-forceAs")) {
                // The option needs a value; without this check args[++i] would
                // run past the end of the array.
                if (i + 1 >= args.length) {
                    LOG.error(USAGE);
                    return -1;
                }
                force = true;
                contentType = args[++i];
            } else if (arg.equals("-dumpText")) {
                dumpText = true;
            } else if (i != args.length - 1) {
                // Anything that is not an option is only accepted in last
                // position. Return instead of System.exit so the caller
                // (ToolRunner / main) decides how the process terminates.
                LOG.error(USAGE);
                return -1;
            } else {
                url = URLUtil.toASCII(arg);
            }
        }
        // Only options were given (or no usable URL was produced): there is
        // nothing to fetch.
        if (url == null) {
            LOG.error(USAGE);
            return -1;
        }

        LOG.info("fetching: {}", url);

        ProtocolFactory factory = new ProtocolFactory(this.conf);
        Protocol protocol = factory.getProtocol(url);
        ProtocolOutput output = protocol.getProtocolOutput(new Text(url), new CrawlDatum());
        if (!output.getStatus().isSuccess()) {
            System.err.println("Fetch failed with protocol status: " + output.getStatus());
            return -1;
        }

        Content content = output.getContent();
        if (content == null) {
            LOG.error("No content for {}", url);
            return -1;
        }

        // Either impose the requested content type on the content, or report
        // the one the content already carries.
        if (force) {
            content.setContentType(contentType);
        } else {
            contentType = content.getContentType();
        }
        if (contentType == null) {
            LOG.error("Failed to determine content type!");
            return -1;
        }

        if (ParseSegment.isTruncated(content)) {
            LOG.warn("Content is truncated, parse may fail!");
        }

        ParseResult parseResult = new ParseUtil(this.conf).parse(content);
        if (parseResult == null) {
            LOG.error("Problem with parse - check log");
            return -1;
        }

        byte[] signature = SignatureFactory.getSignature(this.getConf())
                .calculate(content, parseResult.get(new Text(url)));
        if (LOG.isInfoEnabled()) {
            LOG.info("parsing: {}", url);
            LOG.info("contentType: {}", contentType);
            LOG.info("signature: {}", StringUtil.toHexString(signature));
        }

        // Section headers go to the log, the payload itself to standard output.
        for (Map.Entry<Text, Parse> entry : parseResult) {
            Parse parse = entry.getValue();
            LOG.info("---------\nUrl\n---------------\n");
            System.out.print(entry.getKey());
            LOG.info("\n---------\nParseData\n---------\n");
            System.out.print(parse.getData().toString());
            if (dumpText) {
                LOG.info("---------\nParseText\n---------\n");
                System.out.print(parse.getText());
            }
        }
        return 0;
    }

    /**
     * Returns the configuration previously supplied through {@link #setConf}.
     *
     * @return the stored configuration, or {@code null} if none has been set
     */
    public Configuration getConf() {
        return this.conf;
    }

    /**
     * Stores the configuration used to create the protocol factory, the parser
     * and the signature implementation.
     *
     * @param c the configuration to use
     */
    public void setConf(Configuration c) {
        this.conf = c;
    }

    /**
     * Runs the checker through {@link ToolRunner} with a configuration created
     * by {@link NutchConfiguration#create()} and exits the JVM with the
     * resulting status code.
     *
     * @param args command-line arguments, see the class description
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(NutchConfiguration.create(), new ParserChecker(), args);
        System.exit(res);
    }
}

