/*
 * Decompiled with CFR 0.152.
 */
package org.archive.modules.writer;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.lang.StringUtils;
import org.archive.io.ReplayInputStream;
import org.archive.io.WriterPool;
import org.archive.io.WriterPoolMember;
import org.archive.io.WriterPoolSettings;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPool;
import org.archive.modules.ProcessResult;
import org.archive.modules.ProcessorURI;
import org.archive.modules.deciderules.recrawl.IdenticalDigestDecideRule;
import org.archive.modules.extractor.Link;
import org.archive.modules.writer.MetadataProvider;
import org.archive.modules.writer.WriterPoolProcessor;
import org.archive.state.Expert;
import org.archive.state.Global;
import org.archive.state.Key;
import org.archive.state.KeyManager;
import org.archive.state.StateProvider;
import org.archive.uid.GeneratorFactory;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.ANVLRecord;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Processor that writes crawled URIs out as WARC records using a pool of
 * {@link WARCWriter}s.
 *
 * <p>For URIs whose scheme starts with {@code http} a response (or revisit)
 * record is written, optionally followed by a request record and a metadata
 * record that are tied to it through a {@code WARC-Concurrent-To} header.
 * For {@code dns} URIs a single response record is written. Any other scheme
 * is logged and skipped.
 */
public class WARCWriterProcessor
extends WriterPoolProcessor {
    private static final long serialVersionUID = 6182850087635847443L;
    private static final Logger logger = Logger.getLogger(WARCWriterProcessor.class.getName());

    /** Whether a 'request' record is written alongside each HTTP response; defaults to true. */
    @Expert
    public static final Key<Boolean> WRITE_REQUESTS = Key.make(true);

    /** Whether a 'metadata' record is written alongside each HTTP response; defaults to true. */
    @Expert
    public static final Key<Boolean> WRITE_METADATA = Key.make(true);

    /**
     * Whether a 'revisit' record is written instead of a full response when
     * the URI is reported as having an identical digest; defaults to true.
     */
    @Expert
    public static final Key<Boolean> WRITE_REVISIT_FOR_IDENTICAL_DIGESTS = Key.make(true);

    /**
     * Whether a 'revisit' record is written instead of a full response when
     * the fetch status is 304; defaults to true.
     */
    @Expert
    public static final Key<Boolean> WRITE_REVISIT_FOR_NOT_MODIFIED = Key.make(true);

    private static final String[] DEFAULT_PATH = new String[]{"warcs"};

    /** Key returned by {@link #getPathKey()}; created with the single entry "warcs". */
    @Global
    public static final Key<List<String>> PATH = Key.makeSimpleList(String.class, (Object[])new String[]{"warcs"});

    /** Result of the first {@link #getMetadata(StateProvider)} call, reused by later calls. */
    private transient List<String> cachedMetadata;

    /**
     * Returns the default output path entries.
     *
     * @return a fresh copy of the default path array, so callers cannot
     *         modify the shared constant
     */
    protected String[] getDefaultPath() {
        return DEFAULT_PATH.clone();
    }

    /**
     * Creates the {@link WARCWriterPool} this processor borrows writers from.
     *
     * @param serialNo serial number counter handed to the pool
     */
    @Override
    protected void setupPool(AtomicInteger serialNo) {
        int maxActive = this.getMaxActive();
        int maxWait = this.getMaxWait();
        WriterPoolSettings wps = this.getWriterPoolSettings();
        this.setPool((WriterPool)new WARCWriterPool(serialNo, wps, maxActive, maxWait));
    }

    /**
     * Writes the given URI if {@code shouldWrite} accepts it.
     *
     * <p>An {@link IOException} raised while writing is recorded on the URI
     * as a non-fatal failure and logged; processing then proceeds.
     *
     * @param puri the URI being processed
     * @return the result of {@link #write(String, ProcessorURI)} when the URI
     *         is written, otherwise {@link ProcessResult#PROCEED}
     */
    @Override
    protected ProcessResult innerProcessResult(ProcessorURI puri) {
        String scheme = puri.getUURI().getScheme().toLowerCase();
        try {
            if (this.shouldWrite(puri)) {
                return this.write(scheme, puri);
            }
            logger.info("This writer does not write out scheme " + scheme + " content");
        }
        catch (IOException e) {
            puri.getNonFatalFailures().add(e);
            logger.log(Level.SEVERE, "Failed write of Record: " + puri.toString(), e);
        }
        return ProcessResult.PROCEED;
    }

    /**
     * Borrows a writer from the pool, writes the records for {@code curi} and
     * hands the writer back.
     *
     * <p>On {@link IOException} the writer is invalidated in the pool instead
     * of being returned, and the exception is rethrown. Otherwise the number
     * of bytes the writer advanced is added to the running total before the
     * writer is returned.
     *
     * @param lowerCaseScheme the URI scheme, already lower-cased
     * @param curi the URI whose content is to be written
     * @return the result of {@code checkBytesWritten(curi)}
     * @throws IOException if borrowing the writer or writing a record fails
     */
    protected ProcessResult write(String lowerCaseScheme, ProcessorURI curi) throws IOException {
        WriterPoolMember writer = this.getPool().borrowFile();
        // -1 marks "start position not read yet" so the finally block does
        // not account bytes against a bogus starting offset.
        long position = -1L;
        try {
            // The size check lives inside the try so that a failure here
            // still invalidates or returns the borrowed writer.
            position = writer.getPosition();
            writer.checkSize();
            if (writer.getPosition() != position) {
                // checkSize() moved the position; account for that change
                // now and measure the records below from the new position.
                this.setTotalBytesWritten(this.getTotalBytesWritten() + (writer.getPosition() - position));
                position = writer.getPosition();
            }
            WARCWriter w = (WARCWriter)writer;
            URI baseid = this.getRecordID();
            String timestamp = ArchiveUtils.getLog14Date((long)curi.getFetchBeginTime());
            if (lowerCaseScheme.startsWith("http")) {
                this.writeHttpRecords(w, timestamp, baseid, curi);
            } else if (lowerCaseScheme.equals("dns")) {
                this.writeDnsRecord(w, timestamp, baseid, curi);
            } else {
                logger.warning("No handler for scheme " + lowerCaseScheme);
            }
        }
        catch (IOException e) {
            this.getPool().invalidateFile(writer);
            // Null out so the finally block does not also return the writer.
            writer = null;
            throw e;
        }
        finally {
            if (writer != null) {
                if (position >= 0L) {
                    this.setTotalBytesWritten(this.getTotalBytesWritten() + (writer.getPosition() - position));
                }
                this.getPool().returnFile(writer);
            }
        }
        return this.checkBytesWritten(curi);
    }

    /**
     * Writes the response (or revisit) record for an HTTP URI, followed by
     * the optional request and metadata records.
     */
    private void writeHttpRecords(WARCWriter w, String timestamp, URI baseid, ProcessorURI curi) throws IOException {
        ANVLRecord headers = new ANVLRecord(5);
        if (curi.getContentDigest() != null) {
            headers.addLabelValue("WARC-Payload-Digest", curi.getContentDigestSchemeString());
        }
        headers.addLabelValue("WARC-IP-Address", this.getHostAddress(curi));

        URI rid;
        if (IdenticalDigestDecideRule.hasIdenticalDigest(curi) && ((Boolean)curi.get(this, WRITE_REVISIT_FOR_IDENTICAL_DIGESTS)).booleanValue()) {
            rid = this.writeRevisitDigest(w, timestamp, "application/http; msgtype=response", baseid, curi, headers);
        } else if (curi.getFetchStatus() == 304 && ((Boolean)curi.get(this, WRITE_REVISIT_FOR_NOT_MODIFIED)).booleanValue()) {
            rid = this.writeRevisitNotModified(w, timestamp, baseid, curi, headers);
        } else {
            String truncated = WARCWriterProcessor.truncationReason(curi.getAnnotations());
            if (truncated != null) {
                headers.addLabelValue("WARC-Truncated", truncated);
            }
            rid = this.writeResponse(w, timestamp, "application/http; msgtype=response", baseid, curi, headers);
        }

        // Request and metadata records share one header set that points
        // back at the record written above.
        headers = new ANVLRecord(1);
        headers.addLabelValue("WARC-Concurrent-To", '<' + rid.toString() + '>');
        if (((Boolean)curi.get(this, WRITE_REQUESTS)).booleanValue()) {
            this.writeRequest(w, timestamp, "application/http; msgtype=request", baseid, curi, headers);
        }
        if (((Boolean)curi.get(this, WRITE_METADATA)).booleanValue()) {
            this.writeMetadata(w, timestamp, baseid, curi, headers);
        }
    }

    /**
     * Writes the response record for a DNS URI, adding a WARC-IP-Address
     * header when a non-empty "dns-server-ip" value is present in the URI data.
     */
    private void writeDnsRecord(WARCWriter w, String timestamp, URI baseid, ProcessorURI curi) throws IOException {
        ANVLRecord headers = null;
        String ip = (String)curi.getData().get("dns-server-ip");
        if (ip != null && ip.length() > 0) {
            headers = new ANVLRecord(1);
            headers.addLabelValue("WARC-IP-Address", ip);
        }
        this.writeResponse(w, timestamp, curi.getContentType(), baseid, curi, headers);
    }

    /**
     * Maps truncation annotations to a WARC-Truncated value; the first
     * match in the order time, length, headers wins. Returns null if none.
     */
    private static String truncationReason(Collection<String> annotations) {
        if (annotations.contains("timeTrunc")) {
            return "time";
        }
        if (annotations.contains("lenTrunc")) {
            return "length";
        }
        if (annotations.contains("headerTrunc")) {
            return "long-headers";
        }
        return null;
    }

    /**
     * Writes a 'request' record from the recorded output of the fetch.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param mimetype content type of the record
     * @param baseid base record id; the written record uses this id qualified
     *        with {@code type=request}
     * @param curi URI whose recorded output is replayed
     * @param namedFields extra named header fields for the record
     * @return the qualified id the record was written under
     * @throws IOException if writing the record or closing the replay stream fails
     */
    protected URI writeRequest(WARCWriter w, String timestamp, String mimetype, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        URI uid = this.qualifyRecordID(baseid, "type", "request");
        ReplayInputStream ris = curi.getRecorder().getRecordedOutput().getReplayInputStream();
        try {
            w.writeRequestRecord(curi.toString(), timestamp, mimetype, uid, namedFields, (InputStream)ris, curi.getRecorder().getRecordedOutput().getSize());
        }
        finally {
            if (ris != null) {
                ris.close();
            }
        }
        return uid;
    }

    /**
     * Writes a 'response' record from the recorded input of the fetch.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param mimetype content type of the record
     * @param baseid id the record is written under
     * @param curi URI whose recorded input is replayed
     * @param namedFields extra named header fields for the record; the DNS
     *        path in this class may pass {@code null}
     * @return {@code baseid}
     * @throws IOException if writing the record or closing the replay stream fails
     */
    protected URI writeResponse(WARCWriter w, String timestamp, String mimetype, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        ReplayInputStream ris = curi.getRecorder().getRecordedInput().getReplayInputStream();
        try {
            w.writeResponseRecord(curi.toString(), timestamp, mimetype, baseid, namedFields, (InputStream)ris, curi.getRecorder().getRecordedInput().getSize());
        }
        finally {
            if (ris != null) {
                ris.close();
            }
        }
        return baseid;
    }

    /**
     * Writes a 'resource' record from the recorded input of the fetch.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param mimetype content type of the record
     * @param baseid id the record is written under
     * @param curi URI whose recorded input is replayed
     * @param namedFields extra named header fields for the record
     * @return {@code baseid}
     * @throws IOException if writing the record or closing the replay stream fails
     */
    protected URI writeResource(WARCWriter w, String timestamp, String mimetype, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        ReplayInputStream ris = curi.getRecorder().getRecordedInput().getReplayInputStream();
        try {
            w.writeResourceRecord(curi.toString(), timestamp, mimetype, baseid, namedFields, (InputStream)ris, curi.getRecorder().getRecordedInput().getSize());
        }
        finally {
            if (ris != null) {
                ris.close();
            }
        }
        return baseid;
    }

    /**
     * Writes a 'revisit' record using the identical-payload-digest profile.
     *
     * <p>The record length is the recorded input's content-begin offset when
     * that is positive, otherwise the full recorded size. The record is
     * marked {@code WARC-Truncated: length}, and the annotation
     * {@code warcRevisit:digest} is added to the URI after a successful write.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param mimetype content type of the record
     * @param baseid id the record is written under
     * @param curi URI whose recorded input is replayed
     * @param namedFields header fields for the record; profile and truncation
     *        fields are added to it
     * @return {@code baseid}
     * @throws IOException if writing the record or closing the replay stream fails
     */
    protected URI writeRevisitDigest(WARCWriter w, String timestamp, String mimetype, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        long revisedLength = curi.getRecorder().getRecordedInput().getContentBegin();
        revisedLength = revisedLength > 0L ? revisedLength : curi.getRecorder().getRecordedInput().getSize();
        namedFields.addLabelValue("WARC-Profile", "http://netpreserve.org/warc/0.17/revisit/identical-payload-digest");
        namedFields.addLabelValue("WARC-Truncated", "length");
        ReplayInputStream ris = curi.getRecorder().getRecordedInput().getReplayInputStream();
        try {
            w.writeRevisitRecord(curi.toString(), timestamp, mimetype, baseid, namedFields, (InputStream)ris, revisedLength);
        }
        finally {
            if (ris != null) {
                ris.close();
            }
        }
        curi.getAnnotations().add("warcRevisit:digest");
        return baseid;
    }

    /**
     * Writes a 'revisit' record using the server-not-modified profile.
     *
     * <p>When the URI data holds an "http-transaction" entry, its
     * {@code etag} and {@code last-modified} response headers are copied
     * into the record as {@code WARC-Etag} and {@code WARC-Last-Modified}.
     * The record is written with a null content type and a length of zero,
     * and the annotation {@code warcRevisit:notModified} is added to the URI
     * after a successful write.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param baseid id the record is written under
     * @param curi URI being written
     * @param namedFields header fields for the record; profile, validator and
     *        truncation fields are added to it
     * @return {@code baseid}
     * @throws IOException if writing the record or closing the replay stream fails
     */
    protected URI writeRevisitNotModified(WARCWriter w, String timestamp, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        namedFields.addLabelValue("WARC-Profile", "http://netpreserve.org/warc/0.17/revisit/server-not-modified");
        if (curi.containsDataKey("http-transaction")) {
            HttpMethodBase method = (HttpMethodBase)curi.getData().get("http-transaction");
            this.saveHeader("etag", method, namedFields, "WARC-Etag");
            this.saveHeader("last-modified", method, namedFields, "WARC-Last-Modified");
        }
        namedFields.addLabelValue("WARC-Truncated", "length");
        ReplayInputStream ris = curi.getRecorder().getRecordedInput().getReplayInputStream();
        try {
            w.writeRevisitRecord(curi.toString(), timestamp, null, baseid, namedFields, (InputStream)ris, 0L);
        }
        finally {
            if (ris != null) {
                ris.close();
            }
        }
        curi.getAnnotations().add("warcRevisit:notModified");
        return baseid;
    }

    /**
     * Copies one HTTP response header into the given record under a new
     * name; does nothing when the response has no such header.
     *
     * @param origName name of the HTTP response header to read
     * @param method HTTP transaction to read the header from
     * @param headers record to add the value to
     * @param newName label to store the value under
     */
    protected void saveHeader(String origName, HttpMethodBase method, ANVLRecord headers, String newName) {
        Header header = method.getResponseHeader(origName);
        if (header != null) {
            headers.addLabelValue(newName, header.getValue());
        }
    }

    /**
     * Writes a 'metadata' record describing how the URI was reached and
     * fetched.
     *
     * <p>The body is an ANVL record holding: a {@code seed} label for seeds,
     * otherwise an optional {@code force-fetch} label plus {@code via},
     * {@code hopsFromSeed} and (when present) {@code sourceTag}; the fetch
     * duration as {@code fetchTimeMs} when it is non-negative; and one
     * {@code outlink} entry per out-link.
     *
     * @param w writer to write to
     * @param timestamp record timestamp
     * @param baseid base record id; the written record uses this id qualified
     *        with {@code type=metadata}
     * @param curi URI being described
     * @param namedFields extra named header fields for the record
     * @return the qualified id the record was written under
     * @throws IOException if writing the record fails
     */
    protected URI writeMetadata(WARCWriter w, String timestamp, URI baseid, ProcessorURI curi, ANVLRecord namedFields) throws IOException {
        URI uid = this.qualifyRecordID(baseid, "type", "metadata");
        ANVLRecord r = new ANVLRecord();
        if (curi.isSeed()) {
            r.addLabel("seed");
        } else {
            if (curi.forceFetch()) {
                r.addLabel("force-fetch");
            }
            r.addLabelValue("via", WARCWriterProcessor.flattenVia(curi));
            r.addLabelValue("hopsFromSeed", curi.getPathFromSeed());
            if (curi.containsDataKey("source")) {
                r.addLabelValue("sourceTag", (String)curi.getData().get("source"));
            }
        }
        long duration = curi.getFetchCompletedTime() - curi.getFetchBeginTime();
        if (duration > -1L) {
            r.addLabelValue("fetchTimeMs", Long.toString(duration));
        }
        Collection<Link> links = curi.getOutLinks();
        if (links != null && links.size() > 0) {
            for (Link link : links) {
                r.addLabelValue("outlink", link.toString());
            }
        }
        byte[] b = r.getUTF8Bytes();
        w.writeMetadataRecord(curi.toString(), timestamp, "application/warc-fields", uid, namedFields, (InputStream)new ByteArrayInputStream(b), (long)b.length);
        return uid;
    }

    /**
     * Obtains a new record id from the {@link GeneratorFactory}.
     *
     * @return the new record id
     * @throws IOException wrapping the {@link URISyntaxException} if the
     *         generator produces an invalid URI
     */
    protected URI getRecordID() throws IOException {
        try {
            return GeneratorFactory.getFactory().getRecordID();
        }
        catch (URISyntaxException e) {
            // Same message as before, but keep the cause for its stack trace.
            throw new IOException(e.toString(), e);
        }
    }

    /**
     * Derives a record id from {@code base} by attaching a single qualifier.
     *
     * @param base id to qualify
     * @param key qualifier name
     * @param value qualifier value
     * @return the qualified record id
     * @throws IOException wrapping the {@link URISyntaxException} if the
     *         generator produces an invalid URI
     */
    protected URI qualifyRecordID(URI base, String key, String value) throws IOException {
        HashMap<String, String> qualifiers = new HashMap<String, String>(1);
        qualifiers.put(key, value);
        try {
            return GeneratorFactory.getFactory().qualifyRecordID(base, qualifiers);
        }
        catch (URISyntaxException e) {
            // Same message as before, but keep the cause for its stack trace.
            throw new IOException(e.toString(), e);
        }
    }

    /**
     * @return the {@link #PATH} key
     */
    @Override
    protected Key<List<String>> getPathKey() {
        return PATH;
    }

    /**
     * Builds the crawl-level metadata as a single ANVL-formatted string.
     *
     * <p>The record holds the software version, the local host address and
     * name (skipped with a warning if the local host cannot be resolved),
     * the WARC format identifiers, and every non-blank job property
     * supplied by the configured {@link MetadataProvider}. The result is
     * computed on the first call and reused by later calls on this instance.
     *
     * @param global state provider used to look up the metadata provider
     * @return a one-element list containing the metadata record as a string
     */
    @Override
    public List<String> getMetadata(StateProvider global) {
        if (this.cachedMetadata != null) {
            return this.cachedMetadata;
        }
        ANVLRecord record = new ANVLRecord(7);
        record.addLabelValue("software", "Heritrix/" + ArchiveUtils.VERSION + " http://crawler.archive.org");
        try {
            InetAddress host = InetAddress.getLocalHost();
            record.addLabelValue("ip", host.getHostAddress());
            record.addLabelValue("hostname", host.getHostName());
        }
        catch (UnknownHostException e) {
            logger.log(Level.WARNING, "unable top obtain local crawl engine host", e);
        }
        record.addLabelValue("format", "WARC File Format 0.17");
        record.addLabelValue("conformsTo", "http://crawler.archive.org/warc/0.17/WARC0.17ISO.doc");
        MetadataProvider provider = (MetadataProvider)global.get((Object)this, METADATA_PROVIDER);
        this.addIfNotBlank(record, "operator", provider.getJobOperator());
        this.addIfNotBlank(record, "publisher", provider.getOrganization());
        this.addIfNotBlank(record, "audience", provider.getAudience());
        this.addIfNotBlank(record, "isPartOf", provider.getJobName());
        this.addIfNotBlank(record, "description", provider.getJobDescription());
        this.addIfNotBlank(record, "robots", provider.getRobotsPolicy());
        this.addIfNotBlank(record, "http-header-user-agent", provider.getUserAgent());
        this.addIfNotBlank(record, "http-header-from", provider.getFrom());
        // Store the result so the null check at the top can short-circuit
        // later calls; previously the cache field was never populated.
        this.cachedMetadata = Collections.singletonList(record.toString());
        return this.cachedMetadata;
    }

    /**
     * Adds {@code label: value} to the record unless the value is blank.
     *
     * @param record record to add to
     * @param label field label
     * @param value field value; skipped when {@code StringUtils.isNotBlank} rejects it
     */
    protected void addIfNotBlank(ANVLRecord record, String label, String value) {
        if (StringUtils.isNotBlank(value)) {
            record.addLabelValue(label, value);
        }
    }

    static {
        // Runs after the Key constants above have been initialized.
        // NOTE(review): presumably registers this class's Key fields with
        // the KeyManager — confirm against KeyManager.addKeys.
        KeyManager.addKeys(WARCWriterProcessor.class);
    }
}

