/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.document;

import it.unimi.dsi.fastutil.chars.CharArrays;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.mg4j.document.Document;
import it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory;
import it.unimi.dsi.mg4j.io.FastBufferedReader;
import it.unimi.dsi.mg4j.io.WordReader;
import it.unimi.dsi.mg4j.util.Properties;
import it.unimi.dsi.mg4j.util.parser.BulletParser;
import it.unimi.dsi.mg4j.util.parser.callback.TextExtractor;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import org.apache.commons.configuration.ConfigurationException;

/**
 * A document factory that decodes HTML input and parses it with a {@link BulletParser}
 * whose callback is a {@link TextExtractor}.
 *
 * <p>Documents produced by this factory expose two fields: field 0, named
 * <code>"text"</code>, and field 1, named <code>"title"</code>.
 *
 * <p>The parser, the text extractor, the character buffer and the word reader are kept
 * in the factory and are used by every {@link HtmlDocument} it creates.
 * NOTE(review): because of this sharing, documents obtained from the same factory
 * presumably cannot be used concurrently, and creating a new document presumably
 * changes what previously created documents return &mdash; confirm against
 * {@link TextExtractor} and the callers.
 */
public class HtmlDocumentFactory
extends PropertyBasedDocumentFactory {
    /** Initial size, in characters, of the buffer into which document content is decoded. */
    private static final int INITIAL_BUFFER_SIZE = 16384;

    /** The parser used for every document; rebuilt after deserialisation. */
    private transient BulletParser parser;
    /** The parser callback collecting text and title; rebuilt after deserialisation. */
    private transient TextExtractor textExtractor;
    /** The word reader returned for every field of every document (serialised with the factory). */
    private WordReader wordReader;
    /** The decoding buffer, grown on demand and reused across documents; rebuilt after deserialisation. */
    private transient char[] text;

    /**
     * Handles the <code>"mimetype"</code> and <code>"encoding"</code> properties, delegating
     * any other key to the superclass.
     *
     * <p>The encoding value is looked up with {@link Charset#forName(String)} and the string
     * form of the resulting charset is stored. Note that an illegal or unsupported charset
     * name therefore surfaces as the unchecked exception thrown by
     * {@link Charset#forName(String)}, not as a {@link ConfigurationException}.
     *
     * @param key the property name.
     * @param values the values associated with the property.
     * @param metadata the metadata map into which recognised properties are stored.
     * @return true if the key was <code>"mimetype"</code> or <code>"encoding"</code>;
     * otherwise, whatever the superclass returns.
     * @throws ConfigurationException as thrown by <code>ensureJustOne</code> or by the superclass.
     */
    protected boolean parseProperty(String key, String[] values, Reference2ObjectMap metadata) throws ConfigurationException {
        if (key.equals("mimetype")) {
            metadata.put("mimetype", ensureJustOne(key, values));
            return true;
        }
        if (key.equals("encoding")) {
            metadata.put("encoding", Charset.forName(ensureJustOne(key, values)).toString());
            return true;
        }
        return super.parseProperty(key, values, metadata);
    }

    /**
     * (Re)creates the state that is not serialised: the parser, its text-extracting
     * callback and the decoding buffer. Shared by construction and deserialisation so
     * that the two paths cannot drift apart.
     */
    private void initTransientState() {
        this.parser = new BulletParser();
        this.textExtractor = new TextExtractor();
        this.parser.setCallback(this.textExtractor);
        this.text = new char[INITIAL_BUFFER_SIZE];
    }

    /** Initialises a freshly constructed factory: transient state plus a new word reader. */
    private void init() {
        this.initTransientState();
        this.wordReader = new FastBufferedReader();
    }

    /**
     * Returns the number of fields of the documents produced by this factory.
     *
     * @return 2 (text and title).
     */
    public int numberOfFields() {
        return 2;
    }

    /**
     * Returns the name of a field.
     *
     * @param field a field index, checked with <code>ensureFieldIndex</code>.
     * @return <code>"text"</code> for field 0, <code>"title"</code> otherwise.
     */
    public String fieldName(int field) {
        this.ensureFieldIndex(field);
        return field == 0 ? "text" : "title";
    }

    /**
     * Returns the index of a field given its name.
     *
     * @param fieldName a field name (may be null).
     * @return 0 for <code>"text"</code>, 1 for <code>"title"</code>, -1 for anything else.
     */
    public int fieldIndex(String fieldName) {
        if ("text".equals(fieldName)) {
            return 0;
        }
        return "title".equals(fieldName) ? 1 : -1;
    }

    /**
     * Returns the type of a field.
     *
     * @param field a field index, checked with <code>ensureFieldIndex</code>.
     * @return 0 for every field. NOTE(review): presumably the constant denoting a
     * textual field &mdash; confirm against the document-factory interface.
     */
    public int fieldType(int field) {
        this.ensureFieldIndex(field);
        return 0;
    }

    /**
     * Restores the serialised fields and then rebuilds the transient state, which is
     * not part of the serialised form.
     *
     * @param s the stream this factory is being read from.
     * @throws IOException if reading from the stream fails.
     * @throws ClassNotFoundException if a serialised class cannot be found.
     */
    private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException {
        s.defaultReadObject();
        this.initTransientState();
    }

    /**
     * Creates a document by decoding and parsing the given stream.
     *
     * @param rawContent the stream containing the raw HTML content.
     * @param metadata the metadata of the document; together with the factory defaults it
     * must provide an <code>"encoding"</code>.
     * @return a new {@link HtmlDocument} over the parsed content.
     * @throws IOException if reading or decoding the stream fails.
     */
    public Document getDocument(InputStream rawContent, Reference2ObjectMap metadata) throws IOException {
        return new HtmlDocument(rawContent, metadata);
    }

    /**
     * Creates a factory configured by the given properties.
     *
     * @param properties the properties, passed to the superclass.
     * @throws ConfigurationException as thrown by the superclass constructor.
     */
    public HtmlDocumentFactory(Properties properties) throws ConfigurationException {
        super(properties);
        this.init();
    }

    /**
     * Creates a factory with the given default metadata.
     *
     * @param defaultMetadata the default metadata, passed to the superclass.
     */
    public HtmlDocumentFactory(Reference2ObjectMap defaultMetadata) {
        super(defaultMetadata);
        this.init();
    }

    /**
     * Creates a factory configured by the given property strings.
     *
     * @param property the property strings, passed to the superclass.
     * @throws ConfigurationException as thrown by the superclass constructor.
     */
    public HtmlDocumentFactory(String[] property) throws ConfigurationException {
        super(property);
        this.init();
    }

    /** Creates a factory using the superclass defaults. */
    public HtmlDocumentFactory() {
        this.init();
    }

    /**
     * A document whose content is extracted from HTML by the enclosing factory's parser.
     *
     * <p>Title and content are read from the enclosing factory's {@link TextExtractor}
     * each time they are requested; they are not copied into the document.
     */
    protected class HtmlDocument
    implements Document {
        /** The stream this document was read from; closed by {@link #close()}. */
        private final InputStream rawContent;
        /** The metadata supplied when this document was created. */
        private final Reference2ObjectMap metadata;

        /**
         * Returns the title of this document.
         *
         * @return the title collected by the text extractor, or, if that is empty, the
         * <code>"title"</code> metadata entry resolved by the factory.
         * NOTE(review): the metadata fallback may presumably be null when no title
         * metadata is available &mdash; confirm against <code>resolve</code>.
         */
        public CharSequence title() {
            final boolean noExtractedTitle = HtmlDocumentFactory.this.textExtractor.title.length() == 0;
            return (CharSequence)(noExtractedTitle ? HtmlDocumentFactory.this.resolve("title", this.metadata) : HtmlDocumentFactory.this.textExtractor.title);
        }

        /**
         * Returns the title of this document as a string.
         *
         * @return the string form of {@link #title()}; the string <code>"null"</code> if
         * no title is available, so that this method never throws on a missing title.
         */
        public String toString() {
            return String.valueOf(this.title());
        }

        /**
         * Returns the URI of this document.
         *
         * @return the <code>"uri"</code> metadata entry resolved by the factory.
         */
        public CharSequence uri() {
            return (CharSequence)HtmlDocumentFactory.this.resolve("uri", this.metadata);
        }

        /**
         * Returns the content of a field.
         *
         * @param field a field index, checked with <code>ensureFieldIndex</code>.
         * @return a new {@link FastBufferedReader} over the extracted text for field 0,
         * or over the extracted title otherwise.
         */
        public Object content(int field) {
            HtmlDocumentFactory.this.ensureFieldIndex(field);
            if (field == 0) {
                return new FastBufferedReader(HtmlDocumentFactory.this.textExtractor.text);
            }
            return new FastBufferedReader(HtmlDocumentFactory.this.textExtractor.title);
        }

        /**
         * Returns the word reader for a field.
         *
         * @param field a field index, checked with <code>ensureFieldIndex</code>.
         * @return the factory's word reader, which is the same instance for every field.
         */
        public WordReader wordReader(int field) {
            HtmlDocumentFactory.this.ensureFieldIndex(field);
            return HtmlDocumentFactory.this.wordReader;
        }

        /**
         * Closes the underlying raw content stream.
         *
         * @throws IOException if closing the stream fails.
         */
        public void close() throws IOException {
            this.rawContent.close();
        }

        /**
         * Decodes the whole stream into the factory's buffer and parses it.
         *
         * @param rawContent the stream containing the raw HTML content.
         * @param metadata the metadata of the document; the <code>"encoding"</code> entry
         * (resolved with <code>resolveNotNull</code>) names the charset used for decoding.
         * @throws IOException if reading fails or the encoding is not supported.
         */
        protected HtmlDocument(InputStream rawContent, Reference2ObjectMap metadata) throws IOException {
            this.rawContent = rawContent;
            this.metadata = metadata;
            final String charset = (String)HtmlDocumentFactory.this.resolveNotNull("encoding", metadata);
            final Reader reader = new InputStreamReader(rawContent, charset);
            int offset = 0;
            int read;
            while ((read = reader.read(HtmlDocumentFactory.this.text, offset, HtmlDocumentFactory.this.text.length - offset)) > 0) {
                offset += read;
                // Keep at least one free slot so the next read never asks for zero
                // characters, which would end the loop before the end of the stream.
                HtmlDocumentFactory.this.text = CharArrays.grow(HtmlDocumentFactory.this.text, offset + 1);
            }
            HtmlDocumentFactory.this.parser.parse(HtmlDocumentFactory.this.text, 0, offset);
        }
    }
}

