package it.unimi.dsi.mg4j.document;

import it.unimi.dsi.fastutil.chars.CharArrays;
import it.unimi.dsi.fastutil.objects.Reference2ObjectMap;
import it.unimi.dsi.mg4j.io.FastBufferedReader;
import it.unimi.dsi.mg4j.io.WordReader;
import it.unimi.dsi.mg4j.util.Properties;
import it.unimi.dsi.mg4j.util.parser.BulletParser;
import it.unimi.dsi.mg4j.util.parser.callback.TextExtractor;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.nio.charset.Charset;
import org.apache.commons.configuration.ConfigurationException;

/* loaded from: input_file:site-search/heritrix/lib/mg4j-1.0.1.jar:it/unimi/dsi/mg4j/document/HtmlDocumentFactory.class */
/**
 * A property-based document factory for HTML input.
 *
 * <p>Each document is decoded from its input stream with the encoding resolved
 * from the metadata, accumulated into a character buffer and run through a
 * {@link BulletParser} whose callback is a {@link TextExtractor}. Two fields are
 * exposed: index 0 is named {@code "text"} and index 1 is named {@code "title"}.
 *
 * <p>The parser, the extractor and the character buffer belong to the factory, and
 * documents read them through their factory reference each time they are asked for
 * content, rather than keeping a copy.
 * NOTE(review): presumably this means only the most recently created document of a
 * given factory exposes meaningful content — confirm against BulletParser/TextExtractor.
 */
public class HtmlDocumentFactory extends PropertyBasedDocumentFactory {
    /** Initial length, in characters, of the buffer that receives the decoded input. */
    private static final int INITIAL_BUFFER_SIZE = 16384;

    /** Parser run over the decoded characters of each document; rebuilt on deserialization. */
    private transient BulletParser parser;
    /** Callback installed on {@link #parser}; holds the extracted text and title. */
    private transient TextExtractor textExtractor;
    /** Word reader returned for every field; not transient, so it travels with the serialized form. */
    private WordReader wordReader;
    /** Buffer for decoded characters; replaced by a larger array as the input requires. */
    private transient char[] text;

    /**
     * Creates a factory configured from the given properties.
     *
     * @param properties the configuration, passed to the superclass constructor.
     * @throws ConfigurationException if the superclass constructor rejects the properties.
     */
    public HtmlDocumentFactory(Properties properties) throws ConfigurationException {
        super(properties);
        init();
    }

    /**
     * Creates a factory using the given default metadata.
     *
     * @param reference2ObjectMap the metadata map, passed to the superclass constructor.
     */
    public HtmlDocumentFactory(Reference2ObjectMap reference2ObjectMap) {
        super(reference2ObjectMap);
        init();
    }

    /**
     * Creates a factory configured from an array of property strings.
     *
     * @param strArr the property strings, passed to the superclass constructor.
     * @throws ConfigurationException if the superclass constructor rejects the properties.
     */
    public HtmlDocumentFactory(String[] strArr) throws ConfigurationException {
        super(strArr);
        init();
    }

    /** Creates a factory with no explicit configuration. */
    public HtmlDocumentFactory() {
        init();
    }

    /** Builds the parser, the extractor callback, the word reader and the character buffer. */
    private void init() {
        this.parser = new BulletParser();
        this.textExtractor = new TextExtractor();
        this.wordReader = new FastBufferedReader();
        this.parser.setCallback(this.textExtractor);
        this.text = new char[INITIAL_BUFFER_SIZE];
    }

    /**
     * Restores the serialized state and then recreates the transient members.
     *
     * <p>{@link #wordReader} is not transient, so it is left to
     * {@code defaultReadObject()} and is not recreated here.
     *
     * @param objectInputStream the stream the instance is being read from.
     * @throws IOException if reading the default fields fails.
     * @throws ClassNotFoundException if a serialized class cannot be found.
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        this.parser = new BulletParser();
        this.textExtractor = new TextExtractor();
        this.parser.setCallback(this.textExtractor);
        this.text = new char[INITIAL_BUFFER_SIZE];
    }

    /**
     * Handles the MIME type and {@code "encoding"} properties, delegating anything else
     * to the superclass.
     *
     * <p>The encoding value is passed through {@link Charset#forName(String)} and the
     * resulting charset's string form is what gets stored.
     *
     * @param str the property name.
     * @param strArr the values given for the property; exactly one is expected for the
     *     properties handled here (checked by {@code ensureJustOne}).
     * @param reference2ObjectMap the metadata map receiving the parsed value.
     * @return {@code true} for the two properties handled here, otherwise whatever the
     *     superclass returns.
     * @throws ConfigurationException propagated from {@code ensureJustOne} or the superclass.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // it.unimi.dsi.mg4j.document.PropertyBasedDocumentFactory
    public boolean parseProperty(String str, String[] strArr, Reference2ObjectMap reference2ObjectMap) throws ConfigurationException {
        if (str.equals(DocumentMetadataConstants.MIMETYPE)) {
            final String mimeType = ensureJustOne(str, strArr);
            reference2ObjectMap.put(DocumentMetadataConstants.MIMETYPE, mimeType);
            return true;
        }
        if (str.equals("encoding")) {
            final String charsetName = Charset.forName(ensureJustOne(str, strArr)).toString();
            reference2ObjectMap.put("encoding", charsetName);
            return true;
        }
        return super.parseProperty(str, strArr, reference2ObjectMap);
    }

    /**
     * Returns the number of fields of documents built by this factory.
     *
     * @return always 2.
     */
    @Override // it.unimi.dsi.mg4j.document.DocumentFactory
    public int numberOfFields() {
        return 2;
    }

    /**
     * Returns the name of a field after validating its index.
     *
     * @param i the field index, validated by {@code ensureFieldIndex}.
     * @return {@code "text"} for index 0, {@code "title"} otherwise.
     */
    @Override // it.unimi.dsi.mg4j.document.DocumentFactory
    public String fieldName(int i) {
        ensureFieldIndex(i);
        if (i == 0) {
            return "text";
        }
        return "title";
    }

    /**
     * Returns the index of a field given its name.
     *
     * @param str the field name; may be {@code null}.
     * @return 0 for {@code "text"}, 1 for {@code "title"}, -1 for anything else.
     */
    @Override // it.unimi.dsi.mg4j.document.DocumentFactory
    public int fieldIndex(String str) {
        if ("text".equals(str)) {
            return 0;
        }
        if ("title".equals(str)) {
            return 1;
        }
        return -1;
    }

    /**
     * Returns the type of a field after validating its index.
     *
     * @param i the field index, validated by {@code ensureFieldIndex}.
     * @return always 0. NOTE(review): presumably the text field-type constant of
     *     DocumentFactory, inlined by the decompiler — confirm.
     */
    @Override // it.unimi.dsi.mg4j.document.DocumentFactory
    public int fieldType(int i) {
        ensureFieldIndex(i);
        return 0;
    }

    /**
     * Builds a document from a raw stream; the stream is read and parsed immediately.
     *
     * @param inputStream the raw content of the document.
     * @param reference2ObjectMap the metadata of the document.
     * @return a new {@link HtmlDocument} bound to this factory.
     * @throws IOException if reading or decoding the stream fails.
     */
    @Override // it.unimi.dsi.mg4j.document.DocumentFactory
    public Document getDocument(InputStream inputStream, Reference2ObjectMap reference2ObjectMap) throws IOException {
        return new HtmlDocument(this, inputStream, reference2ObjectMap);
    }

    /**
     * A document whose content is whatever the owning factory's extractor currently holds.
     */
    /* loaded from: input_file:site-search/heritrix/lib/mg4j-1.0.1.jar:it/unimi/dsi/mg4j/document/HtmlDocumentFactory$HtmlDocument.class */
    protected class HtmlDocument implements Document {
        /** The stream this document was read from; closed by {@link #close()}. */
        private final InputStream rawContent;
        /** Metadata supplied at construction, used to resolve title and URI. */
        private final Reference2ObjectMap metadata;

        /** The factory whose parser, extractor and buffer this document uses. */
        /* renamed from: this, reason: not valid java name */
        final HtmlDocumentFactory f6this;

        /**
         * Decodes the whole stream into the factory's buffer and parses it.
         *
         * @param htmlDocumentFactory the factory providing parser, extractor and buffer.
         * @param inputStream the raw content; it is not closed here.
         * @param reference2ObjectMap the metadata; must resolve an {@code "encoding"} entry.
         * @throws IOException if the encoding is not supported or reading fails.
         */
        protected HtmlDocument(HtmlDocumentFactory htmlDocumentFactory, InputStream inputStream, Reference2ObjectMap reference2ObjectMap) throws IOException {
            this.f6this = htmlDocumentFactory;
            this.rawContent = inputStream;
            this.metadata = reference2ObjectMap;

            final HtmlDocumentFactory factory = this.f6this;
            final String encoding = (String) factory.resolveNotNull("encoding", reference2ObjectMap);
            final InputStreamReader reader = new InputStreamReader(inputStream, encoding);

            int filled = 0;
            for (;;) {
                final char[] buffer = factory.text;
                final int count = reader.read(buffer, filled, buffer.length - filled);
                if (count <= 0) {
                    break;
                }
                filled += count;
                // Ask for one slot beyond the data so the next read always has room.
                factory.text = CharArrays.grow(buffer, filled + 1);
            }
            factory.parser.parse(factory.text, 0, filled);
        }

        /**
         * Returns the extracted title, or the {@code "title"} metadata entry when the
         * extracted title is empty.
         */
        @Override // it.unimi.dsi.mg4j.document.Document
        public CharSequence title() {
            final TextExtractor extractor = this.f6this.textExtractor;
            final Object result;
            if (extractor.title.length() == 0) {
                result = this.f6this.resolve("title", this.metadata);
            } else {
                result = extractor.title;
            }
            return (CharSequence) result;
        }

        /** Returns the string form of {@link #title()}. */
        public String toString() {
            final CharSequence currentTitle = title();
            return currentTitle.toString();
        }

        /** Returns the URI resolved from the metadata. */
        @Override // it.unimi.dsi.mg4j.document.Document
        public CharSequence uri() {
            final Object resolvedUri = this.f6this.resolve(DocumentMetadataConstants.URI, this.metadata);
            return (CharSequence) resolvedUri;
        }

        /**
         * Returns a new reader over the content of a field.
         *
         * @param i the field index, validated by the factory.
         * @return a reader over the extracted text for index 0, over the extracted
         *     title otherwise.
         */
        @Override // it.unimi.dsi.mg4j.document.Document
        public Object content(int i) {
            this.f6this.ensureFieldIndex(i);
            final TextExtractor extractor = this.f6this.textExtractor;
            if (i == 0) {
                return new FastBufferedReader(extractor.text);
            }
            return new FastBufferedReader(extractor.title);
        }

        /**
         * Returns the factory's word reader; the same instance serves every field.
         *
         * @param i the field index, validated by the factory.
         */
        @Override // it.unimi.dsi.mg4j.document.Document
        public WordReader wordReader(int i) {
            final HtmlDocumentFactory factory = this.f6this;
            factory.ensureFieldIndex(i);
            return factory.wordReader;
        }

        /** Closes the underlying raw stream. */
        @Override // it.unimi.dsi.mg4j.document.Document
        public void close() throws IOException {
            final InputStream stream = this.rawContent;
            stream.close();
        }
    }
}
