package com.qwazr.extractor.parser;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.qwazr.extractor.ParserAbstract;
import com.qwazr.extractor.ParserDocument;
import com.qwazr.extractor.ParserField;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:com/qwazr/extractor/parser/Text.class */
/**
 * Parser for plain-text documents.
 *
 * <p>The raw stream is handed to ICU's {@link CharsetDetector}; the decoded text is stored
 * in the {@code content} field, the name of the detected charset (when there is one) in
 * {@code charset_detection}, and the result of the inherited language detection in
 * {@code lang_detection}.
 */
public class Text extends ParserAbstract {
    /** MIME types handled by default. */
    public static final String[] DEFAULT_MIMETYPES = {"text/plain"};
    /** File extensions handled by default. */
    public static final String[] DEFAULT_EXTENSIONS = {"txt"};

    /** Field receiving the decoded text. */
    protected static final ParserField CONTENT = ParserField.newString("content", "The content of the document");
    /** Field receiving the value returned by {@code languageDetection}. */
    protected static final ParserField LANG_DETECTION = ParserField.newString("lang_detection", "Detection of the language");
    /** Field receiving the name of the charset reported by the detector. */
    protected static final ParserField CHARSET_DETECTION = ParserField.newString("charset_detection", "Detection of the charset");
    /** Every field this parser may populate. */
    protected static final ParserField[] FIELDS = {CONTENT, LANG_DETECTION, CHARSET_DETECTION};

    /**
     * @return always {@code null}; this parser declares no parameter fields
     */
    @Override
    public ParserField[] getParameters() {
        return null;
    }

    /**
     * @return the shared {@link #FIELDS} array (not a copy)
     */
    @Override
    public ParserField[] getFields() {
        return FIELDS;
    }

    /**
     * @return the shared {@link #DEFAULT_EXTENSIONS} array (not a copy)
     */
    @Override
    public String[] getDefaultExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /**
     * @return the shared {@link #DEFAULT_MIMETYPES} array (not a copy)
     */
    @Override
    public String[] getDefaultMimeTypes() {
        return DEFAULT_MIMETYPES;
    }

    /**
     * Reads the stream as text and fills a new parser document with the content, the
     * detected charset name (if any) and the language detection result.
     *
     * @param inputStream the raw bytes to parse; it is wrapped in a buffered stream which is
     *                    closed quietly before this method returns or throws
     * @param str         not used by this parser
     * @param str2        not used by this parser
     * @throws IOException if resetting or reading the stream fails
     */
    @Override
    protected void parseContent(InputStream inputStream, String str, String str2) throws IOException {
        final CharsetDetector detector = new CharsetDetector();
        final BufferedInputStream buffered = new BufferedInputStream(inputStream);
        try {
            detector.setText(buffered);
            final CharsetMatch match = detector.detect();
            final ParserDocument document = getNewParserDocument();

            final String content;
            if (match == null) {
                // No charset candidate: rewind and read the stream with the charset-less
                // IOUtils overload.
                // NOTE(review): reset() presumably relies on a mark left on the stream while
                // the detector sampled it in setText -- confirm against the ICU4J sources.
                buffered.reset();
                content = IOUtils.toString(buffered);
            } else {
                // Let the match decode the text itself, then record which charset it reported.
                content = match.getString();
                document.add(CHARSET_DETECTION, match.getName());
            }

            document.add(CONTENT, content);
            // NOTE(review): 10000 is presumably an upper bound on how much of the content
            // field is examined -- verify against ParserAbstract.languageDetection.
            document.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
        } finally {
            // Runs on both the normal and the exceptional path; close failures are ignored.
            IOUtils.closeQuietly(buffered);
        }
    }
}
