package com.qwazr.extractor.parser;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.qwazr.extractor.ParserAbstract;
import com.qwazr.extractor.ParserField;
import com.qwazr.extractor.ParserFieldsBuilder;
import com.qwazr.extractor.ParserResultBuilder;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import javax.ws.rs.core.MultivaluedMap;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:com/qwazr/extractor/parser/TextParser.class */
/**
 * Parser for plain-text content.
 *
 * <p>The raw bytes are run through ICU4J's {@link CharsetDetector}. When a charset is
 * matched, the text is decoded with it and the charset name is recorded in the
 * {@code charset_detection} field; otherwise the bytes are decoded with the platform
 * default charset. The decoded text is stored in {@code CONTENT} and the outcome of
 * {@code languageDetection} in {@code LANG_DETECTION}.
 */
public class TextParser extends ParserAbstract {
    private static final String[] DEFAULT_MIMETYPES = {"text/plain"};
    private static final String[] DEFAULT_EXTENSIONS = {"txt"};
    private static final ParserField CHARSET_DETECTION = ParserField.newString("charset_detection", "Detection of the charset");
    private static final ParserField[] FIELDS = {CONTENT, LANG_DETECTION, CHARSET_DETECTION};

    /**
     * This parser declares no parameters.
     *
     * @return always {@code null}
     */
    @Override // com.qwazr.extractor.ParserInterface
    public ParserField[] getParameters() {
        return null;
    }

    /**
     * @return the fields this parser may populate: content, language detection and
     *         charset detection
     */
    @Override // com.qwazr.extractor.ParserInterface
    public ParserField[] getFields() {
        return FIELDS;
    }

    /**
     * @return the file extensions handled by default ({@code "txt"})
     */
    @Override // com.qwazr.extractor.ParserInterface
    public String[] getDefaultExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /**
     * @return the MIME types handled by default ({@code "text/plain"})
     */
    @Override // com.qwazr.extractor.ParserInterface
    public String[] getDefaultMimeTypes() {
        return DEFAULT_MIMETYPES;
    }

    /**
     * Reads text from the given stream, detects its charset, and fills a new document
     * in the result builder.
     *
     * @param multivaluedMap      request parameters; not read by this parser
     * @param inputStream         the raw content; it is wrapped in a
     *                            {@link BufferedInputStream} which is closed (together
     *                            with this stream) before the method returns
     * @param str                 first hint forwarded to {@code findMimeType}
     *                            (presumably the file extension - TODO confirm against
     *                            ParserInterface)
     * @param str2                second hint forwarded to {@code findMimeType}
     *                            (presumably the declared MIME type - TODO confirm)
     * @param parserResultBuilder receives the MIME type meta and the new document
     * @throws IOException if reading, decoding, or closing the stream fails
     */
    @Override // com.qwazr.extractor.ParserInterface
    public void parseContent(MultivaluedMap<String, String> multivaluedMap, InputStream inputStream, String str, String str2, ParserResultBuilder parserResultBuilder) throws IOException {
        parserResultBuilder.metas().set(MIME_TYPE, findMimeType(str, str2, this::findMimeTypeUsingDefault));
        final CharsetDetector charsetDetector = new CharsetDetector();
        // try-with-resources closes the buffered stream on every exit path and attaches
        // any close() failure as a suppressed exception of the primary one.
        try (BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream)) {
            charsetDetector.setText(bufferedInputStream);
            final CharsetMatch match = charsetDetector.detect();
            final ParserFieldsBuilder document = parserResultBuilder.newDocument();
            final String content;
            if (match != null) {
                content = match.getString();
                document.add(CHARSET_DETECTION, match.getName());
            } else {
                // No charset matched: rewind and decode with the platform default.
                // NOTE(review): reset() relies on the detector having marked the stream
                // in setText() - confirm against the ICU4J version in use.
                bufferedInputStream.reset();
                content = IOUtils.toString(bufferedInputStream, Charset.defaultCharset());
            }
            document.add(CONTENT, content);
            // NOTE(review): 10000 is presumably the amount of text sampled for language
            // detection - confirm in ParserAbstract.languageDetection.
            document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
        }
    }
}
