package com.qwazr.extractor.parser;

import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.qwazr.extractor.ParserFactory;
import com.qwazr.extractor.ParserField;
import com.qwazr.extractor.ParserInterface;
import com.qwazr.extractor.ParserResult;
import com.qwazr.extractor.ParserUtils;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Collection;
import java.util.List;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.MultivaluedMap;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:com/qwazr/extractor/parser/TextParser.class */
/**
 * Parser for plain-text content.
 *
 * <p>The raw bytes are handed to ICU's {@link CharsetDetector}. When a charset is detected, the
 * decoded text and the charset name are stored in the result document; otherwise the bytes are
 * decoded with the JVM default charset. A language detection pass is then run on the content.
 *
 * <p>The class declares no instance fields, and {@link #createParser()} returns this same
 * instance.
 */
public class TextParser implements ParserFactory, ParserInterface {
    private static final String NAME = "text";
    private static final List<MediaType> DEFAULT_MIMETYPES = List.of(MediaType.TEXT_PLAIN_TYPE);
    private static final List<String> DEFAULT_EXTENSIONS = List.of("txt");
    private static final ParserField CHARSET_DETECTION = ParserField.newString("charset_detection", "Detection of the charset");
    private static final Collection<ParserField> FIELDS = List.of(CONTENT, LANG_DETECTION, CHARSET_DETECTION);

    /**
     * @return the fields this parser may add to a document: content, language detection and
     *     charset detection
     */
    @Override
    public Collection<ParserField> getFields() {
        return FIELDS;
    }

    /**
     * @return the file extensions handled by this parser ({@code "txt"})
     */
    @Override
    public Collection<String> getSupportedFileExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /**
     * @return the parser name, {@code "text"}
     */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * @return this instance, which acts as both factory and parser
     */
    @Override
    public ParserInterface createParser() {
        return this;
    }

    /**
     * @return the media types handled by this parser ({@code text/plain})
     */
    @Override
    public Collection<MediaType> getSupportedMimeTypes() {
        return DEFAULT_MIMETYPES;
    }

    /**
     * Extracts the text content of a stream, together with the detected charset and language.
     *
     * <p>The given stream is wrapped in a {@link BufferedInputStream}, which is closed before this
     * method returns or throws; closing the wrapper also closes {@code inputStream}.
     *
     * @param multivaluedMap extraction parameters; not read by this parser
     * @param inputStream the raw bytes of the text document
     * @param mediaType the media type recorded in the result metadata; must not be {@code null}
     * @return the result holding one document with the content, the detected language and, when
     *     detection succeeded, the charset name
     * @throws IOException if reading, resetting or closing the stream fails
     */
    @Override
    public ParserResult extract(MultivaluedMap<String, String> multivaluedMap, InputStream inputStream, MediaType mediaType) throws IOException {
        final ParserResult.Builder resultBuilder = ParserResult.of(NAME);
        resultBuilder.metas().set(MIME_TYPE, mediaType.toString());
        final CharsetDetector charsetDetector = new CharsetDetector();
        // Buffering gives the stream mark/reset support, which the fallback branch relies on.
        try (BufferedInputStream bufferedStream = new BufferedInputStream(inputStream)) {
            charsetDetector.setText(bufferedStream);
            final CharsetMatch charsetMatch = charsetDetector.detect();
            final ParserResult.FieldsBuilder document = resultBuilder.newDocument();
            final String content;
            if (charsetMatch != null) {
                // Let ICU decode the text using the charset it detected.
                content = charsetMatch.getString();
                document.add(CHARSET_DETECTION, charsetMatch.getName());
            } else {
                // No charset detected: rewind and decode with the JVM default charset.
                bufferedStream.reset();
                content = IOUtils.toString(bufferedStream, Charset.defaultCharset());
            }
            document.add(CONTENT, content);
            document.add(LANG_DETECTION, ParserUtils.languageDetection(document, CONTENT, 10000));
        }
        // The stream is already closed here; only the in-memory result is assembled.
        return resultBuilder.build();
    }

    /**
     * Extracts the text content of a file, treating it as {@code text/plain}.
     *
     * @param multivaluedMap extraction parameters, forwarded to the stream-based overload
     * @param path the file to read
     * @return the result produced by the stream-based overload
     * @throws IOException if the file cannot be read
     */
    @Override
    public ParserResult extract(MultivaluedMap<String, String> multivaluedMap, Path path) throws IOException {
        return ParserUtils.toBufferedStream(path, stream -> extract(multivaluedMap, stream, MediaType.TEXT_PLAIN_TYPE));
    }
}
