package com.qwazr.extractor.parser;

import com.qwazr.extractor.ParserAbstract;
import com.qwazr.extractor.ParserDocument;
import com.qwazr.extractor.ParserField;
import java.io.File;
import java.io.InputStream;
import org.odftoolkit.odfdom.pkg.OdfElement;
import org.odftoolkit.simple.Document;
import org.odftoolkit.simple.common.TextExtractor;
import org.odftoolkit.simple.meta.Meta;

/* loaded from: input_file:com/qwazr/extractor/parser/Odf.class */
/**
 * Parser for OpenDocument files (text, spreadsheet and presentation, including their
 * template / master variants).
 *
 * <p>The document is loaded with the ODF Toolkit "simple" API. Office metadata is stored in
 * the parser-level {@code metas}, while the extracted text and the result of the language
 * detection are stored in a new {@link ParserDocument}.
 */
public class Odf extends ParserAbstract {

    /** MIME types handled by this parser by default. */
    public static final String[] DEFAULT_MIMETYPES = {
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.spreadsheet-template",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.text-master",
        "application/vnd.oasis.opendocument.text-template",
        "application/vnd.oasis.opendocument.presentation",
        "application/vnd.oasis.opendocument.presentation-template"
    };

    /** File extensions handled by this parser by default. */
    public static final String[] DEFAULT_EXTENSIONS = {"ods", "ots", "odt", "odm", "ott", "odp", "otp"};

    protected static final ParserField TITLE = ParserField.newString("title", "The title of the document");
    protected static final ParserField CREATOR = ParserField.newString("creator", "The name of the creator");
    protected static final ParserField CREATION_DATE = ParserField.newDate("creation_date", "The date of creation");
    protected static final ParserField MODIFICATION_DATE = ParserField.newDate("modification_date", "The date of last modification");
    protected static final ParserField DESCRIPTION = ParserField.newString("description", null);
    protected static final ParserField KEYWORDS = ParserField.newString("keywords", null);
    protected static final ParserField SUBJECT = ParserField.newString("subject", "The subject of the document");
    protected static final ParserField CONTENT = ParserField.newString("content", "The content of the document");
    protected static final ParserField LANGUAGE = ParserField.newString("language", null);
    protected static final ParserField PRODUCER = ParserField.newString("producer", "The producer of the document");
    protected static final ParserField LANG_DETECTION = ParserField.newString("lang_detection", "Detection of the language");

    /** Every field this parser declares, in the order returned by {@link #getFields()}. */
    protected static final ParserField[] FIELDS = {
        TITLE, CREATOR, CREATION_DATE, MODIFICATION_DATE, DESCRIPTION, KEYWORDS,
        SUBJECT, CONTENT, LANGUAGE, PRODUCER, LANG_DETECTION
    };

    /**
     * Returns the parameters accepted by this parser.
     *
     * @return always {@code null}: this parser declares no parameters
     */
    @Override // com.qwazr.extractor.ParserAbstract
    public ParserField[] getParameters() {
        return null;
    }

    /**
     * Returns the fields this parser declares.
     *
     * @return the shared {@link #FIELDS} array (not a copy)
     */
    @Override // com.qwazr.extractor.ParserAbstract
    public ParserField[] getFields() {
        return FIELDS;
    }

    /**
     * Returns the file extensions handled by default.
     *
     * @return the shared {@link #DEFAULT_EXTENSIONS} array (not a copy)
     */
    @Override // com.qwazr.extractor.ParserAbstract
    public String[] getDefaultExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /**
     * Returns the MIME types handled by default.
     *
     * @return the shared {@link #DEFAULT_MIMETYPES} array (not a copy)
     */
    @Override // com.qwazr.extractor.ParserAbstract
    public String[] getDefaultMimeTypes() {
        return DEFAULT_MIMETYPES;
    }

    /**
     * Extracts metadata and text from an already loaded document, then closes it.
     *
     * <p>The document is closed exactly once, whether extraction succeeds or throws.
     *
     * @param document the loaded document; when {@code null} the method does nothing
     * @throws Exception if reading the metadata, extracting the text or closing the document fails
     */
    private void parseContent(Document document) throws Exception {
        if (document == null) {
            return;
        }
        try {
            Meta officeMetadata = document.getOfficeMetadata();
            if (officeMetadata != null) {
                this.metas.add(CREATION_DATE, officeMetadata.getCreationDate());
                this.metas.add(MODIFICATION_DATE, officeMetadata.getDcdate());
                this.metas.add(TITLE, officeMetadata.getTitle());
                this.metas.add(SUBJECT, officeMetadata.getSubject());
                // DESCRIPTION is declared in FIELDS, so populate it like the other metadata.
                this.metas.add(DESCRIPTION, officeMetadata.getDescription());
                this.metas.add(CREATOR, officeMetadata.getCreator());
                this.metas.add(PRODUCER, officeMetadata.getGenerator());
                this.metas.add(KEYWORDS, officeMetadata.getKeywords());
                this.metas.add(LANGUAGE, officeMetadata.getLanguage());
            }
            OdfElement contentRoot = document.getContentRoot();
            if (contentRoot != null) {
                ParserDocument newParserDocument = getNewParserDocument();
                String text = TextExtractor.newOdfTextExtractor(contentRoot).getText();
                if (text != null) {
                    newParserDocument.add(CONTENT, text);
                    newParserDocument.add(LANG_DETECTION, languageDetection(CONTENT, 10000));
                }
            }
        } finally {
            // Single close point: covers both the normal and the exceptional path.
            document.close();
        }
    }

    /**
     * Loads an OpenDocument from a stream and parses it.
     *
     * @param inputStream the stream to load the document from
     * @param str unused by this parser
     * @param str2 unused by this parser
     * @throws Exception if the document cannot be loaded or parsed
     */
    @Override // com.qwazr.extractor.ParserAbstract
    protected void parseContent(InputStream inputStream, String str, String str2) throws Exception {
        parseContent(Document.loadDocument(inputStream));
    }

    /**
     * Loads an OpenDocument from a file and parses it.
     *
     * @param file the file to load the document from
     * @param str unused by this parser
     * @param str2 unused by this parser
     * @throws Exception if the document cannot be loaded or parsed
     */
    @Override // com.qwazr.extractor.ParserAbstract
    protected void parseContent(File file, String str, String str2) throws Exception {
        parseContent(Document.loadDocument(file));
    }
}
