package com.qwazr.extractor.parser;

import com.qwazr.extractor.ParserAbstract;
import com.qwazr.extractor.ParserDocument;
import com.qwazr.extractor.ParserField;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.text.PDFTextStripper;

/* loaded from: input_file:com/qwazr/extractor/parser/PdfBox.class */
/**
 * Parser for PDF files built on Apache PDFBox.
 *
 * <p>Document-level information (title, author, page count, ...) is written to the inherited
 * {@code metas} document, and one new {@link ParserDocument} is produced for every page as the
 * text stripper finishes it (see {@link Stripper#endPage(PDPage)}).
 *
 * <p>An optional {@code password} parameter is passed to PDFBox when the document is loaded.
 */
public class PdfBox extends ParserAbstract {
    public static final String[] DEFAULT_MIMETYPES = {"application/pdf"};
    public static final String[] DEFAULT_EXTENSIONS = {"pdf"};

    // Document-level and page-level fields. The second argument of the ParserField factories is
    // presumably a human-readable description (null where none was provided) -- TODO confirm.
    // Fixed: the title description previously referred to a "Word document".
    protected static final ParserField TITLE = ParserField.newString("title", "The title of the PDF document");
    protected static final ParserField AUTHOR = ParserField.newString("author", "The name of the author");
    protected static final ParserField SUBJECT = ParserField.newString("subject", "The subject of the document");
    protected static final ParserField CONTENT = ParserField.newString("content", "The content of the document");
    protected static final ParserField PRODUCER = ParserField.newString("producer", "The producer of the document");
    protected static final ParserField KEYWORDS = ParserField.newString("keywords", "The keywords of the document");
    protected static final ParserField CREATION_DATE = ParserField.newDate("creation_date", null);
    protected static final ParserField MODIFICATION_DATE = ParserField.newDate("modification_date", null);
    protected static final ParserField LANGUAGE = ParserField.newString("language", null);
    protected static final ParserField ROTATION = ParserField.newInteger("rotation", null);
    protected static final ParserField NUMBER_OF_PAGES = ParserField.newInteger("number_of_pages", null);
    protected static final ParserField CHARACTER_COUNT = ParserField.newInteger("character_count", null);
    protected static final ParserField LANG_DETECTION = ParserField.newString("lang_detection", "Detection of the language");

    /**
     * Every field this parser may emit. {@code CHARACTER_COUNT} is appended last: it is written
     * for each page but was previously missing from this list; appending keeps the positions of
     * the pre-existing entries unchanged.
     */
    protected static final ParserField[] FIELDS = {
        TITLE, AUTHOR, SUBJECT, CONTENT, PRODUCER, KEYWORDS, CREATION_DATE, MODIFICATION_DATE,
        LANGUAGE, ROTATION, NUMBER_OF_PAGES, LANG_DETECTION, CHARACTER_COUNT
    };

    protected static final ParserField PASSWORD = ParserField.newString("password", "");

    /** Parameters accepted by this parser: only the optional document password. */
    protected static final ParserField[] PARAMETERS = {PASSWORD};

    /**
     * Text stripper that turns each finished page into its own {@link ParserDocument}.
     *
     * <p>This is deliberately a non-static inner class: it needs the enclosing parser to create
     * page documents and to run the language detection.
     */
    public class Stripper extends PDFTextStripper {
        /**
         * Creates the stripper.
         *
         * @throws IOException declared because the superclass constructor is invoked implicitly
         */
        public Stripper() throws IOException {
        }

        /**
         * Called once a page has been processed: stores the text accumulated in {@code output}
         * together with its length, the language detection result and the page rotation in a new
         * parser document, then swaps in a fresh writer so the next page starts empty.
         *
         * @param pDPage the page that has just been processed
         * @throws IOException if the superclass implementation fails
         */
        @Override
        protected void endPage(PDPage pDPage) throws IOException {
            super.endPage(pDPage);
            ParserDocument pageDocument = PdfBox.this.getNewParserDocument();
            String pageText = this.output.toString();
            pageDocument.add(PdfBox.CHARACTER_COUNT, Integer.valueOf(pageText.length()));
            pageDocument.add(PdfBox.CONTENT, pageText);
            // NOTE(review): languageDetection is inherited and not visible here; 10000 is
            // presumably a cap on the number of characters inspected -- confirm.
            pageDocument.add(PdfBox.LANG_DETECTION, PdfBox.this.languageDetection(PdfBox.CONTENT, 10000));
            pageDocument.add(PdfBox.ROTATION, Integer.valueOf(pDPage.getRotation()));
            // Reset the buffer so the following page does not include this page's text.
            this.output = new StringWriter();
        }
    }

    /**
     * Copies the document information, the page count and the catalog language into
     * {@code metas}.
     *
     * @param pDDocument the loaded PDF document
     * @throws IOException declared for the PDFBox accessors used here
     */
    private void extractMetaData(PDDocument pDDocument) throws IOException {
        PDDocumentInformation info = pDDocument.getDocumentInformation();
        if (info != null) {
            this.metas.add(TITLE, info.getTitle());
            this.metas.add(SUBJECT, info.getSubject());
            this.metas.add(AUTHOR, info.getAuthor());
            this.metas.add(PRODUCER, info.getProducer());
            this.metas.add(KEYWORDS, info.getKeywords());
            this.metas.add(CREATION_DATE, info.getCreationDate());
            this.metas.add(MODIFICATION_DATE, info.getModificationDate());
        }
        this.metas.add(NUMBER_OF_PAGES, Integer.valueOf(pDDocument.getNumberOfPages()));
        PDDocumentCatalog catalog = pDDocument.getDocumentCatalog();
        if (catalog != null) {
            this.metas.add(LANGUAGE, catalog.getLanguage());
        }
    }

    /**
     * Extracts metadata and per-page content from an already loaded document and always closes
     * it afterwards.
     *
     * <p>try-with-resources guarantees that {@code close()} is invoked exactly once and that a
     * failure while closing is attached as a suppressed exception instead of replacing the
     * exception thrown by the extraction itself.
     *
     * @param pDDocument the loaded PDF document; ownership is taken and it is closed on return
     * @throws Exception if extraction or closing fails
     */
    private void parseContent(PDDocument pDDocument) throws Exception {
        try (PDDocument document = pDDocument) {
            extractMetaData(document);
            // The returned text is ignored on purpose: page content is collected in
            // Stripper.endPage as a side effect of the traversal.
            new Stripper().getText(document);
        }
    }

    /**
     * Reads the {@code password} parameter.
     *
     * @return the configured password, or an empty string when the parameter value is null
     */
    private String getPassword() {
        String password = getParameterValue(PASSWORD, 0);
        return password == null ? "" : password;
    }

    /**
     * Parses a PDF read from a stream.
     *
     * @param inputStream the PDF content
     * @param str not used by this parser
     * @param str2 not used by this parser
     * @throws Exception if the document cannot be loaded or parsed
     */
    @Override
    public void parseContent(InputStream inputStream, String str, String str2) throws Exception {
        parseContent(PDDocument.load(inputStream, getPassword()));
    }

    /**
     * Parses a PDF read from a file.
     *
     * @param file the PDF file
     * @param str not used by this parser
     * @param str2 not used by this parser
     * @throws Exception if the document cannot be loaded or parsed
     */
    @Override
    public void parseContent(File file, String str, String str2) throws Exception {
        parseContent(PDDocument.load(file, getPassword()));
    }

    /** @return the parameters supported by this parser ({@link #PARAMETERS}) */
    @Override
    public ParserField[] getParameters() {
        return PARAMETERS;
    }

    /** @return the fields this parser may emit ({@link #FIELDS}) */
    @Override
    public ParserField[] getFields() {
        return FIELDS;
    }

    /** @return the file extensions handled by default ({@link #DEFAULT_EXTENSIONS}) */
    @Override
    public String[] getDefaultExtensions() {
        return DEFAULT_EXTENSIONS;
    }

    /** @return the MIME types handled by default ({@link #DEFAULT_MIMETYPES}) */
    @Override
    public String[] getDefaultMimeTypes() {
        return DEFAULT_MIMETYPES;
    }
}
