package com.adobe.internal.pdftoolkit.services.readingorder;

import com.adobe.internal.pdftoolkit.core.exceptions.PDFFontException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFIOException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFInvalidDocumentException;
import com.adobe.internal.pdftoolkit.core.exceptions.PDFSecurityException;
import com.adobe.internal.pdftoolkit.core.fontset.PDFFontSet;
import com.adobe.internal.pdftoolkit.core.types.ASCoordinate;
import com.adobe.internal.pdftoolkit.pdf.document.PDFDocument;
import com.adobe.internal.pdftoolkit.pdf.page.PDFPage;
import com.adobe.internal.pdftoolkit.services.textextraction.TextExtractor;
import com.adobe.internal.pdftoolkit.services.textextraction.Word;
import com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:com/adobe/internal/pdftoolkit/services/readingorder/LayoutModeTextExtractor.class */
/**
 * Produces a word stream for a PDF page or document in which the extracted words are
 * interleaved with synthetic {@link Word}s holding spaces and line separators.
 *
 * <p>The padding is derived from word coordinates: horizontal positions are divided by a
 * per-page "column width" (1.5 times the smallest positive average character width on the
 * page) to decide how many space words to insert, and vertical distance between consecutive
 * words (in units of character height) decides how many line separators to insert.
 *
 * <p>The running character count of the current output line is kept in an instance field
 * that is mutated without synchronization, so a single instance should not be shared
 * between threads that extract concurrently.
 */
public class LayoutModeTextExtractor {
    /**
     * Line separator inserted between output lines. Left non-final so existing code that
     * reads or reassigns this public field keeps compiling.
     */
    public static String NEWLINE_STRING = System.getProperty("line.separator");

    /** Vertical drop (in character heights) above which an extra line separator is emitted. */
    private static final double NEWLINE_THRESHOLD = 0.95d;

    /** Largest horizontal gap (in character widths) still rendered as a single space. */
    private static final double SPACE_MAX_THRESHOLD = 2.0d;

    /** Largest vertical drop (in character heights) for which a word stays on the current line. */
    private static final double SAME_LINE_MAX_DROP = 2.0d;

    /** Factor applied to the smallest average character width to obtain the column width. */
    private static final double COLUMN_WIDTH_FACTOR = 1.5d;

    private PDFDocument pdfDoc;
    private PDFFontSet fontSet;

    /** Number of characters (words plus padding) written to the current output line so far. */
    private int charcountInLine = 0;

    /**
     * Iterates over the layout words of every page of the enclosing extractor's document,
     * converting one page at a time and skipping pages that produce no words.
     */
    private class DocumentWordsIterator implements WordsIterator {
        /** 1-based index of the page whose words are currently being served. */
        private int pageIndex = 0;
        Iterator<?> pagesIter;
        /** Words of the current page; stays {@code null} when the document has no pages. */
        WordsIterator wordsIter;

        DocumentWordsIterator() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            this.pagesIter = LayoutModeTextExtractor.this.pdfDoc.requirePages().iterator();
            // The first page is converted eagerly so extraction errors surface at construction.
            if (this.pagesIter.hasNext()) {
                advanceToNextPage();
            }
        }

        /**
         * Reports whether another word is available, moving on to following pages as long
         * as the current one is exhausted.
         *
         * @return {@code true} if {@link #next()} will return a word
         */
        @Override // com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator
        public boolean hasNext() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            // The null check covers a document without pages, where no page iterator was ever created.
            while (this.wordsIter == null || !this.wordsIter.hasNext()) {
                if (!this.pagesIter.hasNext()) {
                    return false;
                }
                advanceToNextPage();
            }
            return true;
        }

        /**
         * Returns the next word of the document.
         *
         * @return the next word, or {@code null} when all pages are exhausted
         */
        @Override // com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator
        public Word next() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            if (hasNext()) {
                return this.wordsIter.next();
            }
            return null;
        }

        /** Takes the next page from the page iterator and converts it into a word iterator. */
        private void advanceToNextPage() throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
            PDFPage page = (PDFPage) this.pagesIter.next();
            this.pageIndex++;
            this.wordsIter = LayoutModeTextExtractor.this.getWordsIterator(page, this.pageIndex);
        }
    }

    /** Adapts an in-memory list of words to the {@link WordsIterator} interface. */
    public static class WordListIterator implements WordsIterator {
        Iterator<Word> wordsIter;

        WordListIterator(List<Word> list) {
            this.wordsIter = list.iterator();
        }

        @Override // com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator
        public boolean hasNext() {
            return this.wordsIter.hasNext();
        }

        @Override // com.adobe.internal.pdftoolkit.services.textextraction.WordsIterator
        public Word next() {
            return this.wordsIter.next();
        }
    }

    /**
     * Creates an extractor for the given document.
     *
     * @param pDFDocument document whose pages will be extracted
     * @param pDFFontSet font set handed to the underlying {@link TextExtractor}
     * @return a new extractor instance
     */
    public static LayoutModeTextExtractor newInstance(PDFDocument pDFDocument, PDFFontSet pDFFontSet) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new LayoutModeTextExtractor(pDFDocument, pDFFontSet);
    }

    private LayoutModeTextExtractor(PDFDocument pDFDocument, PDFFontSet pDFFontSet) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        this.pdfDoc = pDFDocument;
        this.fontSet = pDFFontSet;
    }

    /**
     * Returns an iterator over the layout words of the whole document, page by page.
     *
     * @return iterator spanning all pages; reports no words for a document without pages
     */
    public WordsIterator getWordsIterator() throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException, IOException {
        return new DocumentWordsIterator();
    }

    /**
     * Returns an iterator over the layout words of a single page.
     *
     * @param pDFPage page to extract
     * @param i page index forwarded to the underlying text extractor
     * @return iterator over the result of {@link #convertPDFtoLayoutText(PDFPage, int)}
     */
    public WordsIterator getWordsIterator(PDFPage pDFPage, int i) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        return new WordListIterator(convertPDFtoLayoutText(pDFPage, i));
    }

    /**
     * Converts one page into a list of words padded with space and line-separator words.
     *
     * <p>The page is read twice: once to determine the column width, once to emit the words.
     *
     * @param pDFPage page to extract
     * @param i page index forwarded to the underlying text extractor
     * @return the padded words followed by one trailing line-separator word, or an empty
     *         list when the page yields no words
     */
    public List<Word> convertPDFtoLayoutText(PDFPage pDFPage, int i) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException, PDFFontException {
        TextExtractor extractor = TextExtractor.newInstance(this.pdfDoc, this.fontSet, false);
        double columnWidth = findSmallestAverageCharWidthInPage(extractor.getROTEWordsIterator(pDFPage, i)) * COLUMN_WIDTH_FACTOR;
        WordsIterator pageWords = extractor.getROTEWordsIterator(pDFPage, i);
        List<Word> layout = new ArrayList<Word>();
        if (!pageWords.hasNext()) {
            // No word means no page number to attach a trailing separator to; the
            // document iterator already skips pages whose iterator is empty.
            return layout;
        }
        Word previous = pageWords.next();
        startSpace(layout, previous, columnWidth);
        ASCoordinate previousTopLeft = writeText(layout, null, null, previous, columnWidth);
        while (pageWords.hasNext()) {
            Word current = pageWords.next();
            previousTopLeft = writeText(layout, previousTopLeft, previous, current, columnWidth);
            previous = current;
        }
        layout.add(new Word(null, NEWLINE_STRING, previous.getPageNumber()));
        return layout;
    }

    /**
     * Finds the smallest positive average character width among the given words.
     *
     * <p>Non-positive widths are ignored: the result is used as a divisor for padding
     * loops, and a zero divisor would turn their bound into infinity so they never end.
     *
     * @param wordsIterator words to scan; consumed by this call
     * @return the smallest positive average width, or {@link Double#MAX_VALUE} if there is none
     */
    private double findSmallestAverageCharWidthInPage(WordsIterator wordsIterator) throws PDFInvalidDocumentException, PDFSecurityException, PDFIOException, PDFFontException {
        double smallest = Double.MAX_VALUE;
        while (wordsIterator.hasNext()) {
            double avgWidth = wordsIterator.next().avgWidth();
            if (avgWidth > 0.0d && avgWidth < smallest) {
                smallest = avgWidth;
            }
        }
        return smallest;
    }

    /**
     * Appends the padding that separates {@code word2} from the preceding word, then
     * {@code word2} itself.
     *
     * @param list output list; nothing is appended when it is {@code null}
     * @param aSCoordinate top-left corner of the preceding word, or {@code null} if unknown
     * @param word the preceding word, or {@code null} when {@code word2} is the first word
     * @param word2 the word to append
     * @param d column width used to turn x positions into character columns
     * @return the top-left corner of {@code word2}
     */
    private ASCoordinate writeText(List<Word> list, ASCoordinate aSCoordinate, Word word, Word word2, double d) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        ASCoordinate currentTopLeft = word2.topLeft();
        if (word != null) {
            double horizontalGap = word2.topLeft().x() - word.bottomRight().x();
            // Vertical distance from the previous word, measured in character heights.
            double lineDrop = (word.bottomRight().y() - word2.topLeft().y()) / word2.charHeight();
            boolean staysOnLine = aSCoordinate == null
                    || (currentTopLeft.x() - aSCoordinate.x() > 0.0d && lineDrop <= SAME_LINE_MAX_DROP);
            if (staysOnLine) {
                if (list != null) {
                    double gapInChars = horizontalGap / word2.charWidth();
                    if (gapInChars <= SPACE_MAX_THRESHOLD) {
                        appendSpace(list, word);
                    } else if (gapInChars > SPACE_MAX_THRESHOLD) {
                        // Wide gap: pad up to the column where word2 starts. (A NaN gap
                        // matches neither branch and adds no padding.)
                        double missingColumns = (word2.topLeft().x() / d) - this.charcountInLine;
                        for (int col = 1; col < missingColumns; col++) {
                            appendSpace(list, word);
                        }
                    }
                }
            } else if (list != null) {
                StringBuilder separators = new StringBuilder("");
                for (int line = 1; line < lineDrop; line++) {
                    separators.append(NEWLINE_STRING);
                }
                list.add(new Word(null, separators.toString(), word.getPageNumber()));
                if (lineDrop > NEWLINE_THRESHOLD && lineDrop <= 1.0d) {
                    list.add(new Word(null, NEWLINE_STRING, word.getPageNumber()));
                } else if (lineDrop <= NEWLINE_THRESHOLD && currentTopLeft.x() - aSCoordinate.x() <= 0.0d) {
                    list.add(new Word(null, NEWLINE_STRING, word.getPageNumber()));
                }
                startSpace(list, word2, d);
            }
        }
        if (list != null) {
            list.add(word2);
            this.charcountInLine += word2.toString().length();
        }
        return currentTopLeft;
    }

    /**
     * Starts a new output line: resets the line character count and appends the leading
     * spaces that move the cursor to the column where {@code word} begins.
     *
     * @param list output list receiving the space words
     * @param word first word of the new line
     * @param d column width used to turn the x position into a character column
     */
    private void startSpace(List<Word> list, Word word, double d) throws PDFInvalidDocumentException, PDFIOException, PDFSecurityException {
        double startColumn = word.topLeft().x() / d;
        this.charcountInLine = 0;
        for (int col = 1; col < startColumn; col++) {
            appendSpace(list, word);
        }
    }

    /** Appends one space word carrying {@code pageSource}'s page number and counts it. */
    private void appendSpace(List<Word> list, Word pageSource) {
        list.add(new Word(null, " ", pageSource.getPageNumber()));
        this.charcountInLine++;
    }
}
