package com.formkiq.vision.pdf;

import com.formkiq.vision.document.DocumentText;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

/* loaded from: input_file:com/formkiq/vision/pdf/PDDocumentToTextTransformer.class */
/**
 * Extracts positioned text fragments from a {@link PDDocument}, grouped by zero-based page index.
 *
 * <p>The class hooks into {@link PDFTextStripper}: every line handed to
 * {@link #writeString(String, List)} is cleaned, cut into segments (at runs of underscores and at
 * large horizontal jumps between glyphs) and stored as {@code PdfText} entries. After the whole
 * document has been read, entries of each page are merged horizontally with
 * {@code PdfTextJoiningHorizontalTransformer}.
 *
 * <p>Instances keep the extraction result in a mutable field, so a single instance must not be
 * used by several threads at the same time.
 */
public class PDDocumentToTextTransformer extends PDFTextStripper implements Function<PDDocument, Map<Integer, List<DocumentText>>> {
    /** A run of underscores together with any whitespace around it; such runs become their own segment. */
    private static final Pattern UNDERSCORE_RUN = Pattern.compile("[\\s]*[_]+[\\s]*");

    /** Extra horizontal distance allowed after the previous glyph before a forward jump causes a split. */
    private static final float FORWARD_GAP_TOLERANCE = 5.0f;

    /** Distance by which a glyph may start before the tolerated end of the previous glyph without a split. */
    private static final float BACKWARD_GAP_TOLERANCE = 10.0f;

    /** Number of consecutive blank glyphs that are kept; further blanks in the same run are dropped. */
    private static final int MAX_CONSECUTIVE_BLANKS = 2;

    /** Multi-byte glyph strings whose first char is above this value are discarded. */
    private static final int MAX_FIRST_CHAR_VALUE = 8300;

    /** Extracted text per zero-based page index. */
    private Map<Integer, List<DocumentText>> textMap;

    /**
     * Creates a transformer that processes text sorted by position.
     *
     * @throws IOException if the underlying {@link PDFTextStripper} cannot be initialised
     */
    public PDDocumentToTextTransformer() throws IOException {
        setSortByPosition(true);
        this.textMap = new HashMap<>();
    }

    /**
     * Extracts and horizontally merges the text of the given document.
     *
     * <p>Each call starts from an empty result map, so text of a previously processed document is
     * never carried over into the result of the next one.
     *
     * @param pDDocument the document to read
     * @return the extracted text per zero-based page index; this is the same map that
     *     {@link #getTextLocations()} returns afterwards
     * @throws RuntimeException wrapping the {@link IOException} raised while reading the document
     */
    @Override // java.util.function.Function
    public Map<Integer, List<DocumentText>> apply(PDDocument pDDocument) {
        // Without this reset a reused instance would mix pages of different documents
        // and would run the horizontal merge again on already merged entries.
        this.textMap = new HashMap<>();
        try {
            getText(pDDocument);
            mergeTextHorizontal();
            return this.textMap;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Merges the entries of every page using {@code PdfTextJoiningHorizontalTransformer}.
     *
     * <p>For each entry the joiner returns a group of entries. If the entry is the first element of
     * its group it absorbs the group: its text becomes the space-joined text of all members, its
     * upper-right x becomes the largest one in the group, and the other members are removed from
     * the page. Otherwise the entry itself is removed from the page.
     */
    private void mergeTextHorizontal() {
        for (List<DocumentText> pageTexts : this.textMap.values()) {
            PdfTextJoiningHorizontalTransformer joiner = new PdfTextJoiningHorizontalTransformer(pageTexts);
            // Iterate over a snapshot because pageTexts is modified inside the loop.
            for (DocumentText current : new ArrayList<>(pageTexts)) {
                List<DocumentText> group = joiner.apply(current);
                if (!current.equals(group.get(0))) {
                    pageTexts.remove(current);
                    continue;
                }
                String joinedText = group.stream()
                        .map(DocumentText::getText)
                        .collect(Collectors.joining(StringUtils.SPACE));
                float rightMost = group.stream()
                        .map(member -> Float.valueOf(member.getUpperRightX()))
                        .max(Comparator.naturalOrder())
                        .get()
                        .floatValue();
                current.setText(joinedText);
                current.setUpperRightX(rightMost);
                if (group.size() > 1) {
                    pageTexts.removeAll(group.subList(1, group.size()));
                }
            }
        }
    }

    /**
     * Receives one line of text from {@link PDFTextStripper} and records its segments for the
     * current page.
     *
     * @param str the line as assembled by the stripper (not used; text is rebuilt from {@code list})
     * @param list the glyph positions that make up the line
     * @throws IOException declared by the overridden method
     */
    @Override // org.apache.pdfbox.text.PDFTextStripper
    protected void writeString(String str, List<TextPosition> list) throws IOException {
        // The stripper counts pages from 1, the result map is keyed from 0.
        Integer pageIndex = Integer.valueOf(getCurrentPageNo() - 1);
        List<DocumentText> pageTexts = this.textMap.computeIfAbsent(pageIndex, key -> new ArrayList<>());

        List<TextPosition> cleaned = removeNonPrintableAndExtraSpaces(list);
        for (List<TextPosition> segment : splitAtPoints(cleaned, getSplitPoints(cleaned))) {
            String segmentText = toString(segment);
            if (segmentText.isEmpty()) {
                continue;
            }
            float largestFontSize = segment.stream()
                    .map(TextPosition::getFontSizeInPt)
                    .max(Float::compare)
                    .orElse(0.0f);
            // The font name is taken from the first glyph of the segment.
            PDFont font = segment.get(0).getFont();

            PdfText pdfText = new PdfText();
            pdfText.setText(segmentText.replace("\t", StringUtils.SPACE));
            pdfText.setRectangle(calculateTextPosition(segment));
            pdfText.setFontSize(largestFontSize);
            pdfText.setFontName(font.getName());
            pageTexts.add(pdfText);
        }
    }

    /**
     * Concatenates the unicode strings of the given glyphs.
     *
     * @param list the glyphs to join
     * @return the joined text, empty for an empty list
     */
    private String toString(List<TextPosition> list) {
        return list.stream().map(TextPosition::getUnicode).collect(Collectors.joining());
    }

    /**
     * Cuts the glyph list into consecutive sub-lists ending at each split point.
     *
     * @param list the glyphs to cut
     * @param list2 ascending glyph indices at which a segment ends; values larger than the list
     *     size are clamped to the size
     * @return the whole list as a single segment when there are no split points, otherwise one
     *     (possibly empty) view of {@code list} per split point
     */
    private List<List<TextPosition>> splitAtPoints(List<TextPosition> list, List<Integer> list2) {
        List<List<TextPosition>> segments = new ArrayList<>();
        if (list2.isEmpty()) {
            segments.add(list);
            return segments;
        }
        int from = 0;
        for (Integer point : list2) {
            int to = Math.min(point.intValue(), list.size());
            segments.add(list.subList(from, to));
            from = to;
        }
        return segments;
    }

    /**
     * Determines the glyph indices at which a line has to be cut.
     *
     * <p>Cuts are placed before and after every underscore run and before every glyph that starts
     * noticeably to the right of, or noticeably before, the end of the preceding glyph. When at
     * least one cut exists, the list size is added so that the tail of the line forms a segment.
     *
     * @param list the cleaned glyphs of one line
     * @return the cut positions as ascending glyph indices (duplicates possible), empty if the line
     *     needs no cut
     */
    private List<Integer> getSplitPoints(List<TextPosition> list) {
        int size = list.size();

        // A glyph may carry more than one character (for example a ligature mapped to "fi"), so
        // regex offsets into the joined text are not glyph indices. charStart[i] is the offset at
        // which glyph i begins; charStart[size] is the total text length.
        int[] charStart = new int[size + 1];
        StringBuilder lineText = new StringBuilder();
        for (int i = 0; i < size; i++) {
            charStart[i] = lineText.length();
            lineText.append(list.get(i).getUnicode());
        }
        charStart[size] = lineText.length();

        List<Integer> points = new ArrayList<>();
        Matcher matcher = UNDERSCORE_RUN.matcher(lineText);
        while (matcher.find()) {
            points.add(Integer.valueOf(glyphIndexAt(charStart, matcher.start())));
            points.add(Integer.valueOf(glyphIndexAt(charStart, matcher.end())));
        }

        for (int i = 1; i < size; i++) {
            TextPosition previous = list.get(i - 1);
            float currentX = list.get(i).getTextMatrix().getTranslateX();
            float toleratedEnd = previous.getTextMatrix().getTranslateX() + previous.getWidth() + FORWARD_GAP_TOLERANCE;
            boolean jumpsForward = currentX > toleratedEnd;
            boolean jumpsBackward = toleratedEnd - currentX > BACKWARD_GAP_TOLERANCE;
            if (jumpsForward || jumpsBackward) {
                points.add(Integer.valueOf(i));
            }
        }

        if (!points.isEmpty()) {
            points.add(Integer.valueOf(size));
        }
        Collections.sort(points);
        return points;
    }

    /**
     * Translates a character offset of the joined line text into a glyph index.
     *
     * @param charStart start offset of every glyph, followed by the total text length
     * @param charOffset offset into the joined text
     * @return the index of the first glyph starting at or after {@code charOffset}, or the number
     *     of glyphs if there is none
     */
    private static int glyphIndexAt(int[] charStart, int charOffset) {
        int index = 0;
        while (index < charStart.length - 1 && charStart[index] < charOffset) {
            index++;
        }
        return index;
    }

    /**
     * Builds the rectangle of a segment from the direction-adjusted glyph coordinates.
     *
     * <p>NOTE(review): the right edge is the largest glyph <em>start</em> x, so the width of the
     * last glyph is not included - confirm that downstream consumers expect this.
     *
     * @param list the glyphs of one non-empty segment
     * @return the rectangle created through {@code PDRectangleUtil}; for glyphs whose direction is
     *     90 degrees the y and x roles are arranged differently, based on the first glyph
     */
    private PDRectangle calculateTextPosition(List<TextPosition> list) {
        float left = Math.round(minimum(list.stream().map(TextPosition::getXDirAdj)));
        float right = Math.round(maximum(list.stream().map(TextPosition::getXDirAdj)));
        float bottom = Math.round(minimum(list.stream()
                .map(position -> position.getPageHeight() - position.getYDirAdj())));
        float height = getHeight(list);

        TextPosition first = list.get(0);
        if (first.getDir() == 90.0f) {
            return PDRectangleUtil.create(first.getYDirAdj() - height, left, first.getYDirAdj(), right);
        }
        return PDRectangleUtil.create(left, bottom, right, bottom + height);
    }

    /**
     * Drops glyphs that fail the printable check and shortens runs of blank glyphs.
     *
     * @param list the raw glyphs of one line; not modified
     * @return a new mutable list containing the accepted glyphs, with at most
     *     {@value #MAX_CONSECUTIVE_BLANKS} consecutive blank glyphs
     */
    private List<TextPosition> removeNonPrintableAndExtraSpaces(List<TextPosition> list) {
        // Collected into an explicit ArrayList: the result is handed out as a mutable list and
        // Collectors.toList() gives no guarantee about mutability.
        List<TextPosition> kept = new ArrayList<>(list.size());
        int blankRun = 0;
        for (TextPosition position : list) {
            String unicode = position.getUnicode();
            if (!isPrintable(unicode)) {
                continue;
            }
            if (StringUtils.isEmpty(unicode.trim())) {
                blankRun++;
                if (blankRun > MAX_CONSECUTIVE_BLANKS) {
                    continue;
                }
            } else {
                blankRun = 0;
            }
            kept.add(position);
        }
        return kept;
    }

    /**
     * Decides whether a glyph string is kept.
     *
     * @param str the unicode string of a glyph
     * @return {@code true} if the string encodes to at most one UTF-8 byte or its first char is
     *     not above {@value #MAX_FIRST_CHAR_VALUE}
     */
    private static boolean isPrintable(String str) {
        return str.getBytes(StandardCharsets.UTF_8).length <= 1 || str.charAt(0) <= MAX_FIRST_CHAR_VALUE;
    }

    /**
     * @param stream a non-empty stream of values
     * @return the smallest value
     * @throws java.util.NoSuchElementException if the stream is empty
     */
    private float minimum(Stream<Float> stream) {
        return stream.min(Float::compare).get().floatValue();
    }

    /**
     * @param stream a non-empty stream of values
     * @return the largest value
     * @throws java.util.NoSuchElementException if the stream is empty
     */
    private float maximum(Stream<Float> stream) {
        return stream.max(Float::compare).get().floatValue();
    }

    /**
     * @param list a non-empty list of glyphs
     * @return the largest glyph height in the list
     * @throws java.util.NoSuchElementException if the list is empty
     */
    private float getHeight(List<TextPosition> list) {
        return maximum(list.stream().map(TextPosition::getHeight));
    }

    /**
     * Returns the text collected by the most recent extraction.
     *
     * @return the internal map of zero-based page index to text entries (not a copy)
     */
    public Map<Integer, List<DocumentText>> getTextLocations() {
        return this.textMap;
    }
}
