package de.digitalcollections.solrocr.formats.mini;

import com.google.common.base.Strings;
import de.digitalcollections.solrocr.formats.OcrPassageFormatter;
import de.digitalcollections.solrocr.iter.IterableCharSequence;
import de.digitalcollections.solrocr.iter.TagBreakIterator;
import de.digitalcollections.solrocr.model.OcrBox;
import de.digitalcollections.solrocr.model.OcrPage;
import de.digitalcollections.solrocr.model.OcrSnippet;
import java.awt.Dimension;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.TreeMap;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.lucene.search.uhighlight.Passage;

/* loaded from: input_file:de/digitalcollections/solrocr/formats/mini/MiniOcrPassageFormatter.class */
/**
 * {@link OcrPassageFormatter} for OCR markup consisting of {@code <p xml:id="..." wh="W H">}
 * page elements and {@code <w x="X Y W H">text</w>} word elements.
 *
 * <p>Pages and words are located with regular expressions rather than an XML parser, so the
 * markup must match {@link #pagePat} and {@link #wordPat} exactly (attribute order, quoting,
 * single spaces between coordinate values).
 */
public class MiniOcrPassageFormatter extends OcrPassageFormatter {
    /**
     * Matches a single word element. The four space-separated values of the {@code x} attribute
     * are captured as {@code x}, {@code y}, {@code w} and {@code h}; the element content as
     * {@code text}.
     */
    private static final Pattern wordPat = Pattern.compile("<w x=\"(?<x>1?\\.?\\d+?) (?<y>1?\\.?\\d+?) (?<w>1?\\.?\\d+?) (?<h>1?\\.?\\d+?)\">(?<text>.+?)</w>");

    /**
     * Matches the opening tag of a page element. {@code pageId} is mandatory, the {@code wh}
     * attribute (captured as {@code w} and {@code h}) is optional.
     */
    private static final Pattern pagePat = Pattern.compile("<p xml:id=\"(?<pageId>.+?)\" ?(?:wh=\"(?<w>\\d+) (?<h>\\d+)\")?>");

    /** Maximum number of characters, counted from the page break, that are scanned for a page tag. */
    private static final int PAGE_TAG_LOOKAHEAD = 128;

    /** Break iterator over {@code p} tags, used to find the page preceding a given offset. */
    private final TagBreakIterator pageIter;

    /**
     * Creates a formatter. All arguments are forwarded unchanged to the superclass constructor.
     *
     * <p>NOTE(review): the meaning of the parameters is defined by {@link OcrPassageFormatter};
     * presumably they are the highlight start tag, the highlight end tag and two behaviour
     * flags — confirm against the superclass.
     */
    public MiniOcrPassageFormatter(String str, String str2, boolean z, boolean z2) {
        super(str, str2, z, z2);
        this.pageIter = new TagBreakIterator("p");
    }

    /**
     * Determines the page that contains the given offset.
     *
     * @param str unused by this implementation
     * @param i offset into {@code iterableCharSequence} for which the page is wanted
     * @param iterableCharSequence the OCR document to search
     * @return the page whose tag is found within {@value #PAGE_TAG_LOOKAHEAD} characters of the
     *     break preceding {@code i}, or {@code null} if no page tag matches there
     */
    @Override
    public OcrPage determineStartPage(String str, int i, IterableCharSequence iterableCharSequence) {
        this.pageIter.setText(iterableCharSequence);
        int pageStart = this.pageIter.preceding(i);
        // Only a bounded window is inspected so that the whole document is never materialised.
        int windowEnd = Math.min(pageStart + PAGE_TAG_LOOKAHEAD, iterableCharSequence.length());
        Matcher pageMatch = pagePat.matcher(iterableCharSequence.subSequence(pageStart, windowEnd).toString());
        return pageMatch.find() ? toPage(pageMatch) : null;
    }

    /**
     * Finds every page tag in the given text.
     *
     * @param str text to scan for page tags
     * @return the pages found, keyed by the offset in {@code str} at which their tag starts
     */
    @Override
    protected TreeMap<Integer, OcrPage> parsePages(String str) {
        TreeMap<Integer, OcrPage> pagesByOffset = new TreeMap<>();
        Matcher pageMatch = pagePat.matcher(str);
        while (pageMatch.find()) {
            pagesByOffset.put(pageMatch.start(), toPage(pageMatch));
        }
        return pagesByOffset;
    }

    /**
     * Builds an {@link OcrPage} from a successful {@link #pagePat} match.
     *
     * @param pageMatch matcher positioned on a match of {@link #pagePat}
     * @return a page carrying the matched id, with {@code null} dimensions when the {@code wh}
     *     attribute is absent or its values do not fit into an {@code int}
     */
    private static OcrPage toPage(Matcher pageMatch) {
        String width = pageMatch.group("w");
        String height = pageMatch.group("h");
        Dimension dimension = null;
        if (!Strings.isNullOrEmpty(width) && !Strings.isNullOrEmpty(height)) {
            try {
                dimension = new Dimension(Integer.parseInt(width), Integer.parseInt(height));
            } catch (NumberFormatException ignored) {
                // The pattern only admits digits, so this can only be an int overflow.
                // Deliberately best-effort: the page is still reported, just without dimensions.
            }
        }
        return new OcrPage(pageMatch.group("pageId"), dimension);
    }

    /**
     * Registers the highlighted boxes on the snippet.
     *
     * <p>With {@code absoluteHighlights} set, the work is delegated to the superclass. Otherwise
     * every box that lies inside one of the snippet's regions is rewritten in place: its
     * coordinates become fractions of that region's width and height (measured from the region's
     * upper left corner and truncated to four decimals), its parent region index is set and the
     * highlight tags are removed from its text. Boxes not contained in any region are left
     * untouched. Afterwards each inner list is merged via {@code mergeBoxes} and added to the
     * snippet as one highlight span.
     *
     * @param list highlight spans, each a list of the boxes that make up the span
     * @param ocrSnippet snippet that receives the highlight spans
     */
    @Override
    public void addHighlightsToSnippet(List<List<OcrBox>> list, OcrSnippet ocrSnippet) {
        if (this.absoluteHighlights) {
            super.addHighlightsToSnippet(list, ocrSnippet);
            return;
        }
        // First pass: rewrite all boxes, second pass: merge. Kept separate so that merging
        // always sees fully rewritten boxes.
        for (List<OcrBox> span : list) {
            for (OcrBox box : span) {
                makeRelativeToRegion(box, ocrSnippet);
            }
        }
        for (List<OcrBox> span : list) {
            ocrSnippet.addHighlightSpan(mergeBoxes(span));
        }
    }

    /**
     * Rewrites {@code box} relative to the first snippet region that contains it; does nothing
     * when no region contains the box.
     */
    private void makeRelativeToRegion(OcrBox box, OcrSnippet ocrSnippet) {
        Optional<OcrBox> containing = ocrSnippet.getSnippetRegions().stream()
            .filter(region -> region.contains(box))
            .findFirst();
        if (!containing.isPresent()) {
            return;
        }
        OcrBox region = containing.get();
        float originX = region.getUlx();
        float originY = region.getUly();
        float regionWidth = region.getLrx() - originX;
        float regionHeight = region.getLry() - originY;
        box.setUlx(truncateFloat((box.getUlx() - originX) / regionWidth));
        box.setLrx(truncateFloat((box.getLrx() - originX) / regionWidth));
        box.setUly(truncateFloat((box.getUly() - originY) / regionHeight));
        box.setLry(truncateFloat((box.getLry() - originY) / regionHeight));
        box.setParentRegionIdx(ocrSnippet.getSnippetRegions().indexOf(region));
        // The tags are literal markers, not regular expressions, hence replace() and not
        // replaceAll(): tags containing regex metacharacters must still be stripped verbatim.
        box.setText(box.getText().replace(this.startHlTag, "").replace(this.endHlTag, ""));
    }

    /**
     * Extracts all word boxes from the given text.
     *
     * <p>Words from the one containing the highlight start tag up to the one that contains, or
     * is directly followed by, the highlight end tag share a freshly generated UUID; all other
     * words get a {@code null} id. The box text is XML-unescaped but still contains any
     * highlight tags.
     *
     * @param str text to scan for word elements
     * @param treeMap pages keyed by their start offset in {@code str}; the page with the greatest
     *     offset not after a word's start supplies that word's page id
     * @param str2 page id used for words that start before the first entry of {@code treeMap}
     * @return the word boxes in document order
     * @throws NumberFormatException if a matched coordinate is not a valid float
     */
    @Override
    protected List<OcrBox> parseWords(String str, TreeMap<Integer, OcrPage> treeMap, String str2) {
        List<OcrBox> words = new ArrayList<>();
        UUID highlightId = null;
        Matcher wordMatch = wordPat.matcher(str);
        while (wordMatch.find()) {
            java.util.Map.Entry<Integer, OcrPage> pageEntry = treeMap.floorEntry(wordMatch.start());
            String pageId = pageEntry != null ? pageEntry.getValue().id : str2;

            float x = Float.parseFloat(wordMatch.group("x"));
            float y = Float.parseFloat(wordMatch.group("y"));
            float width = Float.parseFloat(wordMatch.group("w"));
            float height = Float.parseFloat(wordMatch.group("h"));
            String text = StringEscapeUtils.unescapeXml(wordMatch.group("text"));

            if (text.contains(this.startHlTag)) {
                highlightId = UUID.randomUUID();
            }
            words.add(new OcrBox(text, pageId, x, y, x + width, y + height, highlightId));
            // The end tag may sit inside the word or immediately behind its closing tag.
            if (text.contains(this.endHlTag) || str.startsWith(this.endHlTag, wordMatch.end())) {
                highlightId = null;
            }
        }
        return words;
    }

    /** Rounds {@code f} down (towards negative infinity) to four decimal places. */
    private float truncateFloat(float f) {
        return ((float) Math.floor(f * 10000.0f)) / 10000.0f;
    }

    /**
     * Not supported by this formatter.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Object format(Passage[] passageArr, String str) {
        throw new UnsupportedOperationException(
            "format(Passage[], String) is not supported by MiniOcrPassageFormatter");
    }
}
