package de.digitalcollections.solrocr.formats.hocr;

import com.google.common.collect.ImmutableSet;
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/digitalcollections/solrocr/formats/hocr/HocrClassBreakIterator.class */
/**
 * A {@link BreakIterator} whose boundaries are the opening {@code <} of markup tags carrying
 * one of a configured set of hOCR {@code class} attribute values.
 *
 * <p>The iterator scans the underlying {@link CharacterIterator} one character at a time,
 * collects every {@code <...>} tag it passes and compares the tag's {@code class} value
 * (as extracted by {@link #CLASS_PAT}) against the configured break classes.
 *
 * <p>Notes on behavior that differs from a typical {@code BreakIterator}:
 * <ul>
 *   <li>{@link #next()} and {@link #previous()} never return {@link BreakIterator#DONE}; when
 *       no further matching tag is found they return the index at which the underlying text
 *       iterator stopped (its end or begin index, respectively).</li>
 *   <li>The scan position lives in the underlying text iterator and is distinct from the
 *       value reported by {@link #current()}: after {@link #next()} the text iterator sits
 *       just behind the matched tag, while {@link #current()} reports the tag's start.</li>
 * </ul>
 */
public class HocrClassBreakIterator extends BreakIterator {
    /**
     * Matches a {@code class} attribute whose value starts with {@code ocr}. The named group
     * {@code class} captures everything from {@code ocr} up to the next quote character, so a
     * multi-valued attribute is captured (and later compared) as one string.
     */
    private static final Pattern CLASS_PAT = Pattern.compile("class=['\"](?<class>ocr.+?)['\"]");

    /** Class attribute values that mark a boundary. */
    private final Set<String> breakClasses;

    /** Text being scanned; {@code null} until {@link #setText(CharacterIterator)} is called. */
    private CharacterIterator text;

    /** Most recently computed boundary, as returned by {@link #current()}. */
    private int current;

    /**
     * Creates an iterator that breaks on a single hOCR class.
     *
     * @param str the class attribute value to break on, e.g. {@code "ocr_line"}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public HocrClassBreakIterator(String str) {
        this.breakClasses = Collections.singleton(Objects.requireNonNull(str, "break class"));
    }

    /**
     * Creates an iterator that breaks on any of the given hOCR classes.
     *
     * @param set the class attribute values to break on; the set is stored by reference,
     *            not copied
     */
    public HocrClassBreakIterator(Set<String> set) {
        this.breakClasses = set;
    }

    /**
     * Moves the underlying text iterator to its first position and makes that index the
     * current boundary.
     *
     * @return the index of the first position of the text
     */
    @Override // java.text.BreakIterator
    public int first() {
        this.text.first();
        this.current = this.text.getIndex();
        return current();
    }

    /**
     * Moves the underlying text iterator to its last position and makes that index the
     * current boundary.
     *
     * @return the index the text iterator reports after {@link CharacterIterator#last()}
     */
    @Override // java.text.BreakIterator
    public int last() {
        this.text.last();
        this.current = this.text.getIndex();
        return current();
    }

    /**
     * Advances by {@code i} boundaries: forward via {@link #next()} for positive values,
     * backward via {@link #previous()} for negative values, and not at all for zero.
     *
     * @param i number of boundaries to move; the sign selects the direction
     * @return the boundary reached, i.e. the new value of {@link #current()}
     */
    @Override // java.text.BreakIterator
    public int next(int i) {
        if (i < 0) {
            // Counting upwards towards zero also copes with Integer.MIN_VALUE.
            for (int remaining = i; remaining < 0; remaining++) {
                previous();
            }
        } else {
            for (int remaining = i; remaining > 0; remaining--) {
                next();
            }
        }
        return this.current;
    }

    /**
     * Scans forward from the text iterator's present index until a tag with one of the break
     * classes has been read completely.
     *
     * <p>On a match the text iterator is left just behind the tag's closing {@code >} (so a
     * subsequent call continues with the following tag) and the boundary is the index of the
     * tag's opening {@code <}. A tag whose {@code <} lies before the starting index is not
     * recognized.
     *
     * @return the start index of the matched tag, or the index at which the text iterator
     *         stopped when the end of the text is reached without a match
     */
    @Override // java.text.BreakIterator
    public int next() {
        String tag = "";
        String hocrClass = "";
        StringBuilder tagBuffer = null;
        while (!this.breakClasses.contains(hocrClass)) {
            char c = this.text.current();
            if (c == '<') {
                tagBuffer = new StringBuilder();
            }
            if (tagBuffer != null) {
                tagBuffer.append(c);
                if (c == '>') {
                    tag = tagBuffer.toString();
                    hocrClass = getHocrClass(tag);
                    tagBuffer = null;
                }
            }
            if (this.text.next() == CharacterIterator.DONE) {
                if (this.breakClasses.contains(hocrClass)) {
                    // The matching tag closed on the very last character; the iterator now
                    // sits at the end index, so the tag start is computed below as usual.
                    break;
                }
                this.current = this.text.getIndex();
                return this.current;
            }
        }
        // The text iterator is one past '>', so stepping back by the tag length yields '<'.
        this.current = this.text.getIndex() - tag.length();
        return this.current;
    }

    /**
     * Extracts the hOCR class from a single tag.
     *
     * @param str the complete tag text, from {@code <} to {@code >}
     * @return the value captured by {@link #CLASS_PAT}, or an empty string if the tag has no
     *         matching {@code class} attribute
     */
    private String getHocrClass(String str) {
        Matcher matcher = CLASS_PAT.matcher(str);
        return matcher.find() ? matcher.group("class") : "";
    }

    /**
     * Scans backward from the text iterator's present index until a tag with one of the break
     * classes has been read completely (from its {@code >} back to its {@code <}).
     *
     * <p>On a match the boundary is the index of the tag's opening {@code <}. A tag whose
     * {@code >} lies behind the starting index is not recognized.
     *
     * @return the start index of the matched tag, or the index at which the text iterator
     *         stopped when the beginning of the text is reached
     */
    @Override // java.text.BreakIterator
    public int previous() {
        String hocrClass = "";
        StringBuilder tagBuffer = null;
        while (!this.breakClasses.contains(hocrClass)) {
            char c = this.text.current();
            if (c == '>') {
                tagBuffer = new StringBuilder();
            }
            if (tagBuffer != null) {
                // Characters arrive in reverse order, so prepend to rebuild the tag.
                tagBuffer.insert(0, c);
                if (c == '<') {
                    hocrClass = getHocrClass(tagBuffer.toString());
                    tagBuffer = null;
                }
            }
            if (this.text.previous() == CharacterIterator.DONE) {
                // Beginning of text: the reported index is also correct for a matching tag
                // that starts at the very first position.
                this.current = this.text.getIndex();
                return this.current;
            }
        }
        // The text iterator has stepped one position before '<'.
        this.current = this.text.getIndex() + 1;
        return this.current;
    }

    /**
     * Positions the text iterator at {@code i} and searches forward with {@link #next()}.
     * A matching tag that starts exactly at {@code i} is reported as the result.
     *
     * @param i the index to start scanning from
     * @return the boundary found by {@link #next()}
     */
    @Override // java.text.BreakIterator
    public int following(int i) {
        this.text.setIndex(i);
        return next();
    }

    /**
     * Positions the text iterator at {@code i} and searches backward with {@link #previous()}.
     *
     * @param i the index to start scanning from
     * @return the boundary found by {@link #previous()}
     */
    @Override // java.text.BreakIterator
    public int preceding(int i) {
        this.text.setIndex(i);
        return previous();
    }

    /**
     * @return the boundary most recently computed by one of the navigation methods
     */
    @Override // java.text.BreakIterator
    public int current() {
        return this.current;
    }

    /**
     * @return the text iterator being scanned (the same instance passed to
     *         {@link #setText(CharacterIterator)}, not a copy), or {@code null} if none was set
     */
    @Override // java.text.BreakIterator
    public CharacterIterator getText() {
        return this.text;
    }

    /**
     * Sets the text to scan. Only the reference is stored: the iterator is not repositioned
     * and {@link #current()} keeps its previous value until a navigation method is called.
     *
     * @param characterIterator the text to scan
     */
    @Override // java.text.BreakIterator
    public void setText(CharacterIterator characterIterator) {
        this.text = characterIterator;
    }
}
