package de.datexis.preprocess;

import de.datexis.common.WordHelpers;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.tokenize.TokenContextGenerator;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.Span;
import opennlp.tools.util.StringUtil;

/* loaded from: input_file:de/datexis/preprocess/TokenizerMENL.class */
/**
 * A {@link TokenizerME} variant whose whitespace pre-pass keeps newline characters
 * (and a small set of quote/bracket characters) as tokens of their own instead of
 * discarding or gluing them to neighbouring text.
 *
 * <p>The private {@code model}, {@code tokProbs}, {@code newTokens}, {@code alphanumeric}
 * and {@code cg} fields of {@link TokenizerME} are obtained through reflection in the
 * constructor. The two lists are the very instances held by the superclass, presumably so
 * that inherited accessors keep reflecting the latest call (confirm against the OpenNLP
 * version in use).
 *
 * <p>Every call to {@link #tokenizePosWithNewline(String)} clears and refills those shared
 * lists, so a single instance must not be used by several threads at once.
 */
public class TokenizerMENL extends TokenizerME {

    /** Characters that the whitespace pass always emits as single-character tokens. */
    private static final String STANDALONE_CHARS = "\"()[]{}";

    /**
     * Model outcome label that marks a token boundary.
     * NOTE(review): presumably mirrors {@code TokenizerME.SPLIT}; kept as a literal here.
     */
    private static final String SPLIT_OUTCOME = "T";

    private final Pattern alphanumeric;
    private final MaxentModel model;
    private final TokenContextGenerator cg;
    private final List<Double> tokProbs;
    private final List<Span> newTokens;

    /**
     * Creates the tokenizer and captures the superclass internals needed by
     * {@link #tokenizePosWithNewline(String)}.
     *
     * @param tokenizerModel model handed to the {@link TokenizerME} constructor
     * @throws IllegalStateException if any of the required private {@link TokenizerME}
     *         fields cannot be read; failing here avoids an unrelated
     *         {@code NullPointerException} on the first tokenize call
     */
    public TokenizerMENL(TokenizerModel tokenizerModel) {
        super(tokenizerModel);
        this.model = (MaxentModel) readInheritedField("model");
        // The casts below are unchecked only because reflection erases the element type.
        @SuppressWarnings("unchecked")
        List<Double> sharedProbabilities = (List<Double>) readInheritedField("tokProbs");
        this.tokProbs = sharedProbabilities;
        @SuppressWarnings("unchecked")
        List<Span> sharedTokens = (List<Span>) readInheritedField("newTokens");
        this.newTokens = sharedTokens;
        this.alphanumeric = (Pattern) readInheritedField("alphanumeric");
        this.cg = (TokenContextGenerator) readInheritedField("cg");
    }

    /**
     * Reads a private field declared on {@link TokenizerME} from this instance.
     *
     * @param fieldName name of the field as declared in {@link TokenizerME}
     * @return the current value of that field
     * @throws IllegalStateException if the field does not exist or cannot be accessed
     */
    private Object readInheritedField(String fieldName) {
        try {
            Field field = TokenizerME.class.getDeclaredField(fieldName);
            field.setAccessible(true);
            return field.get(this);
        } catch (ReflectiveOperationException | RuntimeException e) {
            throw new IllegalStateException(
                    "Cannot read private field '" + fieldName + "' of "
                            + TokenizerME.class.getName()
                            + " via reflection; the OpenNLP version on the classpath may be incompatible",
                    e);
        }
    }

    /**
     * Tokenizes the text; simply delegates to {@link #tokenizePosWithNewline(String)}.
     * This has the same signature as {@code TokenizerME.tokenizePos(String)} and
     * therefore replaces it.
     *
     * @param str text to tokenize
     * @return token spans as character offsets into {@code str}
     */
    public Span[] tokenizePos(String str) {
        return tokenizePosWithNewline(str);
    }

    /**
     * Splits the text at whitespace while keeping some characters as tokens:
     * <ul>
     *   <li>a newline ({@code '\n'}) becomes a one-character span,</li>
     *   <li>each of {@code " ( ) [ ] { }} becomes a one-character span,</li>
     *   <li>any other whitespace ends the current span and is dropped,</li>
     *   <li>every other character extends (or starts) the current span.</li>
     * </ul>
     *
     * @param str text to split; must not be {@code null}
     * @return spans in order of appearance, as character offsets into {@code str}
     */
    public Span[] tokenizePosWhitespaceWithNewline(String str) {
        List<Span> spans = new ArrayList<>();
        int tokenStart = -1; // -1 while no token is open
        int length = str.length();
        for (int pos = 0; pos < length; pos++) {
            char c = str.charAt(pos);
            // Newline is whitespace too, but must survive as a token of its own.
            boolean standalone = c == '\n' || STANDALONE_CHARS.indexOf(c) != -1;
            if (standalone || StringUtil.isWhitespace(c)) {
                if (tokenStart >= 0) {
                    spans.add(new Span(tokenStart, pos));
                    tokenStart = -1;
                }
                if (standalone) {
                    spans.add(new Span(pos, pos + 1));
                }
            } else if (tokenStart < 0) {
                tokenStart = pos;
            }
        }
        if (tokenStart >= 0) {
            spans.add(new Span(tokenStart, length));
        }
        return spans.toArray(new Span[0]);
    }

    /**
     * Tokenizes the text in two passes: first
     * {@link #tokenizePosWhitespaceWithNewline(String)}, then each resulting chunk is either
     * kept whole (see {@link #isKeptWhole(String)}) or split further by the maxent model.
     *
     * <p>The shared token and probability lists are cleared at the start and hold exactly
     * one probability per returned span afterwards.
     *
     * @param str text to tokenize; must not be {@code null}
     * @return token spans as character offsets into {@code str}
     */
    public Span[] tokenizePosWithNewline(String str) {
        Span[] chunks = tokenizePosWhitespaceWithNewline(str);
        this.newTokens.clear();
        this.tokProbs.clear();
        for (Span chunk : chunks) {
            String text = str.substring(chunk.getStart(), chunk.getEnd());
            if (isKeptWhole(text)) {
                this.newTokens.add(chunk);
                this.tokProbs.add(1.0d);
            } else {
                splitWithModel(chunk, text);
            }
        }
        return this.newTokens.toArray(new Span[0]);
    }

    /**
     * Decides whether a whitespace-delimited chunk is emitted as a single token without
     * consulting the model: it is shorter than two characters, or the alphanumeric
     * optimization is enabled and the pattern matches, or it is contained in the English
     * or German abbreviation collections of {@link WordHelpers}.
     */
    private boolean isKeptWhole(String text) {
        return text.length() < 2
                || (useAlphaNumericOptimization() && this.alphanumeric.matcher(text).matches())
                || WordHelpers.abbreviationsEN.contains(text)
                || WordHelpers.abbreviationsDE.contains(text);
    }

    /**
     * Asks the model about every inner character position of the chunk and appends the
     * resulting sub-spans and their probabilities to the shared lists. The probability of a
     * sub-span is the product of the best-outcome probabilities of all positions evaluated
     * since the previous split.
     *
     * @param chunk span of the chunk within the full text
     * @param text  the chunk's characters
     */
    private void splitWithModel(Span chunk, String text) {
        int chunkStart = chunk.getStart();
        int chunkEnd = chunk.getEnd();
        int tokenStart = chunkStart;
        double probability = 1.0d;
        for (int pos = chunkStart + 1; pos < chunkEnd; pos++) {
            // The context generator works on offsets relative to the chunk, not the full text.
            double[] outcomes = this.model.eval(this.cg.getContext(text, pos - chunkStart));
            String best = this.model.getBestOutcome(outcomes);
            probability *= outcomes[this.model.getIndex(best)];
            if (best.equals(SPLIT_OUTCOME)) {
                this.newTokens.add(new Span(tokenStart, pos));
                this.tokProbs.add(probability);
                tokenStart = pos;
                probability = 1.0d;
            }
        }
        // Whatever follows the last split (or the whole chunk if none) is the final token.
        this.newTokens.add(new Span(tokenStart, chunkEnd));
        this.tokProbs.add(probability);
    }
}
