package fr.erias.IAMsystem.tokenizer;

import fr.erias.IAMsystem.exceptions.UnfoundTokenInSentence;
import fr.erias.IAMsystem.normalizer.NormalizerInterface;
import fr.erias.IAMsystem.normalizer.NormalizerTerm;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:fr/erias/IAMsystem/tokenizer/TokenizerNormalizer.class */
public class TokenizerNormalizer {
    private NormalizerInterface normalizerTerm;
    private String[] tokensArray;
    private String[] tokensArrayOriginal;
    private int[][] tokenStartEndInSentence;
    static final Logger logger = LoggerFactory.getLogger(TokenizerNormalizer.class);
    private static String pattern = "[0-9]+|[a-z]+";

    public TokenizerNormalizer(NormalizerInterface normalizerInterface) {
        this.normalizerTerm = normalizerInterface;
    }

    public static String[] tokenizeAlphaNum(String str) {
        LinkedList linkedList = new LinkedList();
        Matcher matcher = Pattern.compile(pattern).matcher(str);
        while (matcher.find()) {
            linkedList.add(matcher.group());
        }
        return (String[]) linkedList.toArray(new String[linkedList.size()]);
    }

    public void tokenize(String str) {
        this.normalizerTerm.setSentence(str);
        this.tokensArray = tokenizeAlphaNum(this.normalizerTerm.getNormalizedSentence());
        try {
            setTokensStartEndInSentence();
        } catch (UnfoundTokenInSentence e) {
            logger.info("Something went wrong during detecting start and end of each token");
            e.printStackTrace();
        }
    }

    public void tokenizeWithoutEndStart(String str) {
        this.normalizerTerm.setSentence(str);
        this.tokensArray = tokenizeAlphaNum(this.normalizerTerm.getNormalizedSentence());
    }

    public void setNormalizer(NormalizerTerm normalizerTerm) {
        this.normalizerTerm = normalizerTerm;
    }

    public static void setPattern(String str) {
        pattern = str;
    }

    public NormalizerInterface getNormalizerTerm() {
        return this.normalizerTerm;
    }

    public String[] getTokensArrayOriginal() {
        return this.tokensArrayOriginal;
    }

    public String[] getTokens() {
        return this.tokensArray;
    }

    public int[][] getTokenStartEndInSentence() throws UnfoundTokenInSentence {
        return this.tokenStartEndInSentence;
    }

    private void setTokensStartEndInSentence() throws UnfoundTokenInSentence {
        String normalizedSentence = this.normalizerTerm.getNormalizedSentence();
        String originalSentence = this.normalizerTerm.getOriginalSentence();
        this.tokensArrayOriginal = new String[this.tokensArray.length];
        this.tokenStartEndInSentence = new int[this.tokensArray.length][2];
        int length = normalizedSentence.length();
        int i = 0;
        int i2 = 0;
        int i3 = 0;
        char[] charArray = normalizedSentence.toCharArray();
        for (int i4 = 0; i4 < this.tokensArray.length; i4++) {
            String str = this.tokensArray[i4];
            logger.debug("Current Token to found in sentence : " + str);
            char[] charArray2 = str.toCharArray();
            for (int i5 = 0; i5 < charArray2.length; i5++) {
                char c = charArray2[i5];
                char c2 = charArray[i];
                logger.debug("\t Comparing" + c + " and " + c2);
                while (c != c2) {
                    i++;
                    if (i > length) {
                        throw new UnfoundTokenInSentence(logger, str, normalizedSentence);
                    }
                    c2 = charArray[i];
                    logger.debug("\t advancing to next char");
                    logger.debug("\t Comparing now" + c + " and " + c2);
                }
                if (i5 == 0) {
                    i2 = i;
                }
                if (i5 == charArray2.length - 1) {
                    i3 = i;
                }
                logger.debug("\t char found at position" + Integer.toString(i));
                i++;
            }
            logger.debug("\t token found at positions " + Integer.toString(i2) + " - " + Integer.toString(i3));
            int[] iArr = new int[2];
            iArr[0] = i2;
            iArr[1] = i3;
            this.tokenStartEndInSentence[i4] = iArr;
            this.tokensArrayOriginal[i4] = originalSentence.substring(i2, i3 + 1);
            logger.debug("\t token original : " + this.tokensArrayOriginal[i4]);
        }
    }
}
