package fr.erias.IAMsystem.tokenizernormalizer;

import fr.erias.IAMsystem.exceptions.InvalidSentenceLength;
import fr.erias.IAMsystem.exceptions.UnfoundTokenInSentence;
import fr.erias.IAMsystem.normalizer.INormalizer;
import fr.erias.IAMsystem.normalizer.Normalizer;
import fr.erias.IAMsystem.stopwords.IStopwords;
import fr.erias.IAMsystem.tokenizer.ITokenizer;

/* loaded from: input_file:fr/erias/IAMsystem/tokenizernormalizer/TokenizerNormalizer.class */
/**
 * Combines an {@link INormalizer} and an {@link ITokenizer}: a sentence is first normalized,
 * then the normalized sentence is tokenized, and finally each token is mapped back to its
 * start/end offsets so the corresponding substring of the original sentence can be extracted.
 */
public class TokenizerNormalizer implements ITokenizerNormalizer {
    private INormalizer normalizer;
    private ITokenizer tokenizer;

    /**
     * Creates a tokenizer-normalizer from its two collaborators.
     *
     * @param iNormalizer the normalizer applied to the sentence before tokenization
     * @param iTokenizer the tokenizer applied to the normalized sentence
     */
    public TokenizerNormalizer(INormalizer iNormalizer, ITokenizer iTokenizer) {
        this.normalizer = iNormalizer;
        this.tokenizer = iTokenizer;
    }

    /**
     * Builds an instance using a {@link Normalizer} configured with the given stopwords and the
     * tokenizer returned by {@link ITokenizer#getDefaultTokenizer()}.
     *
     * @param iStopwords the stopwords handed to the normalizer
     * @return a new tokenizer-normalizer
     */
    public static TokenizerNormalizer getDefaultTokenizerNormalizer(IStopwords iStopwords) {
        return new TokenizerNormalizer(new Normalizer(iStopwords), ITokenizer.getDefaultTokenizer());
    }

    /**
     * Builds an instance like {@link #getDefaultTokenizerNormalizer(IStopwords)}, passing
     * {@code IStopwords.noStopwords}.
     *
     * @return a new tokenizer-normalizer
     */
    public static TokenizerNormalizer getDefaultTokenizerNormalizer() {
        return getDefaultTokenizerNormalizer(IStopwords.noStopwords);
    }

    /**
     * Normalizes and tokenizes a sentence and locates every token in the normalized sentence.
     *
     * <p>If normalization changed the sentence length, a diagnostic is printed to
     * {@code System.err} and the normalized sentence is used in place of the original one, so
     * that the token offsets remain valid for the string they are applied to.
     *
     * @param str the sentence to process
     * @return the normalization/tokenization result, or {@code TNoutput.getErrorTNoutput()} when
     *     a token cannot be located in the normalized sentence
     */
    @Override // fr.erias.IAMsystem.tokenizernormalizer.ITokenizerNormalizer
    public TNoutput tokenizeNormalize(String str) {
        String str2 = str;
        String normalizedSentence = this.normalizer.getNormalizedSentence(str);
        try {
            checkUnchangedLength(str2, normalizedSentence);
        } catch (InvalidSentenceLength e) {
            StringBuilder sb = new StringBuilder();
            sb.append("Something went wrong during normalization");
            sb.append("\t original sentence:" + str2);
            sb.append("\t\toriginal sentence length: " + str.length());
            sb.append("\t normalizedSentence:" + normalizedSentence);
            sb.append("\t\tnormalizedSentence length: " + normalizedSentence.length());
            sb.append("sentence length: " + str.length() + " : \n " + str);
            System.err.println(sb.toString());
            e.printStackTrace();
            // Offsets are computed on the normalized sentence; with a length mismatch they can
            // no longer be applied to the original, so fall back to the normalized text.
            str2 = normalizedSentence;
        }
        String[] strArr = this.tokenizer.tokenize(normalizedSentence);
        try {
            int[][] tokensStartEndInSentence = getTokensStartEndInSentence(normalizedSentence, strArr);
            return new TNoutput(str2, normalizedSentence, strArr, getTokensArrayOriginal(strArr, str2, tokensStartEndInSentence), tokensStartEndInSentence);
        } catch (UnfoundTokenInSentence e2) {
            e2.printStackTrace();
            return TNoutput.getErrorTNoutput();
        }
    }

    /**
     * Normalizes then tokenizes a sentence without computing token offsets.
     *
     * @param str the sentence to process
     * @return the tokens of the normalized sentence
     */
    public String[] tokenizeWithoutEndStart(String str) {
        return this.tokenizer.tokenize(this.normalizer.getNormalizedSentence(str));
    }

    /**
     * Extracts, for each start/end pair, the matching substring of {@code str}.
     *
     * @param strArr the tokens; only its length is used, to size the result
     * @param str the sentence the substrings are taken from
     * @param iArr one {@code {start, end}} pair per token, {@code end} being inclusive
     * @return the substrings of {@code str} delimited by each pair
     */
    private String[] getTokensArrayOriginal(String[] strArr, String str, int[][] iArr) {
        String[] strArr2 = new String[strArr.length];
        for (int i = 0; i < iArr.length; i++) {
            int[] iArr2 = iArr[i];
            // The end offset is inclusive whereas substring's end index is exclusive.
            strArr2[i] = str.substring(iArr2[0], iArr2[1] + 1);
        }
        return strArr2;
    }

    /**
     * Verifies that two strings have the same length.
     *
     * @param str the original sentence
     * @param str2 the modified sentence
     * @throws InvalidSentenceLength if the lengths differ
     */
    private void checkUnchangedLength(String str, String str2) throws InvalidSentenceLength {
        if (str.length() != str2.length()) {
            throw new InvalidSentenceLength("Original length " + str.length() + " : " + str + "\nModified length " + str2.length() + " : " + str2);
        }
    }

    /**
     * Replaces the normalizer.
     *
     * @param iNormalizer the new normalizer
     */
    public void setNormalizer(INormalizer iNormalizer) {
        this.normalizer = iNormalizer;
    }

    /**
     * Replaces the tokenizer.
     *
     * @param iTokenizer the new tokenizer
     */
    public void setTokenizer(ITokenizer iTokenizer) {
        this.tokenizer = iTokenizer;
    }

    /**
     * Returns the normalizer currently in use.
     *
     * @return the normalizer
     */
    public INormalizer getNormalizer() {
        return this.normalizer;
    }

    /**
     * Returns the tokenizer currently in use.
     *
     * @return the tokenizer
     */
    public ITokenizer getTokenizer() {
        return this.tokenizer;
    }

    /**
     * Locates each token in the sentence, scanning left to right and matching the token's
     * characters one by one (characters of the sentence that do not match are skipped).
     *
     * @param str the sentence the tokens were produced from
     * @param strArr the tokens, in the order they appear in the sentence
     * @return one {@code {start, end}} pair per token, {@code end} being the inclusive offset of
     *     the token's last character
     * @throws UnfoundTokenInSentence if the end of the sentence is reached before every
     *     character of a token has been matched
     */
    private int[][] getTokensStartEndInSentence(String str, String[] strArr) throws UnfoundTokenInSentence {
        int[][] offsets = new int[strArr.length][2];
        char[] sentenceChars = str.toCharArray();
        int length = sentenceChars.length;
        int cursor = 0;
        // Declared outside the loop on purpose: an empty token matches no character and
        // therefore keeps the offsets of the previous token (or 0/0 for the first one).
        int start = 0;
        int end = 0;
        for (int t = 0; t < strArr.length; t++) {
            String token = strArr[t];
            char[] tokenChars = token.toCharArray();
            for (int k = 0; k < tokenChars.length; k++) {
                // Check the bound before every read so that running off the end of the
                // sentence is reported as UnfoundTokenInSentence rather than as an
                // ArrayIndexOutOfBoundsException.
                while (cursor < length && sentenceChars[cursor] != tokenChars[k]) {
                    cursor++;
                }
                if (cursor >= length) {
                    throw new UnfoundTokenInSentence(token, str);
                }
                if (k == 0) {
                    start = cursor;
                }
                if (k == tokenChars.length - 1) {
                    end = cursor;
                }
                cursor++;
            }
            offsets[t] = new int[] {start, end};
        }
        return offsets;
    }

    /**
     * Tokenizes a string with the current tokenizer, without normalizing it.
     *
     * @param str the string to tokenize
     * @return the tokens returned by the tokenizer
     */
    @Override // fr.erias.IAMsystem.tokenizer.ITokenizer
    public String[] tokenize(String str) {
        return getTokenizer().tokenize(str);
    }

    /**
     * Normalizes a sentence with the current normalizer.
     *
     * @param str the sentence to normalize
     * @return the normalized sentence returned by the normalizer
     */
    @Override // fr.erias.IAMsystem.normalizer.INormalizer
    public String getNormalizedSentence(String str) {
        return getNormalizer().getNormalizedSentence(str);
    }

    /**
     * Asks the current normalizer whether a word is a stopword.
     *
     * @param str the word to test
     * @return the normalizer's answer
     */
    @Override // fr.erias.IAMsystem.stopwords.IStopwords
    public boolean isStopWord(String str) {
        return this.normalizer.isStopWord(str);
    }
}
