package org.languagetool.rules.ngrams;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.languagetool.AnalyzedSentence;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.tokenizers.Tokenizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/languagetool/rules/ngrams/LanguageModelUtils.class */
/**
 * Static helpers that collect the tokens surrounding a word in a tokenized sentence and
 * combine the {@link LanguageModel} pseudo-probabilities of the resulting token windows
 * into a single score for a candidate replacement word.
 *
 * <p>The class is not instantiable; all members are static.
 */
public final class LanguageModelUtils {
    private static final Logger logger = LoggerFactory.getLogger(LanguageModelUtils.class);

    /**
     * Coverage threshold: a score is only reported when at least one of the looked-up
     * probabilities has a coverage greater than or equal to this value.
     */
    private static final float MIN_COVERAGE = 0.5f;

    private LanguageModelUtils() {
    }

    /**
     * Returns the tokenizer used to split text into tokens for language-model lookups.
     * Decompiler note: the original access level of this method was package-private.
     *
     * @param language the language whose word tokenizer is used
     * @return the result of {@code language.getWordTokenizer()}
     */
    public static Tokenizer getGoogleStyleWordTokenizer(Language language) {
        return language.getWordTokenizer();
    }

    /**
     * Builds the context around {@code googleToken}, with the token itself replaced by a
     * single new token whose text is {@code str}.
     * Decompiler note: the original access level of this method was package-private.
     *
     * @param googleToken the token in {@code list} to build the context around
     * @param list        the tokens of the sentence
     * @param str         text of the replacement token
     * @param i           number of non-whitespace tokens to take from the left
     * @param i2          number of non-whitespace tokens to take from the right
     * @return the token texts of the context, in sentence order
     * @throws RuntimeException if {@code googleToken} is not contained in {@code list}
     */
    public static List<String> getContext(GoogleToken googleToken, List<GoogleToken> list, String str, int i, int i2) {
        List<GoogleToken> replacement = Collections.singletonList(new GoogleToken(str, 0, str.length()));
        return getContext(googleToken, list, replacement, i, i2);
    }

    /**
     * Builds the context around {@code googleToken}, with the token itself replaced by the
     * tokens in {@code list2}. Whitespace tokens are skipped and a {@code "."} token is used
     * as filler when the sentence has too few tokens on the right.
     *
     * @param googleToken the token in {@code list} to build the context around
     * @param list        the tokens of the sentence
     * @param list2       the tokens that take the place of {@code googleToken}
     * @param i           number of non-whitespace tokens to take from the left
     * @param i2          number of non-whitespace tokens to take from the right
     * @return the token texts of the context, in sentence order
     * @throws RuntimeException if {@code googleToken} is not contained in {@code list}
     */
    static List<String> getContext(GoogleToken googleToken, List<GoogleToken> list, List<GoogleToken> list2, int i, int i2) {
        GoogleToken rightFiller = new GoogleToken(".", 0, 0);
        List<GoogleToken> context =
                getContext(googleToken, list, list2, i, i2, candidate -> candidate.isWhitespace(), rightFiller);
        return context.stream()
                .map(contextToken -> contextToken.token)
                .collect(Collectors.toList());
    }

    /**
     * Builds a window of elements around {@code t}, with {@code t} itself replaced by the
     * elements of {@code list2}.
     *
     * <p>Elements accepted by {@code predicate} are skipped and do not count towards
     * {@code i} or {@code i2}. If {@code list} ends before {@code i2} elements were collected
     * on the right, {@code t2} is appended for every missing element.
     *
     * <p>If {@code list} starts before {@code i} elements were collected on the left, the
     * method returns early with <em>all</em> elements that precede {@code t} (including the
     * ones {@code predicate} would have skipped) followed by {@code list2}; no right context
     * is added in that case.
     * NOTE(review): this asymmetric early return is kept exactly as found - confirm that it
     * is intended.
     *
     * @param t         the element to build the window around; located via {@code list.indexOf}
     * @param list      the full sequence
     * @param list2     the elements that take the place of {@code t} in the result
     * @param i         number of non-skipped elements to take from the left
     * @param i2        number of non-skipped elements to take from the right
     * @param predicate returns {@code true} for elements that must be skipped
     * @param t2        filler appended when the right side of {@code list} is exhausted
     * @param <T>       element type
     * @return a new list holding the window, in sequence order
     * @throws RuntimeException if {@code t} is not contained in {@code list}
     */
    public static <T> List<T> getContext(T t, List<T> list, List<T> list2, int i, int i2, Predicate<T> predicate, T t2) {
        int position = list.indexOf(t);
        if (position == -1) {
            throw new RuntimeException(String.format("Token not found: '%s' in tokens %s", t, list));
        }
        List<T> context = new ArrayList<>();

        // Walk to the left, prepending so that the result stays in sequence order.
        int collected = 0;
        for (int index = position - 1; collected < i; index--) {
            if (index < 0) {
                // Ran out of elements: return everything before the match plus the replacement.
                List<T> truncated = new ArrayList<>(list.subList(0, position));
                truncated.addAll(list2);
                return truncated;
            }
            T candidate = list.get(index);
            if (!predicate.test(candidate)) {
                context.add(0, candidate);
                collected++;
            }
        }

        context.addAll(list2);

        // Walk to the right, padding with the filler once the sequence is exhausted.
        collected = 0;
        for (int index = position + 1; collected < i2; index++) {
            if (index >= list.size()) {
                context.add(t2);
                collected++;
            } else {
                T candidate = list.get(index);
                if (!predicate.test(candidate)) {
                    context.add(candidate);
                    collected++;
                }
            }
        }
        return context;
    }

    /**
     * Scores {@code str} as a replacement for the token that starts at position {@code i} in
     * {@code analyzedSentence}, using windows of three tokens.
     *
     * @param language         supplies the tokenizer
     * @param languageModel    the model queried for pseudo-probabilities
     * @param i                start position of the token to replace
     * @param analyzedSentence the sentence containing the token
     * @param str              the replacement text
     * @return the combined probability, or {@code 0.0} if no token starts at {@code i} or the
     *         token-level overload reports {@code 0.0}
     */
    public static double get3gramProbabilityFor(Language language, LanguageModel languageModel, int i, AnalyzedSentence analyzedSentence, String str) {
        List<GoogleToken> sentenceTokens = GoogleToken.getGoogleTokens(analyzedSentence, true, getGoogleStyleWordTokenizer(language));
        Optional<GoogleToken> match = findTokenStartingAt(sentenceTokens, i);
        if (!match.isPresent()) {
            warnNoMatchingToken(analyzedSentence, sentenceTokens);
            return 0.0d;
        }
        return get3gramProbabilityFor(language, languageModel, match.get(), sentenceTokens, str);
    }

    /**
     * Scores {@code str} as a replacement for the token that starts at position {@code i} in
     * {@code analyzedSentence}, using windows of four tokens.
     *
     * @param language         supplies the tokenizer
     * @param languageModel    the model queried for pseudo-probabilities
     * @param i                start position of the token to replace
     * @param analyzedSentence the sentence containing the token
     * @param str              the replacement text
     * @return the combined probability, or {@code 0.0} if no token starts at {@code i} or the
     *         token-level overload reports {@code 0.0}
     */
    public static double get4gramProbabilityFor(Language language, LanguageModel languageModel, int i, AnalyzedSentence analyzedSentence, String str) {
        List<GoogleToken> sentenceTokens = GoogleToken.getGoogleTokens(analyzedSentence, true, getGoogleStyleWordTokenizer(language));
        Optional<GoogleToken> match = findTokenStartingAt(sentenceTokens, i);
        if (!match.isPresent()) {
            warnNoMatchingToken(analyzedSentence, sentenceTokens);
            return 0.0d;
        }
        return get4gramProbabilityFor(language, languageModel, match.get(), sentenceTokens, str);
    }

    /**
     * Scores {@code str} as a replacement for {@code googleToken} using windows of three
     * tokens.
     *
     * <p>If {@code str} tokenizes to one token, three windows are looked up (word first,
     * word in the middle, word last) and their probabilities are multiplied. If it tokenizes
     * to two tokens, only the first and last windows are looked up and the middle factor is
     * their average with a coverage of {@code 1.0}. Any other token count is not supported.
     * Decompiler note: the original access level of this method was package-private.
     *
     * @param language      supplies the tokenizer
     * @param languageModel the model queried for pseudo-probabilities
     * @param googleToken   the token in {@code list} to replace
     * @param list          the tokens of the sentence
     * @param str           the replacement text
     * @return the product of the three probabilities, or {@code 0.0} if the token count of
     *         {@code str} is unsupported or no probability reaches the minimum coverage
     * @throws RuntimeException if {@code googleToken} is not contained in {@code list}
     */
    public static double get3gramProbabilityFor(Language language, LanguageModel languageModel, GoogleToken googleToken, List<GoogleToken> list, String str) {
        List<GoogleToken> wordTokens = GoogleToken.getGoogleTokens(str, false, getGoogleStyleWordTokenizer(language));
        Probability left;
        Probability middle;
        Probability right;
        if (wordTokens.size() == 1) {
            List<String> leftContext = getContext(googleToken, list, str, 0, 2);
            left = languageModel.getPseudoProbability(leftContext);
            traceNgram("Left  : %.90f %s\n", left, leftContext);

            List<String> middleContext = getContext(googleToken, list, str, 1, 1);
            middle = languageModel.getPseudoProbability(middleContext);
            traceNgram("Middle: %.90f %s\n", middle, middleContext);

            List<String> rightContext = getContext(googleToken, list, str, 2, 0);
            right = languageModel.getPseudoProbability(rightContext);
            traceNgram("Right : %.90f %s\n", right, rightContext);
        } else if (wordTokens.size() == 2) {
            left = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 0, 1));
            right = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 1, 0));
            // A two-token word leaves no room for a centered window, so the average is used.
            middle = new Probability((left.getProb() + right.getProb()) / 2.0d, 1.0f);
        } else {
            warnUnsupportedTokenCount(str, wordTokens);
            return 0.0d;
        }

        boolean coverageReached = left.getCoverage() >= MIN_COVERAGE
                || middle.getCoverage() >= MIN_COVERAGE
                || right.getCoverage() >= MIN_COVERAGE;
        if (coverageReached) {
            return left.getProb() * middle.getProb() * right.getProb();
        }
        if (logger.isTraceEnabled()) {
            logger.trace(String.format("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, assuming p=0\n",
                    MIN_COVERAGE, left.getCoverage(), middle.getCoverage(), right.getCoverage()));
        }
        return 0.0d;
    }

    /**
     * Scores {@code str} as a replacement for {@code googleToken} using windows of four
     * tokens.
     *
     * <p>If {@code str} tokenizes to one token, four windows are looked up, one for each
     * possible position of the word in the window. If it tokenizes to two tokens, three
     * windows are looked up and the centered one is counted twice. Any other token count is
     * not supported. The result is the exponential of the summed log-probabilities.
     * Decompiler note: the original access level of this method was package-private.
     *
     * @param language      supplies the tokenizer
     * @param languageModel the model queried for pseudo-probabilities
     * @param googleToken   the token in {@code list} to replace
     * @param list          the tokens of the sentence
     * @param str           the replacement text
     * @return the combined probability, or {@code 0.0} if the token count of {@code str} is
     *         unsupported or no probability reaches the minimum coverage
     * @throws RuntimeException if {@code googleToken} is not contained in {@code list}
     */
    public static double get4gramProbabilityFor(Language language, LanguageModel languageModel, GoogleToken googleToken, List<GoogleToken> list, String str) {
        List<GoogleToken> wordTokens = GoogleToken.getGoogleTokens(str, false, getGoogleStyleWordTokenizer(language));
        Probability wordFirst;
        Probability twoLeftOneRight;
        Probability oneLeftTwoRight;
        Probability wordLast;
        if (wordTokens.size() == 1) {
            wordFirst = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 0, 3));
            twoLeftOneRight = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 2, 1));
            oneLeftTwoRight = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 1, 2));
            wordLast = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 3, 0));
        } else if (wordTokens.size() == 2) {
            wordFirst = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 0, 2));
            twoLeftOneRight = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 1, 1));
            // Only one centered window exists for a two-token word; it is used for both factors.
            oneLeftTwoRight = twoLeftOneRight;
            wordLast = languageModel.getPseudoProbability(getContext(googleToken, list, wordTokens, 2, 0));
        } else {
            warnUnsupportedTokenCount(str, wordTokens);
            return 0.0d;
        }

        boolean coverageReached = wordFirst.getCoverage() >= MIN_COVERAGE
                || twoLeftOneRight.getCoverage() >= MIN_COVERAGE
                || oneLeftTwoRight.getCoverage() >= MIN_COVERAGE
                || wordLast.getCoverage() >= MIN_COVERAGE;
        if (coverageReached) {
            return Math.exp(wordFirst.getLogProb() + twoLeftOneRight.getLogProb()
                    + oneLeftTwoRight.getLogProb() + wordLast.getLogProb());
        }
        if (logger.isTraceEnabled()) {
            logger.trace(String.format("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, %.2f, assuming p=0\n",
                    MIN_COVERAGE, wordFirst.getCoverage(), twoLeftOneRight.getCoverage(),
                    oneLeftTwoRight.getCoverage(), wordLast.getCoverage()));
        }
        return 0.0d;
    }

    /** Finds the first token that starts at {@code startPos} and is not the sentence-start marker. */
    private static Optional<GoogleToken> findTokenStartingAt(List<GoogleToken> tokens, int startPos) {
        return tokens.stream()
                .filter(candidate -> candidate.startPos == startPos
                        && !LanguageModel.GOOGLE_SENTENCE_START.equals(candidate.token))
                .findFirst();
    }

    /** Logs that no token of the sentence starts at the requested position. */
    private static void warnNoMatchingToken(AnalyzedSentence analyzedSentence, List<GoogleToken> sentenceTokens) {
        logger.warn(String.format("Could not find matching Google token in tokenizations '%s' / '%s'", analyzedSentence.getText(), sentenceTokens));
    }

    /** Logs that the replacement word tokenizes to a number of tokens other than one or two. */
    private static void warnUnsupportedTokenCount(String word, List<GoogleToken> wordTokens) {
        logger.warn("Words that consists of more than 2 tokens (according to Google tokenization) are not supported yet: " + word + " -> " + wordTokens);
    }

    /** Traces one looked-up probability; the message is only formatted when TRACE is enabled. */
    private static void traceNgram(String format, Probability probability, List<String> context) {
        if (logger.isTraceEnabled()) {
            // Arrays.asList wraps the context list in another list, so it prints as [[...]];
            // kept so that the trace output stays unchanged.
            logger.trace(String.format(format, probability.getProb(), Arrays.asList(context)));
        }
    }
}
