package cc.redpen.validator.document;

import cc.redpen.RedPenException;
import cc.redpen.model.Document;
import cc.redpen.model.Paragraph;
import cc.redpen.model.Sentence;
import cc.redpen.tokenizer.TokenElement;
import cc.redpen.util.DictionaryLoader;
import cc.redpen.validator.sentence.SpellingDictionaryValidator;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/* loaded from: input_file:WEB-INF/lib/redpen-core-1.9.0.jar:cc/redpen/validator/document/WordFrequencyValidator.class */
/**
 * Reports words that a document uses noticeably more often than a
 * language-specific reference frequency table says is usual.
 *
 * <p>Configuration properties (defaults set in the constructor):
 * <ul>
 *   <li>{@code deviation_factor} (3.0) - a word is reported only when its
 *       share of the document exceeds the reference frequency multiplied
 *       by this factor;</li>
 *   <li>{@code min_word_count} (200) - documents with fewer counted words
 *       are not checked at all.</li>
 * </ul>
 *
 * <p>Counting state is kept in instance fields and is reset at the start of
 * every {@link #validate(Document)} call, so one instance can be used for
 * several documents in turn.
 */
public class WordFrequencyValidator extends SpellingDictionaryValidator {
    private static final String DEFAULT_RESOURCE_PATH = "default-resources/word-frequency";

    /** Reference table: word -> frequency value read from the resource file. */
    private Map<String, Double> referenceWordFrequencies;
    /** Absolute deviation of each reference frequency, see {@link #initDeviations}. */
    private Map<String, Double> referenceWordDeviations;
    /** Occurrences, in the current document, of words present in the reference table. */
    private final Map<String, Integer> documentWordOccurances = new HashMap<>();
    /** Number of counted words in the current document. */
    private int wordCount = 0;
    /** Last sentence seen in the current document; every error is attached to it. */
    private Sentence lastSentence;

    public WordFrequencyValidator() {
        setDefaultProperties("deviation_factor", Float.valueOf(3.0f), "min_word_count", 200);
    }

    /**
     * Loads the reference frequency table for the configured language and
     * derives the per-word deviations from it.
     *
     * <p>Each line of the resource is split on a single space; the first
     * field is parsed as the frequency and the second field is the word.
     *
     * @throws RedPenException propagated from the superclass initialisation
     */
    @Override // cc.redpen.validator.DictionaryValidator, cc.redpen.validator.Validator
    public void init() throws RedPenException {
        super.init();
        String referenceFile = DEFAULT_RESOURCE_PATH + "/word-frequency-" + getSymbolTable().getLang() + ".dat";
        this.referenceWordDeviations = new HashMap<>();
        this.referenceWordFrequencies = new DictionaryLoader<Map<String, Double>>(HashMap::new, (map, line) -> {
            String[] fields = line.split(" ");
            map.put(fields[1], Double.valueOf(fields[0]));
        }).loadCachedFromResource(referenceFile, "word frequencies");
        initDeviations(this.referenceWordFrequencies, this.referenceWordDeviations);
    }

    /**
     * Adds the tokens of one sentence to the per-document counters.
     *
     * <p>A token found in the reference table is tallied and counted. Any
     * other token is only counted, and only if no spelling dictionary is
     * available or the dictionary contains it.
     *
     * @param sentence sentence whose tokens are counted
     */
    private void processSentence(Sentence sentence) {
        for (TokenElement token : sentence.getTokens()) {
            String word = token.getSurface().toLowerCase();
            if (this.referenceWordDeviations.containsKey(word)) {
                this.documentWordOccurances.merge(word, 1, Integer::sum);
                this.wordCount++;
            } else if (!dictionaryExists() || inDictionary(word)) {
                this.wordCount++;
            }
        }
    }

    /**
     * Fills {@code deviations} with the absolute distance of every histogram
     * value from the histogram mean and returns the spread of the histogram.
     *
     * <p>NOTE(review): both the mean and the variance divide by the constant
     * 100 rather than by the number of entries - presumably because the
     * values are percentages; confirm before changing.
     *
     * @param histogram  word -> frequency value
     * @param deviations output map, receives word -> |value - mean|
     * @return square root of (sum of squared distances from the mean / 100)
     */
    private double initDeviations(Map<String, Double> histogram, Map<String, Double> deviations) {
        double sum = 0.0d;
        for (double frequency : histogram.values()) {
            sum += frequency;
        }
        double mean = sum / 100.0d;

        double squaredDistanceSum = 0.0d;
        for (double frequency : histogram.values()) {
            double distance = frequency - mean;
            squaredDistanceSum += distance * distance;
        }

        for (Map.Entry<String, Double> entry : histogram.entrySet()) {
            deviations.put(entry.getKey(), Math.abs(entry.getValue() - mean));
        }
        return Math.sqrt(squaredDistanceSum / 100.0d);
    }

    /**
     * Counts the words of {@code document} and raises a
     * {@code WordUsedTooFrequently} error for every word whose normalised
     * deviation is above 1 and whose percentage of the document is above the
     * reference frequency times {@code deviation_factor}.
     *
     * <p>Nothing is reported when fewer than {@code min_word_count} words
     * were counted. All errors are attached to the last sentence of the
     * document.
     *
     * @param document document to check
     */
    @Override // cc.redpen.validator.Validator
    public void validate(Document document) {
        // Start from a clean slate: without this, counts from a previously
        // validated document would leak into this one, and an error could be
        // raised against a null sentence for a document without sentences.
        this.lastSentence = null;
        this.wordCount = 0;
        this.documentWordOccurances.clear();

        for (int i = 0; i < document.size(); i++) {
            for (Paragraph paragraph : document.getSection(i).getParagraphs()) {
                for (Sentence sentence : paragraph.getSentences()) {
                    processSentence(sentence);
                    this.lastSentence = sentence;
                }
            }
        }

        if (this.wordCount < getInt("min_word_count")) {
            return;
        }

        // Convert raw occurrence counts into percentages of all counted words.
        Map<String, Double> documentFrequencies = new HashMap<>();
        this.documentWordOccurances.forEach((word, count) ->
                documentFrequencies.put(word, (100.0d * count) / this.wordCount));

        DecimalFormat percentFormat = new DecimalFormat("0.00");
        Map<String, Double> documentDeviations = new HashMap<>();
        double spread = initDeviations(documentFrequencies, documentDeviations);

        documentDeviations.forEach((word, deviation) -> {
            if (this.referenceWordDeviations.get(word) == null) {
                return;
            }
            double normalisedDeviation = deviation / spread;
            double documentFrequency = documentFrequencies.get(word);
            double referenceFrequency = this.referenceWordFrequencies.get(word);
            // Kept in "<= ... return" form on purpose: a NaN deviation
            // (0 / 0) must fall through to the frequency comparison.
            if (normalisedDeviation <= 1.0d
                    || documentFrequency <= referenceFrequency * getFloat("deviation_factor")) {
                return;
            }
            addLocalizedError("WordUsedTooFrequently", this.lastSentence, word,
                    percentFormat.format(documentFrequency), percentFormat.format(referenceFrequency));
        });
    }
}
