/*
 * Decompiled with CFR 0.152.
 */
package cc.redpen.validator.document;

import cc.redpen.RedPenException;
import cc.redpen.model.Document;
import cc.redpen.model.Paragraph;
import cc.redpen.model.Sentence;
import cc.redpen.tokenizer.TokenElement;
import cc.redpen.util.DictionaryLoader;
import cc.redpen.validator.sentence.SpellingDictionaryValidator;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * Document validator that flags words occurring much more often in a document
 * than in a reference word-frequency table for the configured language.
 *
 * <p>Configuration properties (defaults set in the constructor):
 * <ul>
 *   <li>{@code deviation_factor} (3.0): a word is only reported when its share
 *       of the document exceeds its reference share multiplied by this factor.</li>
 *   <li>{@code min_word_count} (200): documents with fewer counted words are
 *       not analysed at all.</li>
 * </ul>
 *
 * <p>Errors are reported with the key {@code WordUsedTooFrequently} and are
 * attached to the last sentence of the document being validated.
 */
public class WordFrequencyValidator extends SpellingDictionaryValidator {
    private static final String DEFAULT_RESOURCE_PATH = "default-resources/word-frequency";

    /** Reference frequency per word, as loaded from the resource file in {@link #init()}. */
    private Map<String, Double> referenceWordFrequencies;
    /** Absolute deviation of each reference frequency from the reference mean. */
    private Map<String, Double> referenceWordDeviations;
    /** Occurrence count, for the current document, of words present in the reference table. */
    private final Map<String, Integer> documentWordOccurances = new HashMap<>();
    /** Number of words counted in the current document (see {@link #processSentence}). */
    private int wordCount = 0;
    /** Last sentence seen in the current document; errors are attached to it. */
    private Sentence lastSentence;

    public WordFrequencyValidator() {
        this.setDefaultProperties("deviation_factor", Float.valueOf(3.0f), "min_word_count", 200);
    }

    /**
     * Loads the reference word-frequency table for the symbol table's language
     * and precomputes the per-word deviations.
     *
     * <p>Each resource line is split on a single space; the first field is
     * parsed as the frequency and the second field is used as the word.
     *
     * @throws RedPenException if the parent initialisation or the resource loading fails
     */
    @Override
    protected void init() throws RedPenException {
        super.init();
        String defaultDictionaryFile = DEFAULT_RESOURCE_PATH + "/word-frequency-"
                + this.getSymbolTable().getLang() + ".dat";
        this.referenceWordDeviations = new HashMap<>();
        this.referenceWordFrequencies = new DictionaryLoader<Map<String, Double>>(HashMap::new, (map, line) -> {
            String[] fields = line.split(" ");
            map.put(fields[1], Double.valueOf(fields[0]));
        }).loadCachedFromResource(defaultDictionaryFile, "word frequencies");
        this.initDeviations(this.referenceWordFrequencies, this.referenceWordDeviations);
    }

    /**
     * Updates the per-document counters with the tokens of one sentence.
     *
     * <p>A token found in the reference table increments both its occurrence
     * count and the total word count. Any other token only increments the total
     * word count, and only when no spelling dictionary exists or the token is in it.
     *
     * @param sentence the sentence whose tokens are counted
     */
    private void processSentence(Sentence sentence) {
        for (TokenElement token : sentence.getTokens()) {
            // Locale.ROOT keeps lower-casing independent of the JVM default locale
            // (e.g. a Turkish default would map "I" to a dotless "ı").
            String word = token.getSurface().toLowerCase(Locale.ROOT);
            if (this.referenceWordDeviations.containsKey(word)) {
                this.documentWordOccurances.merge(word, 1, Integer::sum);
                ++this.wordCount;
            } else if (!this.dictionaryExists() || this.inDictionary(word)) {
                ++this.wordCount;
            }
        }
    }

    /**
     * Fills {@code deviations} with the absolute distance of every histogram
     * value from the mean and returns the standard deviation.
     *
     * <p>NOTE(review): both the mean and the variance are divided by a fixed
     * 100.0 rather than by the number of histogram entries. This is kept as-is
     * because changing it would alter which words are reported; confirm
     * whether the fixed divisor is intentional.
     *
     * @param histogram  word to frequency values; read only
     * @param deviations output map receiving {@code |value - mean|} per word
     * @return {@code sqrt(sum((value - mean)^2) / 100.0)}
     */
    private double initDeviations(Map<String, Double> histogram, Map<String, Double> deviations) {
        final double size = 100.0;

        double total = 0.0;
        for (double value : histogram.values()) {
            total += value;
        }
        double mean = total / size;

        double squaredDiffs = 0.0;
        for (double value : histogram.values()) {
            double diff = value - mean;
            squaredDiffs += diff * diff;
        }
        double stddev = Math.sqrt(squaredDiffs / size);

        for (Map.Entry<String, Double> entry : histogram.entrySet()) {
            deviations.put(entry.getKey(), Math.abs(entry.getValue() - mean));
        }
        return stddev;
    }

    /**
     * Counts the words of {@code document} and reports every reference word
     * whose deviation from the document mean exceeds one standard deviation and
     * whose document percentage exceeds the reference percentage multiplied by
     * {@code deviation_factor}.
     *
     * <p>Nothing is reported when fewer than {@code min_word_count} words were counted.
     *
     * @param document the document to analyse
     */
    @Override
    public void validate(Document document) {
        // Start from a clean slate so counts from a previously validated
        // document do not leak into this one.
        this.documentWordOccurances.clear();
        this.wordCount = 0;
        this.lastSentence = null;

        for (int i = 0; i < document.size(); ++i) {
            for (Paragraph para : document.getSection(i).getParagraphs()) {
                for (Sentence sentence : para.getSentences()) {
                    this.processSentence(sentence);
                    this.lastSentence = sentence;
                }
            }
        }

        if (this.wordCount < this.getInt("min_word_count")) {
            return;
        }

        // Convert raw counts to percentages of all counted words.
        final double totalWords = this.wordCount;
        Map<String, Double> documentWordFrequencies = new HashMap<>();
        this.documentWordOccurances.forEach(
                (word, count) -> documentWordFrequencies.put(word, 100.0 * count / totalWords));

        Map<String, Double> documentDeviations = new HashMap<>();
        final double stddev = this.initDeviations(documentWordFrequencies, documentDeviations);
        final double deviationFactor = this.getFloat("deviation_factor");
        final DecimalFormat df = new DecimalFormat("0.00");

        documentDeviations.forEach((word, deviation) -> {
            if (this.referenceWordDeviations.get(word) == null) {
                return;
            }
            double devRatio = deviation / stddev;
            double docPercentage = documentWordFrequencies.get(word);
            double referencePercentage = this.referenceWordFrequencies.get(word);
            if (devRatio > 1.0 && docPercentage > referencePercentage * deviationFactor) {
                this.addLocalizedError("WordUsedTooFrequently", this.lastSentence, word,
                        df.format(docPercentage), df.format(referencePercentage));
            }
        });
    }
}

