package org.canova.nlp.metadata;

import org.canova.api.berkeley.Counter;
import org.canova.api.conf.Configuration;
import org.canova.api.util.Index;
import org.canova.api.util.MathUtils;
import org.canova.nlp.vectorizer.TextVectorizer;

/* loaded from: input_file:org/canova/nlp/metadata/DefaultVocabCache.class */
/**
 * {@link VocabCache} implementation that keeps all of its state in fields of
 * this object: a per-word occurrence counter, a per-word document counter,
 * a running document total, and an {@link Index} of the words whose
 * occurrence count was not below the minimum word frequency at the time
 * they were counted.
 *
 * <p>Nothing in this class synchronizes access to that state.
 */
public class DefaultVocabCache implements VocabCache {

    /** Threshold a word's occurrence count is compared against before it is indexed. */
    private int minWordFrequency;

    /** Accumulated occurrence count per word. */
    private Counter<String> wordFrequencies = new Counter<>();

    /** Accumulated document count per word. */
    private Counter<String> docFrequencies = new Counter<>();

    /** Words admitted to the vocabulary, addressable by position. */
    private Index vocabWords = new Index();

    /** Running total of documents reported through {@link #incrementNumDocs(double)}. */
    private double numDocs = 0.0;

    /**
     * Creates an empty cache.
     *
     * @param i the minimum word frequency; may later be replaced by
     *          {@link #initialize(Configuration)} or {@link #setMinWordFrequency(int)}
     */
    public DefaultVocabCache(int i) {
        minWordFrequency = i;
    }

    /**
     * Adds the given amount to the running document total.
     *
     * @param d the amount to add
     */
    @Override
    public void incrementNumDocs(double d) {
        numDocs = numDocs + d;
    }

    /**
     * @return the running document total
     */
    @Override
    public double numDocs() {
        return numDocs;
    }

    /**
     * Looks up the vocabulary entry stored at a position.
     *
     * @param i position in the vocabulary index
     * @return the string form of the entry at that position
     */
    @Override
    public String wordAt(int i) {
        Object entry = vocabWords.get(i);
        return entry.toString();
    }

    /**
     * Replaces the minimum word frequency with the value read from the
     * configuration under {@link TextVectorizer#MIN_WORD_FREQUENCY}, passing
     * {@code 5} as the second argument of {@code getInt} (presumably the
     * fallback when the key is unset). Any value given to the constructor is
     * overwritten.
     *
     * @param configuration the configuration to read from
     */
    @Override
    public void initialize(Configuration configuration) {
        int configured = configuration.getInt(TextVectorizer.MIN_WORD_FREQUENCY, 5);
        minWordFrequency = configured;
    }

    /**
     * @param str the word to look up
     * @return the occurrence count accumulated for the word
     */
    @Override
    public double wordFrequency(String str) {
        return wordFrequencies.getCount(str);
    }

    /**
     * @return the current minimum word frequency
     */
    @Override
    public int minWordFrequency() {
        return minWordFrequency;
    }

    /**
     * @return the vocabulary index itself (not a copy)
     */
    @Override
    public Index vocabWords() {
        return vocabWords;
    }

    /**
     * Adds one to the document count of a word by delegating to
     * {@link #incrementDocCount(String, double)}.
     *
     * @param str the word whose document count is raised
     */
    @Override
    public void incrementDocCount(String str) {
        incrementDocCount(str, 1.0);
    }

    /**
     * Adds the given amount to the document count of a word.
     *
     * @param str the word whose document count is raised
     * @param d   the amount to add
     */
    @Override
    public void incrementDocCount(String str, double d) {
        docFrequencies.incrementCount(str, d);
    }

    /**
     * Adds one to the occurrence count of a word by delegating to
     * {@link #incrementCount(String, double)}.
     *
     * @param str the word whose occurrence count is raised
     */
    @Override
    public void incrementCount(String str) {
        incrementCount(str, 1.0);
    }

    /**
     * Adds the given amount to the occurrence count of a word and, if the
     * updated count is not below the minimum word frequency and the word is
     * not yet in the vocabulary index, adds it to the index. This class never
     * removes a word from the index once added.
     *
     * @param str the word whose occurrence count is raised
     * @param d   the amount to add
     */
    @Override
    public void incrementCount(String str, double d) {
        wordFrequencies.incrementCount(str, d);

        boolean belowThreshold = wordFrequencies.getCount(str) < minWordFrequency;
        // The index is only consulted once the threshold check has passed.
        if (!belowThreshold && vocabWords.indexOf(str) < 0) {
            vocabWords.add(str);
        }
    }

    /**
     * Returns the document count accumulated for a word. Despite the method
     * name, the value is the raw count; {@link #tfidf(String, double)} hands
     * it to {@code MathUtils.idf} together with the document total.
     *
     * @param str the word to look up
     * @return the document count accumulated for the word
     */
    @Override
    public double idf(String str) {
        return docFrequencies.getCount(str);
    }

    /**
     * Combines {@code MathUtils.tf} of the given frequency with
     * {@code MathUtils.idf} of the document total and the word's document
     * count, via {@code MathUtils.tfidf}.
     *
     * @param str the word being scored
     * @param d   the word's frequency; cast to {@code int} (fractional part
     *            discarded) before being passed to {@code MathUtils.tf}
     * @return the value produced by {@code MathUtils.tfidf}
     */
    @Override
    public double tfidf(String str, double d) {
        double termWeight = MathUtils.tf((int) d);
        double inverseDocWeight = MathUtils.idf(numDocs, idf(str));
        return MathUtils.tfidf(termWeight, inverseDocWeight);
    }

    /**
     * Bean-style accessor returning the same field as {@link #minWordFrequency()}.
     *
     * @return the current minimum word frequency
     */
    public int getMinWordFrequency() {
        return minWordFrequency;
    }

    /**
     * Replaces the minimum word frequency. Words already in the vocabulary
     * index are left untouched.
     *
     * @param i the new minimum word frequency
     */
    public void setMinWordFrequency(int i) {
        minWordFrequency = i;
    }
}
