package org.codelibs.elasticsearch.vi.nlp.lang.model.unigram;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.elasticsearch.vi.nlp.utils.UTF8FileUtility;

/* loaded from: input_file:org/codelibs/elasticsearch/vi/nlp/lang/model/unigram/VocabularyBuilder.class */
/**
 * Builds an alphabetically sorted vocabulary from unigram frequencies and
 * can write that vocabulary to a file, one word per line.
 */
public final class VocabularyBuilder {
    private static final Logger logger = LogManager.getLogger(VocabularyBuilder.class);

    /** Words whose frequency met the cutoff, kept in natural (String) order. */
    private final List<String> vocabulary;

    /**
     * Collects every word whose frequency is at least {@code i} and sorts the
     * result.
     *
     * <p>The frequencies are obtained through the static
     * {@code Unigram.getFrequencies()} accessor; the {@code unigram} argument
     * itself is not dereferenced here.
     *
     * @param unigram the unigram model (not read directly by this constructor)
     * @param i       the minimum frequency (inclusive) a word needs to be kept
     */
    public VocabularyBuilder(Unigram unigram, int i) {
        this.vocabulary = new ArrayList<String>(100);
        // Walk the entries once instead of looking each key up again.
        for (Map.Entry<String, Integer> entry : Unigram.getFrequencies().entrySet()) {
            if (entry.getValue().intValue() >= i) {
                this.vocabulary.add(entry.getKey());
            }
        }
        Collections.sort(this.vocabulary);
    }

    /**
     * Collects every word with a frequency of at least 1.
     *
     * @param unigram the unigram model (see the two-argument constructor)
     */
    public VocabularyBuilder(Unigram unigram) {
        this(unigram, 1);
    }

    /**
     * Writes the vocabulary to the given file, one word per line, then logs
     * the number of words written.
     *
     * @param str name of the output file handed to
     *            {@code UTF8FileUtility.createWriter}
     */
    public void print(String str) {
        // Assemble the whole output first so the writer is held open only
        // for the duration of the actual write.
        StringBuilder text = new StringBuilder(1024);
        for (String word : this.vocabulary) {
            text.append(word).append('\n');
        }
        UTF8FileUtility.createWriter(str);
        try {
            UTF8FileUtility.write(text.toString());
        } finally {
            // Always release the writer, even when the write fails.
            UTF8FileUtility.closeWriter();
        }
        // A word count is routine information, not an error condition.
        logger.info("# of  words = {}", this.vocabulary.size());
    }
}
