package org.codelibs.elasticsearch.vi.nlp.lang.model.unigram;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.codelibs.elasticsearch.vi.nlp.lang.model.IConstants;
import org.codelibs.elasticsearch.vi.nlp.lexicon.LexiconMarshaller;
import org.codelibs.elasticsearch.vi.nlp.utils.UTF8FileUtility;

/* loaded from: input_file:org/codelibs/elasticsearch/vi/nlp/lang/model/unigram/Unigram.class */
/**
 * Unigram frequency model.
 *
 * <p>Every line read from a corpus is treated as one token; the model keeps, for each distinct
 * line, the number of times it has been seen across all corpora loaded so far. All state is
 * held in a single static map shared by the static methods of this class.
 *
 * <p>The shared map is a plain {@link HashMap} and the loading methods mutate it without any
 * synchronization, so callers must not load corpora concurrently.
 */
public class Unigram {
    private static final Logger logger = LogManager.getLogger(Unigram.class);

    /**
     * Token (one corpus line) mapped to its occurrence count. Allocated eagerly so that the
     * static methods work even when {@link #getInstance()} has never been called.
     */
    private static final Map<String, Integer> UNIGRAM = new HashMap<>();

    /** The singleton instance; created at class initialization. */
    private static final Unigram MODEL = new Unigram();

    private Unigram() {
        // Nothing to do: the frequency table is initialized with the class.
    }

    /**
     * Returns the singleton instance of the model.
     *
     * @return the shared {@code Unigram} instance, never {@code null}
     */
    public static Unigram getInstance() {
        return MODEL;
    }

    /**
     * Loads every regular file found directly under {@code IConstants.CORPORA_DIRECTORY} and
     * adds its lines to the frequency table. Sub-directories are skipped; a file that cannot be
     * read is logged and skipped so that the remaining files are still loaded.
     *
     * @param str currently ignored: the directory is always {@code IConstants.CORPORA_DIRECTORY}.
     *            NOTE(review): the parameter looks like it was meant to be the directory name;
     *            it is left unused here so existing callers keep their behavior.
     */
    public static void loadCorpora(String str) {
        File[] corpusFiles = new File(IConstants.CORPORA_DIRECTORY).listFiles();
        if (corpusFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            logger.warn("Cannot list corpora directory " + IConstants.CORPORA_DIRECTORY);
            return;
        }
        int loaded = 0;
        for (File file : corpusFiles) {
            if (file.isDirectory()) {
                continue;
            }
            String path = file.getPath();
            try {
                loadCorpus(path);
                loaded++;
            } catch (IOException e) {
                logger.warn("Failed to load corpus file " + path, e);
            }
        }
        logger.info("Total " + loaded + " files loaded.");
    }

    /**
     * Adds one occurrence to the count of every element of {@code list}.
     *
     * @param list the tokens to count; duplicates are counted once per occurrence
     */
    private static void processLoadedCorpus(List<String> list) {
        for (String token : list) {
            UNIGRAM.merge(token, 1, Integer::sum);
        }
    }

    /**
     * Reads all lines of a UTF-8 encoded stream and adds them to the frequency table.
     * The stream is not closed by this method.
     *
     * @param inputStream the corpus content
     * @throws IOException if the stream cannot be read
     */
    public static void loadCorpusFromStream(InputStream inputStream) throws IOException {
        processLoadedCorpus(IOUtils.readLines(inputStream, "UTF-8"));
    }

    /**
     * Reads all lines of a UTF-8 encoded file and adds them to the frequency table.
     *
     * @param str path of the corpus file
     * @throws IOException if the file cannot be read
     */
    public static void loadCorpus(String str) throws IOException {
        processLoadedCorpus(FileUtils.readLines(new File(str), "UTF-8"));
    }

    /**
     * Returns the live frequency table (not a copy): changes made through the returned map are
     * visible to this class and vice versa.
     *
     * @return the token-to-count map, never {@code null}
     */
    public static Map<String, Integer> getFrequencies() {
        return UNIGRAM;
    }

    /**
     * Writes the frequency table to a file, one {@code token<TAB>count} pair per line, and logs
     * the total number of tokens and of distinct unigrams.
     *
     * @param str path of the output file, passed to {@code UTF8FileUtility.createWriter}
     */
    public static void print(String str) {
        StringBuilder content = new StringBuilder(1024);
        // long: the sum of all counts can exceed the int range on large corpora.
        long tokenCount = 0;
        for (Map.Entry<String, Integer> entry : UNIGRAM.entrySet()) {
            int frequency = entry.getValue();
            tokenCount += frequency;
            content.append(entry.getKey()).append('\t').append(frequency).append('\n');
        }
        UTF8FileUtility.createWriter(str);
        try {
            UTF8FileUtility.write(content.toString());
        } finally {
            // Always release the writer, even when the write fails.
            UTF8FileUtility.closeWriter();
        }
        logger.info("# of   tokens = " + tokenCount);
        logger.info("# of unigrams = " + UNIGRAM.size());
    }

    /**
     * Serializes the frequency table by delegating to {@code LexiconMarshaller.marshal}.
     *
     * @param str destination passed through to the marshaller
     */
    public static void marshal(String str) {
        new LexiconMarshaller().marshal(UNIGRAM, str);
    }
}
