package fr.erias.IAMsystem.load;

import fr.erias.IAMsystem.normalizer.NormalizerTerm;
import fr.erias.IAMsystem.normalizer.Stopwords;
import fr.erias.IAMsystem.tokenizer.TokenizerNormalizer;
import fr.erias.IAMsystem.tree.SetTokenTree;
import fr.erias.IAMsystem.tree.TokenTree;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:fr/erias/IAMsystem/load/Loader.class */
/**
 * Static helpers that read a delimited terminology file line by line and
 * build token-level structures from one of its columns: a set of unique
 * tokens, a map of unigrams/bigrams, or a {@link SetTokenTree}.
 *
 * <p>Every reader opened here is closed with try-with-resources, so the file
 * handle is released even when a malformed line makes a method throw.
 */
public class Loader {
    static final Logger logger = LoggerFactory.getLogger(Loader.class);

    /**
     * Collects the distinct tokens found in one column of a delimited file.
     *
     * @param file the file to read, one record per line
     * @param str  the column separator; it is passed to {@link String#split(String)}
     *             and is therefore interpreted as a regular expression
     * @param i    zero-based index of the column to tokenize
     * @return the set of tokens produced by {@code TokenizerNormalizer.tokenizeAlphaNum}
     *         over that column of every line
     * @throws IOException if the file cannot be opened or read
     * @throws ArrayIndexOutOfBoundsException if a line has no column at index {@code i}
     */
    public static HashSet<String> getUniqueToken(File file, String str, int i) throws IOException {
        HashSet<String> uniqueTokens = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String label = line.split(str)[i];
                for (String token : TokenizerNormalizer.tokenizeAlphaNum(label)) {
                    uniqueTokens.add(token);
                }
            }
        }
        return uniqueTokens;
    }

    /**
     * Builds a map of collapsed forms to spaced forms from one column of a
     * delimited file.
     *
     * <p>For every line whose column value is not itself a stopword, the value
     * is tokenized and stopwords are removed. Then:
     * <ul>
     *   <li>each pair of adjacent remaining tokens whose second token is not a
     *       single character is stored as {@code first + second -> "first second"};</li>
     *   <li>each remaining token is stored as {@code token -> token}.</li>
     * </ul>
     *
     * @param stopwords the stopwords used both to skip whole labels and to
     *                  filter individual tokens
     * @param file      the file to read, one record per line
     * @param str       the column separator, interpreted as a regular expression
     *                  by {@link String#split(String)}
     * @param i         zero-based index of the column to tokenize
     * @return the unigram/bigram map described above
     * @throws IOException if the file cannot be opened or read
     * @throws ArrayIndexOutOfBoundsException if a line has no column at index {@code i}
     */
    public static HashMap<String, String> getUniqueTokenBigram(Stopwords stopwords, File file, String str, int i) throws IOException {
        TokenizerNormalizer tokenizerNormalizer = getTokenizerNormalizer(stopwords);
        HashMap<String, String> collapsedToSpaced = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String label = line.split(str)[i];
                // A label that is a stopword as a whole contributes nothing.
                if (tokenizerNormalizer.getNormalizerTerm().isStopWord(label)) {
                    continue;
                }
                String[] tokens = removeStopWords(stopwords, TokenizerNormalizer.tokenizeAlphaNum(label));
                for (int pos = 0; pos + 1 < tokens.length; pos++) {
                    String first = tokens[pos];
                    String second = tokens[pos + 1];
                    // Pairs whose second token is one character long are not recorded.
                    if (second.length() != 1) {
                        collapsedToSpaced.put(first + second, first + " " + second);
                    }
                }
                for (String token : tokens) {
                    collapsedToSpaced.put(token, token);
                }
            }
        }
        return collapsedToSpaced;
    }

    /**
     * Returns a copy of {@code strArr} without the entries that
     * {@code stopwords} reports as stopwords. The relative order of the
     * remaining tokens is preserved and the input array is not modified.
     *
     * @param stopwords the stopword predicate
     * @param strArr    the tokens to filter
     * @return a new array holding only the non-stopword tokens; empty if all
     *         tokens are stopwords or the input is empty
     */
    public static String[] removeStopWords(Stopwords stopwords, String[] strArr) {
        String[] kept = new String[strArr.length];
        int keptCount = 0;
        for (String token : strArr) {
            if (!stopwords.isStopWord(token)) {
                kept[keptCount++] = token;
            }
        }
        // Trim the unused tail left by the removed stopwords.
        return Arrays.copyOf(kept, keptCount);
    }

    /**
     * Loads a {@link SetTokenTree} from a delimited file.
     *
     * <p>For every line, the value of column {@code i} is skipped when it is a
     * stopword as a whole; otherwise it is tokenized, stopwords are removed,
     * and, if at least one token remains, a {@link TokenTree} built from those
     * tokens and the value of column {@code i2} is added to the result. The
     * number of available tokens is logged at INFO level once the file has
     * been read.
     *
     * @param file      the file to read, one record per line
     * @param stopwords the stopwords used to skip labels and filter tokens
     * @param str       the column separator, interpreted as a regular expression
     *                  by {@link String#split(String)}
     * @param i         zero-based index of the column that is tokenized
     * @param i2        zero-based index of the column passed as the third
     *                  argument of the {@code TokenTree} constructor
     *                  (presumably the term's code; confirm against {@code TokenTree})
     * @return the populated token tree set
     * @throws IOException if the file cannot be opened or read
     * @throws ArrayIndexOutOfBoundsException if a line has no column at index
     *         {@code i}, or a non-stopword line has none at index {@code i2}
     */
    public static SetTokenTree loadTokenTree(File file, Stopwords stopwords, String str, int i, int i2) throws IOException {
        SetTokenTree setTokenTree = new SetTokenTree();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = line.split(str);
                String label = columns[i];
                if (stopwords.isStopWord(label)) {
                    continue;
                }
                String code = columns[i2];
                String[] tokens = removeStopWords(stopwords, TokenizerNormalizer.tokenizeAlphaNum(label));
                if (tokens.length != 0) {
                    setTokenTree.addTokenTree(new TokenTree(null, tokens, code));
                }
            }
        }
        logger.info("tokenTreeSet0 size : {}", setTokenTree.getAvailableTokens().size());
        return setTokenTree;
    }

    /**
     * Creates a {@link TokenizerNormalizer} backed by a new
     * {@link NormalizerTerm} built from the given stopwords.
     *
     * @param stopwords the stopwords handed to the {@code NormalizerTerm}
     * @return a new tokenizer/normalizer instance
     */
    public static TokenizerNormalizer getTokenizerNormalizer(Stopwords stopwords) {
        return new TokenizerNormalizer(new NormalizerTerm(stopwords));
    }
}
