package fr.erias.IAMsystem.lucene;

import fr.erias.IAMsystem.stopwords.IStopwords;
import fr.erias.IAMsystem.terminology.Term;
import fr.erias.IAMsystem.terminology.Terminology;
import fr.erias.IAMsystem.tokenizernormalizer.ITokenizerNormalizer;
import fr.erias.IAMsystem.tokenizernormalizer.TokenizerNormalizer;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;

/* loaded from: input_file:fr/erias/IAMsystem/lucene/IndexBigramLucene.class */
/**
 * Builds a Lucene index of the unique tokens and token bigrams found in the
 * normalized labels of a {@link Terminology}.
 *
 * <p>Each indexed document carries two stored string fields:
 * {@link #CONCATENATED_FIELD} (the lookup key) and {@link #BIGRAM_FIELD}
 * (the value associated with that key).
 */
public class IndexBigramLucene {
    /** Field holding the key: a single token, or two adjacent tokens concatenated without a space. */
    public static final String CONCATENATED_FIELD = "CONCATENATED_FIELD";

    /** Field holding the value: the single token itself, or the two tokens separated by one space. */
    public static final String BIGRAM_FIELD = "BIGRAM_FIELD";

    /** Folder name used for the index when the caller does not supply one. */
    public static final String LUCENE_INDEX_FOLDER = "LUCENE_INDEX_FOLDER";

    /**
     * Indexes the unique tokens and bigrams of {@code terminology} into the
     * default folder named by {@link #LUCENE_INDEX_FOLDER}.
     *
     * @param terminology the terminology whose terms are indexed
     * @param iTokenizerNormalizer supplies the tokenizer and the stopwords
     * @throws IOException if the index cannot be opened or written
     */
    public static void IndexLuceneUniqueTokensBigram(Terminology terminology, ITokenizerNormalizer iTokenizerNormalizer) throws IOException {
        IndexLuceneUniqueTokensBigram(terminology, iTokenizerNormalizer, new File(LUCENE_INDEX_FOLDER));
    }

    /**
     * Indexes the unique tokens and bigrams of {@code terminology} into the
     * given folder. The index is opened in {@code CREATE} mode, so any index
     * already present in that folder is replaced.
     *
     * @param terminology the terminology whose terms are indexed
     * @param iTokenizerNormalizer supplies the tokenizer and the stopwords
     * @param file the folder in which the Lucene index is written
     * @throws IOException if the index cannot be opened or written
     */
    public static void IndexLuceneUniqueTokensBigram(Terminology terminology, ITokenizerNormalizer iTokenizerNormalizer, File file) throws IOException {
        IndexLuceneUniqueTokensBigram(getUniqueTokenBigram(terminology, iTokenizerNormalizer), file);
    }

    /**
     * Writes one Lucene document per map entry: the key goes to
     * {@link #CONCATENATED_FIELD} and the value to {@link #BIGRAM_FIELD}.
     *
     * @param hashMap key/value pairs to index
     * @param file the folder in which the Lucene index is written
     * @throws IOException if the index cannot be opened or written
     */
    private static void IndexLuceneUniqueTokensBigram(HashMap<String, String> hashMap, File file) throws IOException {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig();
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        // try-with-resources closes the writer first, then the directory (reverse
        // declaration order), and does so even when opening or writing fails, so
        // neither the directory handle nor the index write lock is leaked.
        try (FSDirectory directory = FSDirectory.open(file.toPath());
                IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
            for (Map.Entry<String, String> entry : hashMap.entrySet()) {
                Document document = new Document();
                document.add(new StringField(CONCATENATED_FIELD, entry.getKey(), Field.Store.YES));
                document.add(new StringField(BIGRAM_FIELD, entry.getValue(), Field.Store.YES));
                indexWriter.addDocument(document);
            }
        }
    }

    /**
     * Collects, over every term of the terminology, the unique tokens and the
     * unique bigrams of adjacent tokens remaining after stopword removal.
     *
     * <p>A bigram is stored as {@code "ab" -> "a b"} and is skipped when its
     * second token is exactly one character long. Every token is stored as
     * {@code "a" -> "a"}.
     *
     * @param terminology the terminology whose normalized labels are tokenized
     * @param iTokenizerNormalizer supplies the tokenizer and the stopwords
     * @return map from concatenated key to its space-separated form
     */
    private static HashMap<String, String> getUniqueTokenBigram(Terminology terminology, ITokenizerNormalizer iTokenizerNormalizer) {
        IStopwords stopwords = iTokenizerNormalizer.getNormalizer().getStopwords();
        HashMap<String, String> hashMap = new HashMap<>();
        Iterator<Term> it = terminology.getTerms().iterator();
        while (it.hasNext()) {
            String[] tokens = IStopwords.removeStopWords(stopwords, iTokenizerNormalizer.getTokenizer().tokenize(it.next().getNormalizedLabel()));
            for (int i = 0; i < tokens.length - 1; i++) {
                String first = tokens[i];
                String second = tokens[i + 1];
                if (second.length() != 1) {
                    hashMap.put(first + second, first + " " + second);
                }
            }
            // Unigrams are inserted after the bigrams of the same term: if a token
            // equals a concatenated bigram key, the unigram mapping overwrites it.
            for (String token : tokens) {
                hashMap.put(token, token);
            }
        }
        return hashMap;
    }

    /**
     * Collects, over every term of the terminology, the unique tokens of at
     * least five characters remaining after stopword removal, each mapped to
     * itself. Not called from within this class.
     *
     * @param terminology the terminology whose normalized labels are tokenized
     * @param tokenizerNormalizer supplies the tokenizer and the stopwords
     * @return map from each retained token to itself
     * @throws IOException declared by the signature; kept for compatibility
     */
    private static HashMap<String, String> getUniqueToken2index(Terminology terminology, TokenizerNormalizer tokenizerNormalizer) throws IOException {
        IStopwords stopwords = tokenizerNormalizer.getNormalizer().getStopwords();
        HashMap<String, String> hashMap = new HashMap<>();
        Iterator<Term> it = terminology.getTerms().iterator();
        while (it.hasNext()) {
            for (String token : IStopwords.removeStopWords(stopwords, tokenizerNormalizer.getTokenizer().tokenize(it.next().getNormalizedLabel()))) {
                if (token.length() >= 5) {
                    hashMap.put(token, token);
                }
            }
        }
        return hashMap;
    }
}
