package org.deeplearning4j.text.invertedindex;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.deeplearning4j.berkeley.StringUtils;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.stopwords.StopWords;

/* loaded from: input_file:org/deeplearning4j/text/invertedindex/LuceneInvertedIndex.class */
public class LuceneInvertedIndex implements InvertedIndex {
    private transient Directory dir;
    private transient IndexReader reader;
    private transient Analyzer analyzer;
    private transient IndexSearcher searcher;
    private transient IndexWriter writer;
    private transient IndexWriterConfig iwc;
    private VocabCache vocabCache;
    public static final String WORD_FIELD = "word";
    private int numDocs;
    private List<List<VocabWord>> words;
    private boolean cache;
    private transient ExecutorService indexManager;

    /* loaded from: input_file:org/deeplearning4j/text/invertedindex/LuceneInvertedIndex$Builder.class */
    /**
     * Fluent builder for {@link LuceneInvertedIndex}.
     *
     * <p>A vocabulary cache is mandatory. At most one of {@link #indexDir(File)} and
     * {@link #dir(Directory)} may be supplied. The file-based directory is only opened
     * (together with an {@link IndexWriter}) when in-memory document caching is disabled.
     */
    public static class Builder {
        private File indexDir;
        private Directory dir;
        private IndexReader reader;
        // No setter assigns this field, so it is always handed over as null.
        private IndexSearcher searcher;
        private IndexWriter writer;
        private VocabCache vocabCache;
        private Analyzer analyzer;
        // Created in build() once the analyzer is known; a field initializer would run
        // while the analyzer is still null.
        private IndexWriterConfig iwc;
        private List<String> stopWords = StopWords.getStopWords();
        private boolean cache = true;

        /**
         * Chooses whether documents are kept in memory.
         *
         * @param z {@code true} to cache documents in memory (the default)
         * @return this builder
         */
        public Builder cacheDocsInMemory(boolean z) {
            this.cache = z;
            return this;
        }

        /**
         * Sets the file-system folder for the index. Mutually exclusive with
         * {@link #dir(Directory)}; only opened by {@link #build()} when in-memory
         * caching is disabled.
         *
         * @param file index folder; created by {@link #build()} if missing
         * @return this builder
         */
        public Builder indexDir(File file) {
            this.indexDir = file;
            return this;
        }

        /**
         * Sets the vocabulary cache (required).
         *
         * @param vocabCache vocabulary backing the index
         * @return this builder
         */
        public Builder cache(VocabCache vocabCache) {
            this.vocabCache = vocabCache;
            return this;
        }

        /**
         * Sets the stop words used to create the default analyzer. Ignored when an
         * analyzer is supplied via {@link #analyzer(Analyzer)}.
         *
         * @param list stop words, one entry per word
         * @return this builder
         */
        public Builder stopWords(List<String> list) {
            this.stopWords = list;
            return this;
        }

        /**
         * Sets an already opened Lucene directory. Mutually exclusive with
         * {@link #indexDir(File)}.
         *
         * @param directory directory holding the index
         * @return this builder
         */
        public Builder dir(Directory directory) {
            this.dir = directory;
            return this;
        }

        /**
         * Sets a pre-opened index reader.
         *
         * @param indexReader reader to hand to the index
         * @return this builder
         */
        public Builder reader(IndexReader indexReader) {
            this.reader = indexReader;
            return this;
        }

        /**
         * Sets a pre-opened index writer; when present, {@link #build()} does not create one.
         *
         * @param indexWriter writer to hand to the index
         * @return this builder
         */
        public Builder writer(IndexWriter indexWriter) {
            this.writer = indexWriter;
            return this;
        }

        /**
         * Sets the analyzer; when absent, a {@link StandardAnalyzer} over the configured
         * stop words is created by {@link #build()}.
         *
         * @param analyzer analyzer to use for indexing
         * @return this builder
         */
        public Builder analyzer(Analyzer analyzer) {
            this.analyzer = analyzer;
            return this;
        }

        /**
         * Validates the configuration and creates the index.
         *
         * @return the configured index
         * @throws IllegalStateException if both a directory and an index folder were set,
         *                               or if no vocabulary cache was set
         * @throws RuntimeException      wrapping any {@link IOException} raised while creating
         *                               the analyzer, directory or writer
         */
        public InvertedIndex build() {
            // Validate before touching the file system so a misconfigured builder never
            // leaves an opened directory or a locked writer behind.
            if (this.indexDir != null && this.dir != null) {
                throw new IllegalStateException("Please define only a directory or a file directory");
            }
            if (this.vocabCache == null) {
                throw new IllegalStateException("Vocab cache must not be null");
            }
            try {
                if (this.analyzer == null) {
                    // Feed the stop words as characters; no byte round trip through the
                    // platform default charset.
                    this.analyzer = new StandardAnalyzer(new StringReader(StringUtils.join(this.stopWords, "\n")));
                }
                if (this.iwc == null) {
                    this.iwc = new IndexWriterConfig(Version.LATEST, this.analyzer);
                }
                if (this.indexDir != null && !this.cache) {
                    if (!this.indexDir.exists()) {
                        this.indexDir.mkdirs();
                    }
                    this.dir = FSDirectory.open(this.indexDir);
                    if (this.writer == null) {
                        this.writer = new IndexWriter(this.dir, this.iwc);
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }

            LuceneInvertedIndex luceneInvertedIndex = new LuceneInvertedIndex();
            luceneInvertedIndex.vocabCache = this.vocabCache;
            luceneInvertedIndex.dir = this.dir;
            luceneInvertedIndex.writer = this.writer;
            luceneInvertedIndex.cache = this.cache;
            luceneInvertedIndex.reader = this.reader;
            luceneInvertedIndex.searcher = this.searcher;
            luceneInvertedIndex.analyzer = this.analyzer;
            return luceneInvertedIndex;
        }
    }

    /**
     * Creates an index backed by the folder {@code word2vec-index} (relative to the
     * working directory).
     *
     * @param vocabCache vocabulary used by this index
     * @param z          whether documents are cached in memory; the same flag is passed
     *                   on to {@code index(String, boolean)}
     * @throws RuntimeException wrapping any {@link IOException} raised while opening the index
     */
    public LuceneInvertedIndex(VocabCache vocabCache, boolean z) {
        this.numDocs = 0;
        this.words = new CopyOnWriteArrayList<List<VocabWord>>();
        // The analyzer field has not been assigned at this point; index(...) sets it.
        this.iwc = new IndexWriterConfig(Version.LATEST, this.analyzer);
        try {
            index("word2vec-index", z);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.vocabCache = vocabCache;
        this.cache = z;
        this.indexManager = Executors.newFixedThreadPool(1);
    }

    /**
     * Creates an empty, unconfigured index with in-memory caching enabled and a
     * single-thread executor for background indexing. Used by {@link Builder}, which
     * fills in the remaining fields afterwards.
     */
    private LuceneInvertedIndex() {
        this.cache = true;
        this.numDocs = 0;
        this.words = new CopyOnWriteArrayList<List<VocabWord>>();
        this.indexManager = Executors.newFixedThreadPool(1);
        // The analyzer field has not been assigned yet when this config is created.
        this.iwc = new IndexWriterConfig(Version.LATEST, this.analyzer);
    }

    /**
     * Returns the vocabulary words of one document.
     *
     * <p>With in-memory caching the document is taken from the cached list and words no
     * longer contained in the vocabulary are dropped. Otherwise the stored
     * {@link #WORD_FIELD} values are read from the Lucene index and resolved through the
     * vocabulary; values that resolve to {@code null} are skipped so both modes return
     * only known words.
     *
     * @param i document index
     * @return the words of the document; empty if the index could not be read
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public List<VocabWord> document(int i) {
        List<VocabWord> result = new ArrayList<VocabWord>();
        if (this.cache) {
            for (VocabWord candidate : this.words.get(i)) {
                if (this.vocabCache.containsWord(candidate.getWord())) {
                    result.add(candidate);
                }
            }
            return result;
        }
        try {
            // Make sure a reader exists before dereferencing it.
            initReader();
            for (String token : this.reader.document(i).getValues(WORD_FIELD)) {
                VocabWord resolved = this.vocabCache.wordFor(token);
                if (resolved != null) {
                    result.add(resolved);
                }
            }
        } catch (Exception e) {
            // Best effort: report the failure and return what was collected so far.
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Finds the ids of all documents whose {@link #WORD_FIELD} contains the given word.
     *
     * <p>NOTE(review): the word is looked up verbatim with a {@link TermQuery}, while the
     * field is indexed through the analyzer - presumably words must already be in their
     * analyzed form (e.g. lower case) to match; confirm against callers.
     *
     * @param vocabWord word to look up
     * @return Lucene document ids of the matching documents
     * @throws RuntimeException if the index cannot be opened or searched
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public List<Integer> documents(VocabWord vocabWord) {
        // Open the reader/searcher on first use instead of failing on a null searcher.
        initReader();
        try {
            TermQuery query = new TermQuery(new Term(WORD_FIELD, vocabWord.getWord()));
            TopDocs hits = this.searcher.search(query, Integer.MAX_VALUE);
            // Bound the loop by the hits actually returned rather than by totalHits.
            int returned = hits.scoreDocs.length;
            List<Integer> docIds = new ArrayList<Integer>(returned);
            for (int k = 0; k < returned; k++) {
                docIds.add(Integer.valueOf(hits.scoreDocs[k].doc));
            }
            return docIds;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns the number of documents in the index.
     *
     * <p>A positive count recorded earlier is returned as is; otherwise the count is read
     * from the index reader.
     *
     * @return the document count, or {@code 0} if the index cannot be read
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public int numDocuments() {
        if (this.numDocs <= 0) {
            try {
                initReader();
                return this.reader.numDocs();
            } catch (Exception ignored) {
                // Any failure to open or read the index is reported as "no documents".
                return 0;
            }
        }
        return this.numDocs;
    }

    /**
     * Lists every document id known to the index, i.e. {@code 0 .. n-1}.
     *
     * <p>With in-memory caching {@code n} is the number of cached documents; otherwise it
     * is the reader's {@code maxDoc()}.
     *
     * @return the consecutive document ids
     * @throws RuntimeException if caching is disabled and the index cannot be opened
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public Collection<Integer> allDocs() {
        final int docCount;
        if (this.cache) {
            docCount = this.words.size();
        } else {
            // Open the reader on first use, as numDocuments() does.
            initReader();
            docCount = this.reader.maxDoc();
        }
        List<Integer> ids = new ArrayList<Integer>(docCount);
        for (int id = 0; id < docCount; id++) {
            ids.add(Integer.valueOf(id));
        }
        return ids;
    }

    /**
     * Attaches a {@link #WORD_FIELD} field for the given word to the document loaded
     * from the searcher, or to a fresh document when none is returned.
     *
     * <p>NOTE(review): the modified document is never handed back to the index writer,
     * so this method does not appear to persist anything - confirm whether callers rely
     * on it.
     *
     * @param i         Lucene document id
     * @param vocabWord word to attach
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void addWordToDoc(int i, VocabWord vocabWord) {
        final TextField wordField = new TextField(WORD_FIELD, vocabWord.getWord(), Field.Store.YES);
        try {
            initReader();
            final Document loaded = this.searcher.doc(i);
            final Document target = (loaded != null) ? loaded : new Document();
            target.add(wordField);
        } catch (IOException e) {
            e.printStackTrace();
        }
        initReader();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Lazily opens the index reader and searcher.
     *
     * <p>If no reader exists yet, pending writes are committed (when a writer is
     * available) and a reader plus searcher are opened on the directory. If a reader is
     * already present but no searcher has been created for it, only the searcher is
     * created. An existing reader is never reopened here.
     *
     * @throws RuntimeException wrapping any failure to commit or to open the index
     */
    public void initReader() {
        try {
            if (this.reader == null) {
                // A writer is optional: a directory without a writer can still be read.
                if (this.writer != null) {
                    this.writer.commit();
                }
                this.reader = DirectoryReader.open(this.dir);
                this.searcher = new IndexSearcher(this.reader);
            } else if (this.searcher == null) {
                // A reader may be injected without a searcher; create the searcher on demand.
                this.searcher = new IndexSearcher(this.reader);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Adds a new document consisting of the given words.
     *
     * <p>With in-memory caching the word list is appended to the cache and the Lucene
     * document is written by a task submitted to the index executor; if no writer exists
     * yet, that task first opens an index in the folder {@code word2vec-path}. Without
     * caching the document is written on the calling thread. The {@code i} argument is
     * not consulted: documents are always appended.
     *
     * @param i    document index (unused)
     * @param list words of the document
     * @throws RuntimeException wrapping any {@link IOException} from the index writer
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void addWordsToDoc(int i, final List<VocabWord> list) {
        if (!this.cache) {
            Document luceneDoc = wordsAsDocument(list);
            try {
                this.writer.addDocument(luceneDoc, this.analyzer);
                initReader();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return;
        }

        this.words.add(list);
        this.indexManager.execute(new Runnable() {
            @Override
            public void run() {
                try {
                    if (writer == null) {
                        index("word2vec-path", true);
                    }
                    // Built inside the task, i.e. from the list's content at execution time.
                    Document luceneDoc = wordsAsDocument(list);
                    writer.addDocument(luceneDoc, analyzer);
                    initReader();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
        });
    }

    /** Builds a Lucene document holding one stored {@link #WORD_FIELD} field per word. */
    private static Document wordsAsDocument(List<VocabWord> wordList) {
        Document luceneDoc = new Document();
        for (VocabWord entry : wordList) {
            luceneDoc.add(new TextField(WORD_FIELD, entry.getWord(), Field.Store.YES));
        }
        return luceneDoc;
    }

    /**
     * Finalizes indexing: merges the index down to one segment, commits, reopens the
     * reader so it reflects the committed state, and records the document count.
     *
     * <p>With in-memory caching this work is submitted to the index executor (opening the
     * directory {@code word2vec-index} and a writer first if they are missing) and this
     * method returns immediately; I/O failures are printed. Without caching the work runs
     * on the calling thread.
     *
     * @throws RuntimeException if the reader cannot be (re)opened in the non-cached mode
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void finish() {
        if (this.cache) {
            this.indexManager.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        if (dir == null) {
                            dir = FSDirectory.open(new File("word2vec-index"));
                        }
                        if (writer == null) {
                            writer = new IndexWriter(dir, iwc);
                        }
                        writer.forceMerge(1);
                        writer.commit();
                        refreshReader();
                        numDocs = reader.numDocs();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            });
            return;
        }
        try {
            this.writer.forceMerge(1);
            this.writer.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
        refreshReader();
        this.numDocs = this.reader.numDocs();
    }

    /**
     * Makes the reader and searcher reflect the latest commit. A reader that was opened
     * earlier is a point-in-time snapshot, so it is reopened when the index has changed;
     * if no reopenable reader exists, this falls back to {@link #initReader()}.
     */
    private void refreshReader() {
        if (!(this.reader instanceof DirectoryReader)) {
            initReader();
            return;
        }
        try {
            DirectoryReader stale = (DirectoryReader) this.reader;
            // openIfChanged returns null when the snapshot is still current.
            DirectoryReader fresh = DirectoryReader.openIfChanged(stale);
            if (fresh != null) {
                this.reader = fresh;
                this.searcher = new IndexSearcher(fresh);
                // NOTE(review): assumes no search is still running on the old reader.
                stale.close();
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Opens (creating it if necessary) a file-system index in the given folder and
     * installs a new analyzer, writer configuration, directory, writer and - through
     * {@link #initReader()} - reader for it.
     *
     * @param str path of the index folder; missing folders are created
     * @param z   {@code true} to create a fresh index ({@code OpenMode.CREATE}),
     *            {@code false} to create or append ({@code OpenMode.CREATE_OR_APPEND})
     * @throws IOException if the analyzer, directory or writer cannot be created
     */
    public void index(String str, boolean z) throws IOException {
        File indexFolder = new File(str);
        if (!indexFolder.exists()) {
            // mkdirs also handles nested paths whose parents do not exist yet.
            indexFolder.mkdirs();
        }
        // Analyzer with an empty stop-word list.
        this.analyzer = new StandardAnalyzer(new InputStreamReader(new ByteArrayInputStream("".getBytes())));
        // Build a fresh config now that the analyzer exists: the config created by the
        // constructors was built before any analyzer was assigned, and a config instance
        // is not meant to be shared between writers.
        this.iwc = new IndexWriterConfig(Version.LATEST, this.analyzer);
        this.iwc.setOpenMode(z ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        this.iwc.setRAMBufferSizeMB(5000.0d);
        this.dir = FSDirectory.open(indexFolder);
        this.writer = new IndexWriter(this.dir, this.iwc);
        initReader();
    }
}
