package org.deeplearning4j.text.invertedindex;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.deeplearning4j.berkeley.StringUtils;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.stopwords.StopWords;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/deeplearning4j/text/invertedindex/LuceneInvertedIndex.class */
public class LuceneInvertedIndex implements InvertedIndex, IndexReader.ReaderClosedListener, Iterator<List<VocabWord>> {
    // Lucene directory backing the index; opened from indexPath by ensureDirExists() when not supplied.
    private transient Directory dir;
    // Reader over dir; (re)opened by initReader().
    private transient IndexReader reader;
    // Analyzer passed to the writer when documents are added.
    private transient Analyzer analyzer;
    // Searcher built on top of reader by initReader().
    private transient IndexSearcher searcher;
    // Writer used to add documents; created by createWriter() when absent.
    private transient IndexWriter writer;
    // Vocabulary used to filter cached documents and to map stored words back to VocabWord.
    private VocabCache vocabCache;
    // Name of the Lucene field under which every word of a document is stored.
    public static final String WORD_FIELD = "word";
    // Document count recorded by finish(); numDocuments() returns it once it is positive.
    private int numDocs;
    // In-memory copy of every added document, consulted when cache is true.
    private List<List<VocabWord>> words;
    // When true, documents are kept in words and written to Lucene on the indexManager thread.
    private boolean cache;
    // Executor that performs index writes in cache mode.
    private transient ExecutorService indexManager;
    // Set to true by createWriter() before it constructs the writer.
    private AtomicBoolean indexBeingCreated;
    private static Logger log = LoggerFactory.getLogger(LuceneInvertedIndex.class);
    // Default on-disk location of the index.
    public static final String INDEX_PATH = "word2vec-index";
    // Set by onClose(); makes initReader() reopen the reader.
    private AtomicBoolean readerClosed;
    // Running total of words across all documents passed to addWordsToDoc().
    private AtomicInteger totalWords;
    // Number of words at which the background task emits a mini batch.
    private int batchSize;
    // Mini batches assembled by the background task started in startMiniBatches().
    private List<List<VocabWord>> miniBatches;
    // Sampling parameter; values <= 0 make the background task keep every word.
    private double sample;
    // Random state read by the sampling test in the background task.
    private AtomicLong nextRandom;
    // Filesystem path used to open dir when no Directory was supplied.
    private String indexPath;
    // Executor running the background mini-batch task.
    private transient ScheduledExecutorService miniBatchManager;
    // Queue of documents added through addWordsToDoc(); polled by the background task and by next().
    private Queue<List<VocabWord>> miniBatchDocs;
    // Keeps the background mini-batch loop running; cleared by finish().
    private AtomicBoolean miniBatchGoing;

    /* loaded from: input_file:org/deeplearning4j/text/invertedindex/LuceneInvertedIndex$Builder.class */
    /**
     * Fluent builder for {@link LuceneInvertedIndex}.
     *
     * <p>Only {@link #cache(VocabCache)} is mandatory. At most one of {@link #indexDir(File)} and
     * {@link #dir(Directory)} may be supplied.
     */
    public static class Builder {
        private File indexDir;
        private Directory dir;
        private IndexReader reader;
        private IndexSearcher searcher;
        private IndexWriter writer;
        private VocabCache vocabCache;
        private Analyzer analyzer;
        // Created lazily in build(): a field initializer would capture the analyzer while it is
        // still null, because field initializers run before any setter can be called.
        private IndexWriterConfig iwc;
        private List<String> stopWords = StopWords.getStopWords();
        private boolean cache = true;
        private int batchSize = 1000;
        private double sample = 0.0d;

        /** Whether documents are also kept in memory (default {@code true}). */
        public Builder cacheInRam(boolean z) {
            this.cache = z;
            return this;
        }

        /** Sampling parameter exposed by the built index through {@code sample()}. */
        public Builder sample(double d) {
            this.sample = d;
            return this;
        }

        /** Number of words per mini batch (default 1000). */
        public Builder batchSize(int i) {
            this.batchSize = i;
            return this;
        }

        /** Filesystem directory for the index; mutually exclusive with {@link #dir(Directory)}. */
        public Builder indexDir(File file) {
            this.indexDir = file;
            return this;
        }

        /** Vocabulary cache backing the index (required). */
        public Builder cache(VocabCache vocabCache) {
            this.vocabCache = vocabCache;
            return this;
        }

        /** Stop words used to build the default analyzer when none is supplied. */
        public Builder stopWords(List<String> list) {
            this.stopWords = list;
            return this;
        }

        /** Pre-opened Lucene directory; mutually exclusive with {@link #indexDir(File)}. */
        public Builder dir(Directory directory) {
            this.dir = directory;
            return this;
        }

        /** Pre-opened reader to hand to the index. */
        public Builder reader(IndexReader indexReader) {
            this.reader = indexReader;
            return this;
        }

        /** Pre-opened writer to hand to the index. */
        public Builder writer(IndexWriter indexWriter) {
            this.writer = indexWriter;
            return this;
        }

        /** Analyzer to use instead of the stop-word based default. */
        public Builder analyzer(Analyzer analyzer) {
            this.analyzer = analyzer;
            return this;
        }

        /**
         * Builds the index.
         *
         * @return the configured index
         * @throws IllegalStateException if both a file directory and a Lucene directory were given,
         *         or if no vocab cache was supplied
         * @throws RuntimeException wrapping any failure while opening the directory or writer
         */
        public InvertedIndex build() {
            // Validate before allocating anything: constructing the index starts executor threads,
            // which would otherwise be leaked when validation fails.
            if (this.indexDir != null && this.dir != null) {
                throw new IllegalStateException("Please define only a directory or a file directory");
            }
            if (this.vocabCache == null) {
                throw new IllegalStateException("Vocab cache must not be null");
            }
            try {
                if (this.analyzer == null) {
                    this.analyzer = new StandardAnalyzer(new InputStreamReader(new ByteArrayInputStream(StringUtils.join(this.stopWords, "\n").getBytes())));
                }
                if (this.indexDir != null && !this.cache) {
                    if (!this.indexDir.exists()) {
                        this.indexDir.mkdirs();
                    }
                    this.dir = FSDirectory.open(this.indexDir);
                    if (this.writer == null) {
                        if (this.iwc == null) {
                            // The analyzer is guaranteed non-null at this point.
                            this.iwc = new IndexWriterConfig(Version.LATEST, this.analyzer);
                        }
                        this.writer = new IndexWriter(this.dir, this.iwc);
                    }
                }
                LuceneInvertedIndex luceneInvertedIndex = new LuceneInvertedIndex();
                luceneInvertedIndex.batchSize = this.batchSize;
                luceneInvertedIndex.sample = this.sample;
                luceneInvertedIndex.vocabCache = this.vocabCache;
                luceneInvertedIndex.dir = this.dir;
                luceneInvertedIndex.writer = this.writer;
                luceneInvertedIndex.cache = this.cache;
                luceneInvertedIndex.reader = this.reader;
                luceneInvertedIndex.searcher = this.searcher;
                luceneInvertedIndex.analyzer = this.analyzer;
                if (this.indexDir != null) {
                    // In cache mode the directory is opened lazily from indexPath, so the
                    // requested location must be carried over instead of the default path.
                    luceneInvertedIndex.indexPath = this.indexDir.getPath();
                }
                return luceneInvertedIndex;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Creates an index stored under the default {@link #INDEX_PATH}.
     *
     * @param vocabCache vocabulary backing the index
     * @param z whether documents are also cached in memory
     */
    public LuceneInvertedIndex(VocabCache vocabCache, boolean z) {
        this(vocabCache, z, INDEX_PATH);
    }

    /**
     * Creates an index stored under the given path and starts the background mini-batch task.
     *
     * @param vocabCache vocabulary backing the index
     * @param z whether documents are also cached in memory
     * @param str filesystem path of the index
     */
    public LuceneInvertedIndex(VocabCache vocabCache, boolean z, String str) {
        // Caller-supplied configuration.
        this.vocabCache = vocabCache;
        this.cache = z;
        this.indexPath = str;

        // Fixed defaults.
        this.numDocs = 0;
        this.batchSize = 1000;
        this.sample = 0.0d;

        // Shared state.
        this.words = Collections.synchronizedList(new ArrayList<List<VocabWord>>());
        this.miniBatches = new CopyOnWriteArrayList<List<VocabWord>>();
        this.miniBatchDocs = new ConcurrentLinkedDeque<List<VocabWord>>();
        this.totalWords = new AtomicInteger(0);
        this.nextRandom = new AtomicLong(5L);
        this.indexBeingCreated = new AtomicBoolean(false);
        this.readerClosed = new AtomicBoolean(false);
        this.miniBatchGoing = new AtomicBoolean(true);

        // Executors must exist before the background task is scheduled.
        this.miniBatchManager = Executors.newScheduledThreadPool(1);
        this.indexManager = Executors.newFixedThreadPool(1);
        startMiniBatches();
    }

    /**
     * Creates an index with every default: no vocab cache, in-memory caching on, default
     * {@link #INDEX_PATH}. Used by the builder, which fills in the remaining fields.
     */
    private LuceneInvertedIndex() {
        this(null, true, INDEX_PATH);
    }

    /** Returns the sampling parameter this index was configured with. */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public double sample() {
        return sample;
    }

    /** Returns the mini-batch iterator, which is this index itself. */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public Iterator<List<VocabWord>> miniBatches() {
        Iterator<List<VocabWord>> self = this;
        return self;
    }

    /**
     * Returns the words of document {@code i}.
     *
     * <p>In cache mode the words come from the in-memory copy, restricted to words the vocab
     * cache still contains. Otherwise the stored {@code "word"} values are read from the Lucene
     * index and mapped through the vocab cache.
     *
     * @param i document number
     * @return the document's words; possibly empty when the index could not be read
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public List<VocabWord> document(int i) {
        if (this.cache) {
            List<VocabWord> known = new ArrayList<>();
            for (VocabWord vocabWord : this.words.get(i)) {
                if (this.vocabCache.containsWord(vocabWord.getWord())) {
                    known.add(vocabWord);
                }
            }
            return known;
        }
        List<VocabWord> stored = new CopyOnWriteArrayList<>();
        try {
            initReader();
            for (String str : this.reader.document(i).getValues(WORD_FIELD)) {
                stored.add(this.vocabCache.wordFor(str));
            }
        } catch (AlreadyClosedException e) {
            // Must precede the generic handler: AlreadyClosedException is a RuntimeException,
            // so a preceding catch (Exception) would make this clause unreachable.
            // Dropping the reader forces initReader() to open a fresh one on the retry.
            this.reader = null;
            this.readerClosed.set(false);
            return document(i);
        } catch (Exception e) {
            log.error("Failed to read document " + i + " from the index", e);
        }
        return stored;
    }

    /**
     * Returns the numbers of all documents containing the given word.
     *
     * @param vocabWord word to look up in the {@code "word"} field
     * @return matching document numbers, in the order Lucene returned them
     * @throws RuntimeException if the search fails with an I/O error
     * @throws AlreadyClosedException if the reader is still closed after one reopen attempt
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public List<Integer> documents(VocabWord vocabWord) {
        TermQuery query = new TermQuery(new Term(WORD_FIELD, vocabWord.getWord()));
        boolean retried = false;
        while (true) {
            try {
                // Make sure a searcher exists before it is dereferenced.
                initReader();
                TopDocs hits = this.searcher.search(query, Integer.MAX_VALUE);
                List<Integer> ids = new ArrayList<>(hits.scoreDocs.length);
                // Bound by the returned array rather than totalHits so the index can never overrun.
                for (int i = 0; i < hits.scoreDocs.length; i++) {
                    ids.add(Integer.valueOf(hits.scoreDocs[i].doc));
                }
                return ids;
            } catch (AlreadyClosedException e) {
                if (retried) {
                    throw e;
                }
                retried = true;
                // initReader() only reopens when the reader is null or flagged closed, so the
                // stale reader is dropped explicitly; retrying without this would never terminate.
                this.reader = null;
                this.readerClosed.set(false);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Returns the number of documents: the recorded count when it is positive, otherwise the
     * count reported by the reader, or 0 when the reader cannot be consulted.
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public int numDocuments() {
        if (this.numDocs <= 0) {
            try {
                initReader();
                return this.reader.numDocs();
            } catch (Exception ignored) {
                // Best effort: an unreadable index is reported as empty.
                return 0;
            }
        }
        return this.numDocs;
    }

    /**
     * Returns the numbers of all documents in the index.
     *
     * <p>In cache mode these are the positions of the in-memory documents; otherwise they are the
     * live (non-deleted) document numbers reported by the Lucene reader.
     *
     * @return document numbers in ascending order
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public Collection<Integer> allDocs() {
        List<Integer> ids = new ArrayList<>();
        if (this.cache) {
            for (int i = 0; i < this.words.size(); i++) {
                ids.add(Integer.valueOf(i));
            }
            return ids;
        }
        // Make sure a reader exists before it is dereferenced.
        initReader();
        Bits liveDocs = MultiFields.getLiveDocs(this.reader);
        // maxDoc() is one greater than the largest document number, so valid ids are
        // 0 .. maxDoc - 1; going one further yields an invalid id and an out-of-range Bits access.
        int maxDoc = this.reader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            // A null liveDocs means the index has no deletions.
            if (liveDocs == null || liveDocs.get(docId)) {
                ids.add(Integer.valueOf(docId));
            }
        }
        return ids;
    }

    /**
     * Adds a {@code "word"} field for the given word to the stored form of document {@code i}.
     *
     * <p>NOTE(review): the field is only added to the in-memory {@link Document} returned by the
     * searcher; nothing in this method writes that document back through the index writer, so
     * the change is not persisted. Confirm the intended behaviour before relying on it.
     *
     * @param i document number
     * @param vocabWord word to add
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void addWordToDoc(int i, VocabWord vocabWord) {
        TextField wordField = new TextField(WORD_FIELD, vocabWord.getWord(), Field.Store.YES);
        try {
            initReader();
            Document stored = this.searcher.doc(i);
            if (stored != null) {
                stored.add(wordField);
            }
        } catch (IOException e) {
            log.error("Unable to load document " + i + " to add word " + vocabWord.getWord(), e);
        }
        initReader();
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Makes sure a usable reader and searcher exist.
     *
     * <ul>
     *   <li>No reader yet: pending writes are committed (when a writer exists) and a reader is
     *       opened; any failure is rethrown as a {@link RuntimeException}.</li>
     *   <li>Reader flagged closed: a new reader is opened; an I/O failure is logged.</li>
     *   <li>Reader present but no searcher: a searcher is created over the existing reader.</li>
     * </ul>
     */
    public void initReader() {
        if (this.reader == null) {
            try {
                // A reader may legitimately be requested before any writer was created.
                if (this.writer != null) {
                    this.writer.commit();
                }
                openReader();
                return;
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
        if (this.readerClosed.get()) {
            try {
                openReader();
                this.readerClosed.set(false);
            } catch (IOException e) {
                log.error("Unable to reopen the index reader", e);
            }
            return;
        }
        if (this.searcher == null) {
            // Covers a reader that was supplied from outside without a matching searcher.
            this.searcher = new IndexSearcher(this.reader);
        }
    }

    /** Opens a reader over {@code dir}, subscribes to its close event and builds the searcher. */
    private void openReader() throws IOException {
        IndexReader opened = DirectoryReader.open(this.dir);
        // Without this registration onClose() is never invoked and readerClosed never flips.
        opened.addReaderClosedListener(this);
        this.reader = opened;
        this.searcher = new IndexSearcher(opened);
    }

    /**
     * Adds a document made of the given words.
     *
     * <p>In cache mode the words are stored in memory and the Lucene write is handed to the index
     * executor; otherwise the document is written on the calling thread. In both modes the word
     * total is increased and the document is queued for mini-batch processing.
     *
     * @param i document number (not used by this implementation)
     * @param list words of the document
     * @throws RuntimeException if the synchronous index write fails with an I/O error
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void addWordsToDoc(int i, final List<VocabWord> list) {
        if (this.cache) {
            this.words.add(list);
            this.indexManager.execute(new Runnable() { // from class: org.deeplearning4j.text.invertedindex.LuceneInvertedIndex.1
                @Override // java.lang.Runnable
                public void run() {
                    try {
                        LuceneInvertedIndex.this.indexDocument(list);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        } else {
            try {
                indexDocument(list);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
        // Atomic increment: a separate get() and set() can lose concurrent updates.
        this.totalWords.addAndGet(list.size());
        this.miniBatchDocs.add(list);
    }

    /** Writes one document to the Lucene index, creating or awaiting the writer as needed. */
    private void indexDocument(List<VocabWord> docWords) throws IOException {
        if (this.writer == null && !this.indexBeingCreated.get()) {
            createWriter();
        }
        if (this.writer == null) {
            // Creation is in progress elsewhere.
            waitOnWriter();
        }
        Document document = new Document();
        for (VocabWord word : docWords) {
            document.add(new TextField(WORD_FIELD, word.getWord(), Field.Store.YES));
        }
        this.writer.addDocument(document, this.analyzer);
        initReader();
    }

    /**
     * Schedules the background task that turns queued documents into mini batches.
     *
     * <p>The task runs until {@code miniBatchGoing} is cleared or its thread is interrupted. Each
     * polled document is scanned word by word; words that pass the sampling test are appended to
     * the current batch, which is moved to {@code miniBatches} whenever it reaches
     * {@code batchSize}. A trailing partial batch is emitted when the loop ends.
     *
     * <p>NOTE(review): this task polls the same queue that {@code next()} reads from, so a
     * document is seen by only one of the two consumers — confirm that this is intended. The
     * one-second pause after every document is kept from the original implementation.
     */
    private void startMiniBatches() {
        this.miniBatchManager.schedule(new Runnable() { // from class: org.deeplearning4j.text.invertedindex.LuceneInvertedIndex.2
            @Override // java.lang.Runnable
            public void run() {
                List<VocabWord> batch = new ArrayList<>();
                while (LuceneInvertedIndex.this.miniBatchGoing.get()) {
                    List<VocabWord> doc = LuceneInvertedIndex.this.miniBatchDocs.poll();
                    if (doc != null && !doc.isEmpty()) {
                        for (VocabWord word : doc) {
                            if (!LuceneInvertedIndex.this.passesSampling(word)) {
                                continue;
                            }
                            batch.add(word);
                            // Flush on every path; otherwise sampled words pile up without bound.
                            if (batch.size() >= LuceneInvertedIndex.this.batchSize) {
                                LuceneInvertedIndex.this.miniBatches.add(new ArrayList<>(batch));
                                batch.clear();
                            }
                        }
                    }
                    try {
                        Thread.sleep(1000L);
                    } catch (InterruptedException e) {
                        // Stop instead of looping: with the flag set, sleep() would throw
                        // immediately on every iteration and the loop would spin.
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
                if (!batch.isEmpty()) {
                    LuceneInvertedIndex.this.miniBatches.add(new ArrayList<>(batch));
                }
            }
        }, 1L, TimeUnit.SECONDS);
    }

    /**
     * Decides whether a word survives sub-sampling. Every word passes when {@code sample <= 0}.
     * Otherwise the keep score is compared against the next value of a linear congruential
     * generator, which is advanced on each call so successive words see different thresholds.
     */
    private boolean passesSampling(VocabWord word) {
        if (this.sample <= 0.0d) {
            return true;
        }
        double scaled = this.sample * numDocuments();
        double frequency = word.getWordFrequency();
        double keepScore = ((Math.sqrt(frequency / scaled) + 1.0d) * scaled) / frequency;
        // Same generator constants as the word2vec reference implementation.
        long random = this.nextRandom.get() * 25214903917L + 11L;
        this.nextRandom.set(random);
        return keepScore >= (random & 65535) / 65536.0d;
    }

    /**
     * Opens the Lucene directory at {@code indexPath} when none has been set, creating the
     * filesystem directory (including missing parents) first.
     *
     * @throws Exception if the directory cannot be opened
     */
    private void ensureDirExists() throws Exception {
        if (this.dir != null) {
            return;
        }
        File indexDirectory = new File(this.indexPath);
        if (!indexDirectory.exists()) {
            // mkdirs() rather than mkdir(): the configured path may be nested.
            indexDirectory.mkdirs();
        }
        this.dir = FSDirectory.open(indexDirectory);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Creates the index writer unless one already exists or another thread is creating it.
     *
     * <p>If the directory lock cannot be obtained, the lock is forcibly released and creation is
     * attempted once more. On any failure the "being created" flag is reset so that a later call
     * can try again.
     *
     * @throws IllegalStateException if the writer cannot be created
     * @throws RuntimeException if a stale lock cannot be released
     */
    public void createWriter() {
        if (this.writer != null) {
            // Never replace a live writer; a second writer on the same directory cannot lock it.
            return;
        }
        // Atomically claim creation so that exactly one thread proceeds.
        if (!this.indexBeingCreated.compareAndSet(false, true)) {
            return;
        }
        try {
            ensureDirExists();
            if (this.analyzer == null) {
                // Default analyzer with an empty stop-word list.
                this.analyzer = new StandardAnalyzer(new InputStreamReader(new ByteArrayInputStream("".getBytes())));
            }
            this.writer = new IndexWriter(this.dir, new IndexWriterConfig(Version.LATEST, this.analyzer));
        } catch (LockObtainFailedException lockFailure) {
            // Must precede the generic handler, which would otherwise make this clause unreachable.
            try {
                IndexWriter.unlock(this.dir);
            } catch (IOException unlockFailure) {
                this.indexBeingCreated.set(false);
                throw new RuntimeException("Unable to unlock directory " + this.indexPath, unlockFailure);
            }
            try {
                log.warn("Failed to created writer...trying again");
                this.writer = new IndexWriter(this.dir, new IndexWriterConfig(Version.LATEST, this.analyzer));
            } catch (IOException retryFailure) {
                this.indexBeingCreated.set(false);
                throw new IllegalStateException("Failed to created writer", retryFailure);
            }
        } catch (Exception e) {
            this.indexBeingCreated.set(false);
            throw new IllegalStateException("Failed to created writer", e);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Blocks, checking once per second, until the index writer has been created.
     *
     * @throws IllegalStateException if the waiting thread is interrupted; the interrupt flag is
     *         restored before throwing
     */
    public void waitOnWriter() {
        while (this.writer == null) {
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException e) {
                // Continuing would spin: with the flag restored, sleep() throws immediately on
                // every subsequent iteration.
                Thread.currentThread().interrupt();
                throw new IllegalStateException("Interrupted while waiting for the index writer", e);
            }
        }
    }

    /**
     * Finalizes the index: merges it down to a single segment, commits, records the document
     * count, and shuts down both background executors.
     *
     * <p>In cache mode the merge and commit run on the index executor, after every queued write.
     * Otherwise they run on the calling thread and the reader is closed afterwards.
     */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public void finish() {
        if (this.cache) {
            this.indexManager.execute(new Runnable() { // from class: org.deeplearning4j.text.invertedindex.LuceneInvertedIndex.3
                @Override // java.lang.Runnable
                public void run() {
                    try {
                        if (LuceneInvertedIndex.this.writer == null) {
                            if (LuceneInvertedIndex.this.indexBeingCreated.get()) {
                                LuceneInvertedIndex.this.waitOnWriter();
                            } else {
                                // Goes through createWriter() so the configured indexPath and a
                                // non-null analyzer are used, as for regular writes.
                                LuceneInvertedIndex.this.createWriter();
                            }
                        }
                        // The directory is deliberately not unlocked here: the lock belongs to
                        // this very writer, and forced unlocking is meant for failure recovery only.
                        LuceneInvertedIndex.this.writer.forceMerge(1);
                        LuceneInvertedIndex.this.writer.commit();
                        LuceneInvertedIndex.this.initReader();
                        LuceneInvertedIndex.this.numDocs = LuceneInvertedIndex.this.reader.numDocs();
                    } catch (Exception e) {
                        log.error("Failed to finalize the index", e);
                    }
                }
            });
        } else {
            try {
                log.info("Committing index...");
                this.writer.forceMerge(1);
                this.writer.commit();
                log.info("Finished committing changes");
            } catch (IOException e) {
                log.error("Failed to commit the index", e);
            }
            initReader();
            this.numDocs = this.reader.numDocs();
            try {
                this.reader.close();
                // Flag the reader so the next initReader() reopens it instead of reusing it.
                this.readerClosed.set(true);
            } catch (IOException e2) {
                log.error("Failed to close the index reader", e2);
            }
        }
        this.indexManager.shutdown();
        this.miniBatchManager.shutdown();
        try {
            this.indexManager.awaitTermination(1L, TimeUnit.DAYS);
            this.miniBatchGoing.set(false);
            this.miniBatchManager.awaitTermination(1L, TimeUnit.MINUTES);
        } catch (InterruptedException e3) {
            Thread.currentThread().interrupt();
        } finally {
            // Always stop the background loop, even when the wait above was interrupted.
            this.miniBatchGoing.set(false);
        }
    }

    /** Returns the running total of words across all added documents. */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public int totalWords() {
        int total = this.totalWords.get();
        return total;
    }

    /** Returns the configured mini-batch size. */
    @Override // org.deeplearning4j.text.invertedindex.InvertedIndex
    public int batchSize() {
        return batchSize;
    }

    /**
     * Reader-closed callback: records that the reader has been closed.
     *
     * @param indexReader the reader that was closed (not inspected)
     */
    @Override
    public void onClose(IndexReader indexReader) {
        readerClosed.set(true);
    }

    /**
     * Reports whether more documents may still be delivered: true while documents are queued,
     * while the background task is flagged as running, or while its executor is not shut down.
     */
    @Override // java.util.Iterator
    public boolean hasNext() {
        if (!this.miniBatchDocs.isEmpty()) {
            return true;
        }
        if (this.miniBatchGoing.get()) {
            return true;
        }
        return !this.miniBatchManager.isShutdown();
    }

    /* JADX WARN: Can't rename method to resolve collision */
    /**
     * Returns the next queued document, waiting for one to arrive when the queue is empty.
     *
     * @return the next document, or {@code null} when the mini-batch executor has been shut down
     *         (or the waiting thread was interrupted) and nothing is queued
     */
    @Override // java.util.Iterator
    public List<VocabWord> next() {
        if (!this.miniBatchDocs.isEmpty()) {
            return this.miniBatchDocs.poll();
        }
        if (this.miniBatchManager.isShutdown()) {
            return null;
        }
        while (this.miniBatchDocs.isEmpty()) {
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException e) {
                // Stop waiting: with the flag restored, sleep() would throw immediately again.
                Thread.currentThread().interrupt();
                return this.miniBatchDocs.poll();
            }
            log.warn("Waiting on more data...");
            if (this.miniBatchManager.isShutdown()) {
                return this.miniBatchDocs.poll();
            }
        }
        // Data arrived while waiting: hand it out instead of returning null.
        return this.miniBatchDocs.poll();
    }
}
