package org.deeplearning4j.bagofwords.vectorizer;

import akka.actor.ActorRef;
import akka.actor.ActorSystem;
import akka.actor.Props;
import akka.routing.RoundRobinPool;
import java.io.File;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.deeplearning4j.models.word2vec.StreamWork;
import org.deeplearning4j.models.word2vec.VocabWork;
import org.deeplearning4j.models.word2vec.actor.VocabActor;
import org.deeplearning4j.models.word2vec.wordstore.VocabCache;
import org.deeplearning4j.text.documentiterator.DocumentIterator;
import org.deeplearning4j.text.invertedindex.InvertedIndex;
import org.deeplearning4j.text.invertedindex.LuceneInvertedIndex;
import org.deeplearning4j.text.sentenceiterator.SentenceIterator;
import org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/deeplearning4j/bagofwords/vectorizer/BaseTextVectorizer.class */
/**
 * Base class for text vectorizers. Its {@link #fit()} implementation dispatches the
 * configured documents and sentences to a round-robin pool of {@link VocabActor} workers
 * and then finalises the {@link InvertedIndex}.
 *
 * <p>The {@link ActorSystem} is held in a static field and is therefore shared by every
 * instance in the JVM; {@link #fit()} performs no synchronisation around it.
 */
public abstract class BaseTextVectorizer implements TextVectorizer {
    private static final Logger log = LoggerFactory.getLogger(BaseTextVectorizer.class);

    /** A progress line is logged (and the sender yields briefly) every this many items sent. */
    private static final int PROGRESS_INTERVAL = 10000;

    /** Pause taken by the sender after each progress line, in milliseconds. */
    private static final long SEND_PAUSE_MILLIS = 1L;

    /** Interval between polls of the completion counter while waiting, in milliseconds. */
    private static final long COMPLETION_POLL_MILLIS = 10000L;

    /** Actor system used by {@link #fit()}; created on demand and released when fit ends. */
    protected static ActorSystem trainingSystem;

    protected transient VocabCache cache;
    protected transient TokenizerFactory tokenizerFactory;
    protected List<String> stopWords;
    private int layerSize = 0;
    protected int minWordFrequency = 5;
    protected transient DocumentIterator docIter;
    protected List<String> labels;
    protected transient SentenceIterator sentenceIterator;
    protected AtomicLong numWordsEncountered = new AtomicLong(0L);
    protected InvertedIndex index;
    protected int batchSize = 1000;
    protected double sample = 0.0d;
    protected boolean stem = false;

    /**
     * Creates a vectorizer with default settings (minimum word frequency 5, batch size 1000,
     * no sub-sampling, no stemming).
     *
     * <p>No cache, tokenizer factory, iterators or index are assigned here. Because
     * {@link #fit()} calls {@code index.finish()} unconditionally, an index must be assigned
     * before {@link #fit()} is invoked on an instance built this way.
     */
    public BaseTextVectorizer() {
    }

    /**
     * Creates a fully configured vectorizer.
     *
     * @param cache            vocabulary cache handed to the workers and returned by {@link #vocab()}
     * @param tokenizerFactory tokenizer factory handed to the workers
     * @param stopWords        stop-word list handed to the workers
     * @param layerSize        value exposed through {@link #getLayerSize()}
     * @param minWordFrequency minimum word frequency handed to the workers
     * @param docIter          document source, may be {@code null}
     * @param sentenceIterator sentence source, may be {@code null}
     * @param labels           labels stored on this instance
     * @param index            inverted index to use; when {@code null} a
     *                         {@link LuceneInvertedIndex} rooted at {@code word2vec-index} is
     *                         built from {@code batchSize}, {@code sample} and {@code cache}
     * @param batchSize        value returned by {@link #batchSize()}
     * @param sample           value returned by {@link #sample()}
     * @param stem             flag forwarded with every sentence work item
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public BaseTextVectorizer(VocabCache cache, TokenizerFactory tokenizerFactory, List<String> stopWords, int layerSize, int minWordFrequency, DocumentIterator docIter, SentenceIterator sentenceIterator, List<String> labels, InvertedIndex index, int batchSize, double sample, boolean stem) {
        this.cache = cache;
        this.tokenizerFactory = tokenizerFactory;
        this.stopWords = stopWords;
        this.layerSize = layerSize;
        this.minWordFrequency = minWordFrequency;
        this.docIter = docIter;
        this.sentenceIterator = sentenceIterator;
        this.labels = labels;
        this.index = index;
        this.batchSize = batchSize;
        this.sample = sample;
        this.stem = stem;
        if (index == null) {
            this.index = new LuceneInvertedIndex.Builder().batchSize(batchSize).indexDir(new File("word2vec-index")).sample(sample).cache(cache).build();
        }
    }

    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public int batchSize() {
        return this.batchSize;
    }

    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public double sample() {
        return this.sample;
    }

    /**
     * Sends every available document and sentence to a pool of {@link VocabActor} workers
     * (one routee per available processor), waits until the shared completion counter has
     * caught up with the number of items sent, then calls {@code finish()} on the index.
     *
     * <p>The actor system is always shut down when this method exits, including on failure,
     * and the static reference is cleared so that a later call creates a fresh system instead
     * of reusing a terminated one.
     *
     * <p>An interrupt received while waiting does not abort the wait; it is remembered and the
     * thread's interrupt status is restored once the wait is over.
     */
    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public void fit() {
        if (trainingSystem == null) {
            trainingSystem = ActorSystem.create();
        }
        // Use a local reference so that clearing the static field (here or in another
        // instance's fit()) can never make the shutdown below dereference null.
        final ActorSystem system = trainingSystem;
        try {
            AtomicLong timestamp = new AtomicLong(System.currentTimeMillis());
            AtomicInteger queued = new AtomicInteger(0);
            // Handed to every work item; the wait below expects it to reach `queued`
            // (presumably advanced by the workers - it is never modified in this class).
            AtomicInteger latch = new AtomicInteger(0);
            ActorRef workers = system.actorOf(new RoundRobinPool(Runtime.getRuntime().availableProcessors()).props(Props.create(VocabActor.class, new Object[]{this.tokenizerFactory, this.cache, this.stopWords, timestamp, Integer.valueOf(this.minWordFrequency), this.numWordsEncountered, this.index})));

            while (this.docIter != null && this.docIter.hasNext()) {
                workers.tell(new StreamWork(new DefaultInputStreamCreator(this.docIter), latch), workers);
                recordSent(queued);
            }

            String sentence;
            while (getSentenceIterator() != null && getSentenceIterator().hasNext() && (sentence = getSentenceIterator().nextSentence()) != null) {
                workers.tell(new VocabWork(latch, sentence, this.stem), workers);
                recordSent(queued);
            }

            awaitCompletion(latch, queued);

            log.info("Invoking finish on index");
            this.index.finish();
        } finally {
            system.shutdown();
            if (trainingSystem == system) {
                trainingSystem = null;
            }
        }
    }

    /**
     * Counts one dispatched work item; every {@link #PROGRESS_INTERVAL} items logs progress
     * and pauses for {@link #SEND_PAUSE_MILLIS} ms.
     */
    private static void recordSent(AtomicInteger queued) {
        int sent = queued.incrementAndGet();
        if (sent % PROGRESS_INTERVAL != 0) {
            return;
        }
        log.info("Sent {}", sent);
        try {
            Thread.sleep(SEND_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Polls until {@code latch} is no longer below {@code queued}. An interrupt is deferred
     * rather than re-asserted inside the loop: re-asserting it would make every subsequent
     * {@code sleep} throw immediately and turn the wait into a busy spin.
     */
    private static void awaitCompletion(AtomicInteger latch, AtomicInteger queued) {
        boolean interrupted = false;
        try {
            while (latch.get() < queued.get()) {
                try {
                    Thread.sleep(COMPLETION_POLL_MILLIS);
                    log.info("latch count {} with queued {}", latch.get(), queued.get());
                } catch (InterruptedException e) {
                    interrupted = true;
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public VocabCache vocab() {
        return this.cache;
    }

    public SentenceIterator getSentenceIterator() {
        return this.sentenceIterator;
    }

    public void setSentenceIterator(SentenceIterator sentenceIterator) {
        this.sentenceIterator = sentenceIterator;
    }

    public DocumentIterator getDocIter() {
        return this.docIter;
    }

    public void setDocIter(DocumentIterator docIter) {
        this.docIter = docIter;
    }

    public int getMinWordFrequency() {
        return this.minWordFrequency;
    }

    public void setMinWordFrequency(int minWordFrequency) {
        this.minWordFrequency = minWordFrequency;
    }

    public int getLayerSize() {
        return this.layerSize;
    }

    public void setLayerSize(int layerSize) {
        this.layerSize = layerSize;
    }

    public List<String> getStopWords() {
        return this.stopWords;
    }

    public void setStopWords(List<String> stopWords) {
        this.stopWords = stopWords;
    }

    public TokenizerFactory getTokenizerFactory() {
        return this.tokenizerFactory;
    }

    public void setTokenizerFactory(TokenizerFactory tokenizerFactory) {
        this.tokenizerFactory = tokenizerFactory;
    }

    public VocabCache getCache() {
        return this.cache;
    }

    public void setCache(VocabCache cache) {
        this.cache = cache;
    }

    /** Returns the current value of the word counter shared with the workers. */
    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public long numWordsEncountered() {
        return this.numWordsEncountered.get();
    }

    @Override // org.deeplearning4j.bagofwords.vectorizer.TextVectorizer
    public InvertedIndex index() {
        return this.index;
    }
}
