package org.languagetool.dev.dumpcheck;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.dev.index.Indexer;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Command-line tool that reads sentences from one or more data files (per the
 * usage text: Wikipedia XML dumps and/or Tatoeba files) and adds them to a
 * Lucene index via {@link Indexer}.
 *
 * <p>After indexing, one extra "meta" document is added to the index that
 * stores the number of sentences that were indexed, see
 * {@link #MAX_DOC_COUNT_FIELD} and {@link #MAX_DOC_COUNT_VALUE}.
 */
public class SentenceSourceIndexer extends DefaultHandler implements AutoCloseable {
    /** Name of the meta document field that holds the number of indexed sentences. */
    public static final String MAX_DOC_COUNT_VALUE = "maxDocCountValue";
    /** Name of the marker field that identifies the meta document. */
    public static final String MAX_DOC_COUNT_FIELD = "maxDocCount";
    /** Value stored in {@link #MAX_DOC_COUNT_FIELD} of the meta document. */
    public static final String MAX_DOC_COUNT_FIELD_VAL = "1";

    /** A progress message is printed every this many sentences. */
    private static final int PROGRESS_INTERVAL = 1000;

    private final Indexer indexer;
    private final int maxSentences;
    private int sentenceCount = 0;

    /**
     * @param directory    Lucene directory the index is written to
     * @param language     language passed on to the {@link Indexer}
     * @param maxSentences maximum number of sentences to index; values of 0 or less mean no limit
     */
    SentenceSourceIndexer(Directory directory, Language language, int maxSentences) {
        this.indexer = new Indexer(directory, language);
        this.maxSentences = maxSentences;
    }

    /** Closes the underlying {@link Indexer}. */
    @Override // java.lang.AutoCloseable
    public void close() throws Exception {
        this.indexer.close();
    }

    /**
     * Indexes the sentences of the given files until the source is exhausted or
     * the sentence limit is hit.
     *
     * @param list     names of the data files to read sentences from
     * @param language language of the sentences
     * @throws IOException                   declared for I/O failures of the sentence source or the index
     * @throws DocumentLimitReachedException once {@code maxSentences} (if positive) sentences have been indexed
     */
    private void run(List<String> list, Language language) throws IOException {
        MixingSentenceSource source = MixingSentenceSource.create(list, language);
        while (source.hasNext()) {
            Sentence sentence = source.next();
            if (this.sentenceCount % PROGRESS_INTERVAL == 0) {
                System.out.println("Indexing sentence #" + this.sentenceCount + " (" + source.getSourceDistribution() + "):");
                System.out.println("  [" + sentence.getSource() + "] " + sentence);
            }
            this.indexer.indexSentence(sentence, this.sentenceCount);
            this.sentenceCount++;
            // The limit is checked after incrementing, so exactly maxSentences sentences get indexed.
            if (this.maxSentences > 0 && this.sentenceCount >= this.maxSentences) {
                throw new DocumentLimitReachedException(this.maxSentences);
            }
        }
    }

    /**
     * Adds the meta document that records how many sentences have been indexed so far.
     *
     * @throws IOException declared for failures while adding the document to the index
     */
    private void writeMetaDocuments() throws IOException {
        Document document = new Document();
        document.add(new StringField(MAX_DOC_COUNT_FIELD, MAX_DOC_COUNT_FIELD_VAL, Field.Store.YES));
        document.add(new StringField(MAX_DOC_COUNT_VALUE, String.valueOf(this.sentenceCount), Field.Store.YES));
        this.indexer.add(document);
    }

    /**
     * Entry point. Expects exactly four arguments:
     * {@code <dataFiles> <indexDir> <languageCode> <maxSentences>}; prints usage
     * and exits with status 1 otherwise.
     *
     * @param strArr command-line arguments
     * @throws Exception if indexing or closing any resource fails
     */
    public static void main(String... strArr) throws Exception {
        if (strArr.length != 4) {
            System.out.println("Usage: " + SentenceSourceIndexer.class.getSimpleName() + " <dataFile...> <indexDir> <languageCode> <maxSentences>");
            System.out.println("\t<dataFiles> comma-separated list of a Wikipedia XML dump (*.xml) and/or Tatoeba files (tatoeba-*)");
            System.out.println("\t<indexDir> directory where Lucene index will be written to, existing index content will be removed");
            System.out.println("\t<languageCode> short code like en for English, de for German etc");
            System.out.println("\t<maxSentences> maximum number of sentences to be indexed, use 0 for no limit");
            System.exit(1);
        }
        List<String> dataFiles = Arrays.asList(strArr[0].split(","));
        File indexDir = new File(strArr[1]);
        String languageCode = strArr[2];
        int maxSentences = Integer.parseInt(strArr[3]);
        Language language = Languages.getLanguageForShortName(languageCode);
        if (maxSentences == 0) {
            System.out.println("Going to index contents from " + dataFiles);
        } else {
            System.out.println("Going to index up to " + maxSentences + " sentences from " + dataFiles);
        }
        System.out.println("Output index dir: " + indexDir);
        long startMillis = System.currentTimeMillis();
        try (FSDirectory directory = FSDirectory.open(indexDir)) {
            // try-with-resources guarantees the indexer is closed even when writing
            // the meta document fails; the directory is closed after the indexer.
            try (SentenceSourceIndexer sentenceSourceIndexer = new SentenceSourceIndexer(directory, language, maxSentences)) {
                try {
                    sentenceSourceIndexer.run(dataFiles, language);
                } catch (DocumentLimitReachedException e) {
                    System.out.println("Sentence limit (" + e.getLimit() + ") reached, stopping indexing");
                } finally {
                    // Written on success, on reaching the limit and on failure, so the
                    // index always records the number of sentences indexed so far.
                    sentenceSourceIndexer.writeMetaDocuments();
                }
            }
            float minutes = (System.currentTimeMillis() - startMillis) / 60000.0f;
            System.out.printf("Indexing took %.2f minutes\n", minutes);
        }
    }
}
