package cc.twittertools.index;

import cc.twittertools.corpus.data.JsonStatusCorpusReader;
import cc.twittertools.corpus.data.Status;
import java.io.File;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line entry point that reads statuses from a collection through
 * {@link JsonStatusCorpusReader} and writes one Lucene document per status into
 * a freshly created index.
 *
 * <p>Recognised options: {@code -collection <dir>} and {@code -index <dir>} (both
 * required), {@code -optimize} (force-merge the index down to one segment),
 * {@code -store} (store term vectors for the text field) and {@code -h} (help).
 */
public class IndexStatuses {
    private static final Logger LOG = Logger.getLogger(IndexStatuses.class);

    /** Analyzer used for every analyzed field written by this tool. */
    public static final Analyzer ANALYZER = new TweetAnalyzer(Version.LUCENE_43);

    // Not read anywhere in this class; kept because it is part of the public surface.
    public static String corpusFormat = null;

    private static final String HELP_OPTION = "h";
    private static final String COLLECTION_OPTION = "collection";
    private static final String INDEX_OPTION = "index";
    private static final String OPTIMIZE_OPTION = "optimize";
    private static final String STORE_TERM_VECTORS_OPTION = "store";

    /** A progress line is logged each time this many statuses have been indexed. */
    private static final int PROGRESS_INTERVAL = 100000;

    /** Names of the Lucene fields written for each status. */
    public enum StatusField {
        ID("id"),
        SCREEN_NAME("screen_name"),
        EPOCH("epoch"),
        TEXT("text"),
        LANG("lang"),
        IN_REPLY_TO_STATUS_ID("in_reply_to_status_id"),
        IN_REPLY_TO_USER_ID("in_reply_to_user_id"),
        FOLLOWERS_COUNT("followers_count"),
        FRIENDS_COUNT("friends_count"),
        STATUSES_COUNT("statuses_count"),
        RETWEETED_STATUS_ID("retweeted_status_id"),
        RETWEETED_USER_ID("retweeted_user_id"),
        RETWEET_COUNT("retweet_count");

        /** Field name as it appears in the index. */
        public final String name;

        StatusField(String str) {
            this.name = str;
        }
    }

    /** Not instantiable: this class only exposes static members. */
    private IndexStatuses() {
    }

    /**
     * Parses the command line, then indexes every status of the collection that has text.
     *
     * <p>The index is opened in {@code CREATE} mode. If reading or indexing a status fails,
     * the error is logged, indexing stops, and the writer, directory and reader are closed
     * before returning normally. Failures outside the indexing loop propagate to the caller,
     * still after the opened resources have been closed.
     *
     * @param strArr command-line arguments
     * @throws Exception if opening the collection or index fails, or if merging or closing fails
     */
    public static void main(String[] strArr) throws Exception {
        Options options = buildOptions();

        CommandLine commandLine;
        try {
            commandLine = new GnuParser().parse(options, strArr);
        } catch (ParseException e) {
            System.err.println("Error parsing command line: " + e.getMessage());
            System.exit(-1);
            return; // not reached at runtime; makes the control flow explicit to the compiler
        }

        if (commandLine.hasOption(HELP_OPTION) || !commandLine.hasOption(COLLECTION_OPTION) || !commandLine.hasOption(INDEX_OPTION)) {
            new HelpFormatter().printHelp(IndexStatuses.class.getName(), options);
            System.exit(-1);
            return;
        }

        String collectionPath = commandLine.getOptionValue(COLLECTION_OPTION);
        String indexPath = commandLine.getOptionValue(INDEX_OPTION);
        FieldType textFieldType = buildTextFieldType(commandLine.hasOption(STORE_TERM_VECTORS_OPTION));

        long startMillis = System.currentTimeMillis();

        File collection = new File(collectionPath);
        if (!collection.exists()) {
            System.err.println("Error: " + collection + " does not exist!");
            System.exit(-1);
            return;
        }

        // Nested try/finally blocks guarantee that whatever was opened is closed, in the
        // order writer -> directory -> reader, even when a later step throws.
        JsonStatusCorpusReader reader = new JsonStatusCorpusReader(collection);
        try {
            FSDirectory directory = FSDirectory.open(new File(indexPath));
            try {
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, ANALYZER);
                config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
                LOG.info("collection: " + collectionPath);
                LOG.info("index: " + indexPath);

                IndexWriter writer = new IndexWriter(directory, config);
                try {
                    int indexed = 0;
                    try {
                        Status status;
                        while ((status = reader.next()) != null) {
                            if (status.getText() == null) {
                                continue; // statuses without text are not indexed
                            }
                            indexed++;
                            writer.addDocument(toDocument(status, textFieldType));
                            if (indexed % PROGRESS_INTERVAL == 0) {
                                LOG.info(indexed + " statuses indexed");
                            }
                        }
                    } catch (Exception e) {
                        // Best effort: stop at the first failure; the finally blocks still
                        // close the writer, directory and reader.
                        LOG.error("Indexing aborted after " + indexed + " statuses", e);
                        return;
                    }

                    LOG.info(String.format("Total of %s statuses added", Integer.valueOf(indexed)));
                    if (commandLine.hasOption(OPTIMIZE_OPTION)) {
                        LOG.info("Merging segments...");
                        writer.forceMerge(1);
                        LOG.info("Done!");
                    }
                    LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startMillis) + "ms");
                } finally {
                    writer.close();
                }
            } finally {
                directory.close();
            }
        } finally {
            reader.close();
        }
    }

    /** Builds the command-line option set understood by {@link #main(String[])}. */
    private static Options buildOptions() {
        Options options = new Options();
        options.addOption(new Option(HELP_OPTION, "show help"));
        options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
        options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

        OptionBuilder.withArgName("dir");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("source collection directory");
        options.addOption(OptionBuilder.create(COLLECTION_OPTION));

        OptionBuilder.withArgName("dir");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("index location");
        options.addOption(OptionBuilder.create(INDEX_OPTION));
        return options;
    }

    /**
     * Builds the field type used for the status text: indexed with documents, frequencies and
     * positions, stored and tokenized.
     *
     * @param storeTermVectors whether term vectors are stored for the field
     */
    private static FieldType buildTextFieldType(boolean storeTermVectors) {
        FieldType fieldType = new FieldType();
        fieldType.setIndexed(true);
        fieldType.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        fieldType.setStored(true);
        fieldType.setTokenized(true);
        if (storeTermVectors) {
            fieldType.setStoreTermVectors(true);
        }
        return fieldType;
    }

    /**
     * Converts one status into the Lucene document that is added to the index.
     *
     * <p>Reply fields are written only when the in-reply-to status id is positive, retweet
     * fields only when the retweeted status id is positive, and the language field only when
     * the language is present and not {@code "unknown"}.
     *
     * @param status        status to convert; its text is expected to be non-null
     * @param textFieldType field type applied to the status text
     */
    private static Document toDocument(Status status, FieldType textFieldType) {
        Document document = new Document();
        document.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
        document.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
        document.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Field.Store.YES));
        document.add(new Field(StatusField.TEXT.name, status.getText(), textFieldType));

        // Each count is stored under its own field name (the two getters used to be swapped).
        document.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount(), Field.Store.YES));
        document.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount(), Field.Store.YES));
        document.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Field.Store.YES));

        long inReplyToStatusId = status.getInReplyToStatusId();
        if (inReplyToStatusId > 0) {
            document.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId, Field.Store.YES));
            document.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(), Field.Store.YES));
        }

        // A null language is treated as absent instead of failing the whole run.
        String lang = status.getLang();
        if (lang != null && !"unknown".equals(lang)) {
            document.add(new TextField(StatusField.LANG.name, lang, Field.Store.YES));
        }

        long retweetedStatusId = status.getRetweetedStatusId();
        if (retweetedStatusId > 0) {
            int retweetCount = status.getRetweetCount();
            document.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetedStatusId, Field.Store.YES));
            document.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(), Field.Store.YES));
            document.add(new IntField(StatusField.RETWEET_COUNT.name, retweetCount, Field.Store.YES));
            // The retweeted status id is already known to be positive here, so only the
            // count can indicate a parsing problem.
            if (retweetCount < 0) {
                LOG.warn("Error parsing retweet fields of " + status.getId());
            }
        }
        return document;
    }
}
