package org.fbk.cit.hlt.thewikimachine.index.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/index/util/UniqueIDValueIndexer.class */
/**
 * Base class for indexers that read a tab-separated text file and store each
 * distinct run of values found in one column as a Lucene document made of a
 * sequential integer key and the value itself.
 *
 * <p>Each document has two stored fields: {@link #keyFieldName}, holding the
 * bytes produced by {@code SerialUtils.toByteArray(int)} for the sequential id,
 * and {@link #valueFieldName}, holding the value as a non-analyzed string.
 *
 * <p>Only <em>consecutive</em> duplicates are collapsed, so the "unique" in the
 * class name holds only if the input file is grouped/sorted on the indexed
 * column (NOTE(review): presumably guaranteed by the callers — confirm).
 */
public abstract class UniqueIDValueIndexer extends AbstractIndexer {
    /** Default name for the key field. */
    public static final String DEFAULT_KEY_FIELD_NAME = "KEY";
    /** Default name for the value field. */
    public static final String DEFAULT_VALUE_FIELD_NAME = "VALUE";
    /** Name of the stored field that receives the serialized sequential id. */
    protected String keyFieldName;
    /** Name of the stored, non-analyzed field that receives the column value. */
    protected String valueFieldName;
    static Logger logger = Logger.getLogger(UniqueIDValueIndexer.class.getName());
    /** Column separator used to split every input line. */
    protected static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    /** Formatter for the counters and timings written to the progress log. */
    protected static DecimalFormat df = new DecimalFormat("###,###,###,###");

    /**
     * Creates an indexer with the given field names.
     *
     * @param str  argument forwarded unchanged to the {@code AbstractIndexer}
     *             constructor (presumably the index location — confirm against
     *             {@code AbstractIndexer})
     * @param str2 name of the key field
     * @param str3 name of the value field
     * @throws IOException if the superclass constructor fails
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public UniqueIDValueIndexer(String str, String str2, String str3) throws IOException {
        super(str);
        this.keyFieldName = str2;
        this.valueFieldName = str3;
    }

    /**
     * Convenience overload of {@link #index(File, int)} taking a file path.
     *
     * @param str path of the tab-separated file to read
     * @param i   zero-based index of the column to index
     * @throws IOException if the file cannot be opened or read
     */
    public void index(String str, int i) throws IOException {
        index(new File(str), i);
    }

    /**
     * Reads {@code file} as UTF-8, splits every line on the tab pattern and adds
     * the value found in column {@code i} to the index whenever it differs from
     * the value seen on the previous accepted line.
     *
     * <p>Lines with too few columns are skipped but still counted as read.
     * Because the "previous value" starts out as the empty string, an empty
     * value at the very beginning of the file is not indexed. Ids are assigned
     * sequentially starting from 0. Progress is logged every
     * {@code notificationPoint} lines.
     *
     * @param file tab-separated file to read
     * @param i    zero-based index of the column to index
     * @throws IOException if the file cannot be opened, read or closed
     */
    public void index(File file, int i) throws IOException {
        logger.info("indexing " + file + "...");
        long intervalStart = System.currentTimeMillis();
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        // try/finally guarantees the file handle is released even when reading
        // or indexing fails half-way through the file.
        try {
            int linesRead = 0;
            int valuesAdded = 0;
            logger.info("tot\tcount\ttime\tdate");
            String previousValue = "";
            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = tabPattern.split(line);
                if (columns.length > i && !columns[i].equals(previousValue)) {
                    // The id of the new document is the number of values added so far.
                    add(valuesAdded, columns[i]);
                    valuesAdded++;
                    previousValue = columns[i];
                }
                linesRead++;
                if (linesRead % this.notificationPoint == 0) {
                    logger.info(df.format(linesRead) + StringTable.HORIZONTAL_TABULATION + df.format(valuesAdded) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - intervalStart) + StringTable.HORIZONTAL_TABULATION + new Date());
                    // The reported time is per notification interval, not cumulative.
                    intervalStart = System.currentTimeMillis();
                }
            }
            logger.info(df.format(linesRead) + " lines read: " + df.format(valuesAdded) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - intervalStart) + " ms " + new Date());
        } finally {
            reader.close();
        }
    }

    /**
     * Adds one key/value document to the index.
     *
     * <p>An {@link IOException} raised while building or writing the document is
     * logged and <em>not</em> propagated, so a single failing document does not
     * abort the whole indexing run. The {@code throws} clause is kept so that
     * existing overriding subclasses and callers keep compiling.
     *
     * @param i   sequential id stored (serialized) in the key field
     * @param str value stored in the value field
     * @throws IOException declared for compatibility; not thrown by this implementation
     */
    protected void add(int i, String str) throws IOException {
        Document document = new Document();
        try {
            document.add(new Field(this.keyFieldName, SerialUtils.toByteArray(i), Field.Store.YES));
            document.add(new Field(this.valueFieldName, str, Field.Store.YES, Field.Index.NOT_ANALYZED));
            this.indexWriter.addDocument(document);
        } catch (IOException e) {
            // Pass the exception as the throwable argument so the stack trace is
            // logged, and say which document was lost.
            logger.error("unable to index value '" + str + "' with id " + i, e);
        }
    }

    /**
     * Convenience overload of {@link #index(File)} taking a file path.
     *
     * @param str path of the file to index
     * @throws IOException if indexing fails
     */
    public void index(String str) throws IOException {
        index(new File(str));
    }

    /**
     * Indexes the given file. How the file is processed is left to the subclass.
     *
     * @param file file to index
     * @throws IOException if indexing fails
     */
    public abstract void index(File file) throws IOException;
}
