package org.fbk.cit.hlt.thewikimachine.index.util;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.xerial.snappy.SnappyInputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/index/util/IntSetIndexer.class */
/**
 * Base class for indexers that read a tab-separated file and store, for every
 * key, the sorted set of integers associated with it.
 *
 * <p>Each indexed document has two fields: the key (stored, not analyzed) and
 * a binary value holding the integer set as produced by {@link #toByte(Set)}.
 * Lines belonging to the same key must be contiguous in the input: a new
 * document is written every time the key column changes.
 */
public abstract class IntSetIndexer extends AbstractIndexer {
    /** Field name used for the key when none is given. */
    public static final String DEFAULT_KEY_FIELD_NAME = "KEY";
    /** Field name used for the value when none is given. */
    public static final String DEFAULT_VALUE_FIELD_NAME = "VALUE";
    protected String keyFieldName;
    protected String valueFieldName;
    static Logger logger = Logger.getLogger(IntSetIndexer.class.getName());
    /** Splits an input line into its tab-separated columns. */
    protected static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    /** Formats counters and timings in the progress log. */
    protected static DecimalFormat df = new DecimalFormat("###,###,###,###");

    /**
     * Creates an indexer that uses the default key and value field names.
     *
     * @param str argument forwarded unchanged to the {@code AbstractIndexer} constructor
     * @throws IOException if the superclass constructor fails
     */
    protected IntSetIndexer(String str) throws IOException {
        this(str, DEFAULT_KEY_FIELD_NAME, DEFAULT_VALUE_FIELD_NAME);
    }

    /**
     * Creates an indexer with explicit field names.
     *
     * @param str  argument forwarded unchanged to the {@code AbstractIndexer} constructor
     * @param str2 name of the field that stores the key
     * @param str3 name of the field that stores the serialized integer set
     * @throws IOException if the superclass constructor fails
     */
    public IntSetIndexer(String str, String str2, String str3) throws IOException {
        super(str);
        this.keyFieldName = str2;
        this.valueFieldName = str3;
    }

    /** @return the name of the field that stores the key */
    public String getKeyFieldName() {
        return this.keyFieldName;
    }

    /** @param str the name of the field that stores the key */
    public void setKeyFieldName(String str) {
        this.keyFieldName = str;
    }

    /** @return the name of the field that stores the serialized integer set */
    public String getValueFieldName() {
        return this.valueFieldName;
    }

    /** @param str the name of the field that stores the serialized integer set */
    public void setValueFieldName(String str) {
        this.valueFieldName = str;
    }

    /**
     * Indexes the file at the given path; column selection is left to the subclass.
     *
     * @param str path of the input file
     * @param z   presumably the same "Snappy-compressed input" flag as in
     *            {@link #index(File, int, int, boolean)} - confirm in the subclass
     * @throws IOException if indexing fails
     */
    public abstract void index(String str, boolean z) throws IOException;

    /**
     * Indexes the given file; column selection is left to the subclass.
     *
     * @param file input file
     * @param z    presumably the same "Snappy-compressed input" flag as in
     *             {@link #index(File, int, int, boolean)} - confirm in the subclass
     * @throws IOException if indexing fails
     */
    public abstract void index(File file, boolean z) throws IOException;

    /**
     * Convenience overload of {@link #index(File, int, int, boolean)} taking a path.
     *
     * @param str path of the input file
     * @param i   zero-based index of the key column
     * @param i2  zero-based index of the integer value column
     * @param z   {@code true} if the file is Snappy-compressed
     * @throws IOException if the file cannot be read
     */
    public void index(String str, int i, int i2, boolean z) throws IOException {
        index(new File(str), i, i2, z);
    }

    /**
     * Reads a UTF-8, tab-separated file and adds one document per run of
     * consecutive lines sharing the same key.
     *
     * <p>Lines with too few columns to contain both requested columns are
     * counted but otherwise ignored. Nothing is added when the file contains no
     * usable line. The reader is closed even when reading or parsing fails.
     *
     * @param file input file
     * @param i    zero-based index of the key column
     * @param i2   zero-based index of the integer value column
     * @param z    {@code true} to read the file through a {@code SnappyInputStream}
     * @throws IOException           if the file cannot be opened or read
     * @throws NumberFormatException if a value column is not a valid integer
     */
    public void index(File file, int i, int i2, boolean z) throws IOException {
        logger.info("indexing " + file + "...");
        int maxColumn = Math.max(i, i2);
        long intervalStart = System.currentTimeMillis();
        int lineCount = 0;
        int keyCount = 0;
        // null means "no group started yet"; an empty key read from the file is still a valid key.
        String currentKey = null;
        Set<Integer> values = new TreeSet<Integer>();

        FileInputStream fileIn = new FileInputStream(file);
        LineNumberReader reader = null;
        try {
            InputStreamReader decoder = z
                    ? new InputStreamReader(new SnappyInputStream(fileIn), "UTF-8")
                    : new InputStreamReader(fileIn, "UTF-8");
            reader = new LineNumberReader(decoder);
            logger.info("tot\tcount\ttime\tdate");

            String line;
            while ((line = reader.readLine()) != null) {
                String[] columns = tabPattern.split(line);
                if (columns.length > maxColumn) {
                    String key = columns[i];
                    if (currentKey != null && !key.equals(currentKey)) {
                        // Key changed: the previous group is complete.
                        add(currentKey, values);
                        keyCount++;
                        values = new TreeSet<Integer>();
                    }
                    values.add(Integer.valueOf(columns[i2]));
                    currentKey = key;
                }
                lineCount++;
                if (lineCount % this.notificationPoint == 0) {
                    logger.info(df.format(lineCount) + StringTable.HORIZONTAL_TABULATION + df.format(keyCount) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - intervalStart) + StringTable.HORIZONTAL_TABULATION + new Date());
                    // The time column reports the duration of each interval, not the total.
                    intervalStart = System.currentTimeMillis();
                }
            }

            // Flush the last group before reporting so the final count includes it.
            if (currentKey != null) {
                add(currentKey, values);
                keyCount++;
            }
            logger.info(df.format(lineCount) + " lines read, key indexed: " + df.format(keyCount) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - intervalStart) + " ms " + new Date());
        } finally {
            // Closing the reader closes the whole stream chain; fall back to the
            // raw file stream if the chain could not be built.
            if (reader != null) {
                reader.close();
            } else {
                fileIn.close();
            }
        }
    }

    /**
     * Adds one document mapping a key to its integer set.
     *
     * <p>An {@link IOException} raised while serializing or writing the document
     * is logged and not rethrown, so the caller continues with the next key.
     *
     * @param str key stored (not analyzed) in the key field
     * @param set integers stored in binary form in the value field
     */
    public void add(String str, Set<Integer> set) {
        Document document = new Document();
        try {
            document.add(new Field(this.keyFieldName, str, Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field(this.valueFieldName, toByte(set), Field.Store.YES));
            this.indexWriter.addDocument(document);
        } catch (IOException e) {
            // Pass the throwable separately so the stack trace is logged too.
            logger.error("unable to index key '" + str + "'", e);
        }
    }

    /**
     * Serializes a set of integers as a big-endian {@code int} count followed by
     * each element as a big-endian {@code int}, in the set's iteration order.
     *
     * @param set integers to serialize
     * @return {@code 4 * (set.size() + 1)} bytes
     * @throws IOException if writing to the in-memory buffer fails
     */
    protected byte[] toByte(Set<Integer> set) throws IOException {
        // Exact size: one int for the count plus one int per element.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream(4 * (set.size() + 1));
        DataOutputStream out = new DataOutputStream(buffer);
        out.writeInt(set.size());
        for (Integer value : set) {
            out.writeInt(value.intValue());
        }
        return buffer.toByteArray();
    }
}
