package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.xerial.snappy.SnappyInputStream;
import org.xerial.snappy.SnappyOutputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/FastMerge.class */
/**
 * Merges two line-oriented, tab-separated UTF-8 text files into a single output file.
 *
 * <p>Both inputs are read in chunks of at most {@code size} lines. Lines are compared on the
 * value of one column (the key) using {@link String#compareTo(String)}, and the smaller line is
 * emitted first; when two keys are equal, both lines are emitted (first file first). The result
 * is only globally ordered if both inputs are already ordered on that key; this is not checked.
 *
 * <p>The whole merge is performed by the constructor. Input and output can optionally be
 * Snappy-compressed.
 */
public class FastMerge {
    /** Number of processed lines between two progress log messages. */
    public static final int DEFAULT_NOTIFICATION_POINT = 1000000;

    /** Code point of the column separator used by the merge (horizontal tab). */
    public static final int DEFAULT_SEPARATOR_CHAR = 9;

    static Logger logger = Logger.getLogger(FastMerge.class.getName());

    // Shared formatter used for log output only. DecimalFormat is not thread-safe.
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    // Not referenced anywhere in this class; kept unchanged.
    private Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);

    private int notificationPoint;
    private char separatorChar;
    private int col;
    private int size;

    /**
     * One input line together with the key extracted from the configured column.
     *
     * <p>Ordering is by key only, so {@link #compareTo(Line)} can return 0 for two lines with
     * different content.
     */
    public class Line implements Comparable<Line> {
        String s;
        String key;

        /**
         * Wraps a raw input line and extracts its key.
         *
         * @param str the full line, without line terminator
         */
        Line(String str) {
            this.s = str;
            int start = findStart(str);
            // The search for the closing separator must begin at 'start' itself: if the key
            // column is empty, the character at 'start' already is the separator, and skipping
            // it would make the key swallow the separator and the following column.
            this.key = str.substring(start, findEnd(str, start));
        }

        /**
         * Returns the index of the first character of the key column.
         *
         * @param str the line to scan
         * @return 0 for column 0; otherwise the index just after the {@code col}-th separator,
         *         or 0 when the line contains fewer than {@code col} separators
         */
        int findStart(String str) {
            if (FastMerge.this.col == 0) {
                return 0;
            }
            int separatorsSeen = 1;
            for (int idx = 0; idx < str.length(); idx++) {
                if (str.charAt(idx) == FastMerge.this.separatorChar) {
                    if (separatorsSeen == FastMerge.this.col) {
                        return idx + 1;
                    }
                    separatorsSeen++;
                }
            }
            return 0;
        }

        /**
         * Returns the index of the first separator at or after {@code i}.
         *
         * @param str the line to scan
         * @param i index at which the scan starts
         * @return the index of the separator, or {@code str.length()} when there is none
         */
        int findEnd(String str, int i) {
            for (int idx = i; idx < str.length(); idx++) {
                if (str.charAt(idx) == FastMerge.this.separatorChar) {
                    return idx;
                }
            }
            return str.length();
        }

        /** Returns the key extracted from the configured column. */
        public String getKey() {
            return this.key;
        }

        /** Returns the original, unmodified line. */
        @Override
        public String toString() {
            return this.s;
        }

        /** Compares the keys of the two lines with {@link String#compareTo(String)}. */
        @Override // java.lang.Comparable
        public int compareTo(Line line) {
            return this.key.compareTo(line.getKey());
        }
    }

    /**
     * Merges {@code file} and {@code file2} into {@code file3}.
     *
     * @param file first input file
     * @param file2 second input file
     * @param file3 output file; it is created or overwritten
     * @param i zero-based index of the column holding the key
     * @param i2 maximum number of lines held in memory per input chunk, and the threshold above
     *        which the output buffer is flushed to the writer; must be positive
     * @param z {@code true} if the inputs are Snappy-compressed and the output must be too
     * @throws IOException if a file cannot be opened or read, or if writing the output fails
     * @throws IllegalArgumentException if {@code i2} is not positive (a non-positive chunk size
     *         would read nothing and silently produce an empty output)
     */
    public FastMerge(File file, File file2, File file3, int i, int i2, boolean z) throws IOException {
        if (i2 <= 0) {
            throw new IllegalArgumentException("size must be positive: " + i2);
        }
        this.col = i;
        this.size = i2;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
        this.separatorChar = (char) DEFAULT_SEPARATOR_CHAR;

        long lapStart = System.currentTimeMillis();
        long begin = System.currentTimeMillis();
        long length = file.length();
        long length2 = file2.length();
        logger.info("merging 2 " + (z ? "compressed" : "uncompressed") + " files...");
        logger.info("file1:\t" + file + ParsedPageLink.START_SUFFIX_PATTERN + df.format(length) + ")");
        logger.info("file2:\t" + file2 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(length2) + ")");
        logger.info("out:\t" + file3);

        LineNumberReader in1 = null;
        LineNumberReader in2 = null;
        PrintWriter out = null;
        try {
            // Inputs are opened first so that a missing input does not create or truncate the output.
            in1 = openReader(file, z);
            in2 = openReader(file2, z);
            out = openWriter(file3, z);
            merge(in1, in2, out, lapStart);
            out.close();
            // PrintWriter never throws on write; its error flag is the only failure signal.
            if (out.checkError()) {
                throw new IOException("error while writing " + file3);
            }
        } finally {
            closeQuietly(out);
            closeQuietly(in1);
            closeQuietly(in2);
        }

        logger.info("merge done in " + df.format(System.currentTimeMillis() - begin) + " ms");
        logger.info("file1:\t" + file + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file.length()) + ")");
        logger.info("file2:\t" + file2 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file2.length()) + ")");
        logger.info("out:\t" + file3 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file3.length()) + ", " + df.format(length + length2) + ")");
    }

    /** Opens {@code file} as a UTF-8 line reader, decompressing with Snappy when requested. */
    private static LineNumberReader openReader(File file, boolean compressed) throws IOException {
        FileInputStream raw = new FileInputStream(file);
        boolean opened = false;
        try {
            LineNumberReader reader;
            if (compressed) {
                reader = new LineNumberReader(new InputStreamReader(new SnappyInputStream(raw), "UTF-8"));
            } else {
                reader = new LineNumberReader(new InputStreamReader(raw, "UTF-8"));
            }
            opened = true;
            return reader;
        } finally {
            // Do not leak the file handle if wrapping the raw stream failed.
            if (!opened) {
                closeQuietly(raw);
            }
        }
    }

    /** Opens {@code file} as a buffered UTF-8 writer, compressing with Snappy when requested. */
    private static PrintWriter openWriter(File file, boolean compressed) throws IOException {
        FileOutputStream raw = new FileOutputStream(file);
        boolean opened = false;
        try {
            PrintWriter writer;
            if (compressed) {
                writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new SnappyOutputStream(raw), "UTF-8")));
            } else {
                writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(raw, "UTF-8")));
            }
            opened = true;
            return writer;
        } finally {
            // Do not leak the file handle if wrapping the raw stream failed.
            if (!opened) {
                closeQuietly(raw);
            }
        }
    }

    /** Closes {@code resource} if it is not null; a failure to close is logged, not propagated. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.warn("unable to close " + resource, e);
        }
    }

    /** Runs the chunked two-way merge of {@code in1} and {@code in2} into {@code out}. */
    private void merge(LineNumberReader in1, LineNumberReader in2, PrintWriter out, long lapStart) throws IOException {
        List<Line> chunk1 = read(in1);
        List<Line> chunk2 = read(in2);
        List<Line> buffer = new ArrayList<Line>();
        int pos1 = 0;
        int pos2 = 0;
        int buffered = 0;
        int steps = 0;
        while (!chunk1.isEmpty() && !chunk2.isEmpty()) {
            Line left = chunk1.get(pos1);
            Line right = chunk2.get(pos2);
            int cmp = left.compareTo(right);
            if (cmp == 0) {
                buffer.add(left);
                buffer.add(right);
                pos1++;
                pos2++;
                buffered += 2;
            } else if (cmp > 0) {
                buffer.add(right);
                pos2++;
                buffered++;
            } else {
                buffer.add(left);
                pos1++;
                buffered++;
            }
            // Refill a chunk as soon as it is fully consumed; an empty refill means end of input.
            if (pos1 >= chunk1.size()) {
                chunk1 = read(in1);
                pos1 = 0;
            }
            if (pos2 >= chunk2.size()) {
                chunk2 = read(in2);
                pos2 = 0;
            }
            if (buffer.size() > this.size) {
                write(buffer, out);
                buffer.clear();
                buffered = 0;
            }
            steps++;
            if (steps % this.notificationPoint == 0) {
                logger.info(df.format(steps) + StringTable.HORIZONTAL_TABULATION + df.format(pos1) + StringTable.HORIZONTAL_TABULATION + df.format(pos2) + StringTable.HORIZONTAL_TABULATION + df.format(buffered) + StringTable.HORIZONTAL_TABULATION + df.format(pos1 + pos2) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - lapStart) + StringTable.HORIZONTAL_TABULATION + new Date());
                lapStart = System.currentTimeMillis();
            }
        }
        if (!buffer.isEmpty()) {
            write(buffer, out);
        }
        // At most one input still has data. Its current chunk may hold lines that were read but
        // not yet emitted; those must be written before reading further chunks from the file.
        drain(chunk1, pos1, in1, out);
        drain(chunk2, pos2, in2, out);
    }

    /** Writes the unconsumed tail of {@code chunk}, then every remaining chunk of {@code in}. */
    private void drain(List<Line> chunk, int from, LineNumberReader in, PrintWriter out) throws IOException {
        List<Line> pending = chunk.subList(from, chunk.size());
        while (!pending.isEmpty()) {
            write(pending, out);
            pending = read(in);
        }
    }

    /** Prints every line of {@code list} to {@code printWriter}, one per output line. */
    private void write(List<Line> list, PrintWriter printWriter) throws IOException {
        logger.info("writing " + list.size() + " lines...");
        for (Line line : list) {
            printWriter.println(line);
        }
    }

    /**
     * Reads up to {@code size} lines from the reader and wraps each one in a {@link Line}.
     *
     * @return the lines read; empty when the reader is at end of input
     */
    private List<Line> read(LineNumberReader lineNumberReader) throws IOException {
        String readLine;
        long lapStart = System.currentTimeMillis();
        long begin = System.currentTimeMillis();
        logger.info("reading " + df.format(this.size) + " lines starting from " + df.format(lineNumberReader.getLineNumber()) + "... " + new Date());
        List<Line> lines = new ArrayList<Line>();
        int count = 0;
        logger.info("lines\tsize\ttime\tdate");
        while (count < this.size && (readLine = lineNumberReader.readLine()) != null) {
            lines.add(new Line(readLine));
            count++;
            if (count % this.notificationPoint == 0) {
                logger.info(df.format(count) + StringTable.HORIZONTAL_TABULATION + df.format(lines.size()) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - lapStart) + StringTable.HORIZONTAL_TABULATION + new Date());
                lapStart = System.currentTimeMillis();
            }
        }
        logger.info(df.format(lines.size()) + " lines read in " + (System.currentTimeMillis() - begin) + " ms");
        return lines;
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects six arguments: {@code file1 file2 out col size compress}. The log4j
     * configuration is taken from the {@code log-config} system property, falling back to
     * {@code configuration/log-config.txt}.
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(property);
        if (strArr.length != 6) {
            System.err.println("Wrong number of parameters " + strArr.length);
            System.err.println("Usage: java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.csv.FastMerge file1 file2 out col size compress");
            System.exit(-1);
        }
        new FastMerge(new File(strArr[0]), new File(strArr[1]), new File(strArr[2]), Integer.parseInt(strArr[3]), Integer.parseInt(strArr[4]), Boolean.parseBoolean(strArr[5]));
    }
}
