package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.xerial.snappy.SnappyInputStream;
import org.xerial.snappy.SnappyOutputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/Merge.class */
/**
 * Merges two line-oriented, tab-separated text files into a single output file, interleaving
 * their lines according to the value of one column.
 *
 * <p>The merge is a single sequential pass: at each step the line whose key column compares
 * lower (by {@link String#compareTo(String)}) is written first; when the keys are equal the line
 * from the first file is written, then the line from the second. The output is therefore ordered
 * on the key column only if both inputs already are. All the work is performed by the
 * constructor.
 *
 * <p>Files are read and written as UTF-8, optionally wrapped in Snappy streams.
 */
public class Merge {
    public static final int DEFAULT_NOTIFICATION_POINT = 1000000;
    static Logger logger = Logger.getLogger(Merge.class.getName());
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    private Pattern tabPattern;
    private int notificationPoint;

    /**
     * Merges two uncompressed files.
     *
     * @param file1 first input file
     * @param file2 second input file
     * @param out   output file (created or overwritten)
     * @param col   zero-based index of the key column
     * @throws IOException if a file cannot be read or written, or a line lacks the key column
     */
    public Merge(File file1, File file2, File out, int col) throws IOException {
        this(file1, file2, out, col, false);
    }

    /**
     * Merges two files, optionally Snappy-compressed, and logs progress and sizes.
     *
     * @param file1    first input file
     * @param file2    second input file
     * @param out      output file (created or overwritten)
     * @param col      zero-based index of the key column
     * @param compress {@code true} if the inputs are Snappy streams and the output must be one too
     * @throws IOException if a file cannot be read or written, or if a pair of lines being
     *                     compared does not both contain column {@code col}
     */
    public Merge(File file1, File file2, File out, int col, boolean compress) throws IOException {
        this.tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;

        long intervalStart = System.currentTimeMillis();
        long mergeStart = System.currentTimeMillis();
        long size1 = file1.length();
        long size2 = file2.length();
        logger.info("merging 2 " + (compress ? "compressed" : "uncompressed") + " files...");
        logger.info("file1:\t" + file1 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(size1) + ")");
        logger.info("file2:\t" + file2 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(size2) + ")");
        logger.info("out:\t" + out);

        PrintWriter writer = null;
        LineNumberReader reader1 = null;
        LineNumberReader reader2 = null;
        try {
            // Same opening order as before: the output is created first, then the inputs.
            writer = openWriter(out, compress);
            reader1 = openReader(file1, compress);
            reader2 = openReader(file2, compress);

            long step = 1;
            long count1 = 0;
            long count2 = 0;
            long written = 0;
            String line1 = reader1.readLine();
            String line2 = reader2.readLine();
            logger.info("step\tl1\tl2\tl3\tsum\ttime\tdate");
            while (line1 != null || line2 != null) {
                // order < 0: emit line1; order > 0: emit line2; order == 0: emit both (line1 first).
                int order;
                if (line2 == null) {
                    order = -1;
                } else if (line1 == null) {
                    order = 1;
                } else {
                    String[] fields1 = this.tabPattern.split(line1);
                    String[] fields2 = this.tabPattern.split(line2);
                    if (Math.min(fields1.length, fields2.length) <= col) {
                        // Fail with an exception rather than System.exit(0): the caller (and the
                        // shell) must see the error, and the finally block must release the streams.
                        String problem = "key column " + col + " is missing: line " + reader1.getLineNumber()
                                + " of " + file1 + " has " + fields1.length + " column(s), line "
                                + reader2.getLineNumber() + " of " + file2 + " has " + fields2.length;
                        logger.error(problem);
                        throw new IOException(problem);
                    }
                    order = fields1[col].compareTo(fields2[col]);
                }
                if (order <= 0) {
                    writer.println(line1);
                    line1 = reader1.readLine();
                    count1++;
                    written++;
                }
                if (order >= 0) {
                    writer.println(line2);
                    line2 = reader2.readLine();
                    count2++;
                    written++;
                }
                step++;
                if (step % this.notificationPoint == 0) {
                    logProgress(step, count1, count2, written, intervalStart);
                    intervalStart = System.currentTimeMillis();
                }
            }
            logProgress(step, count1, count2, written, intervalStart);

            reader1.close();
            reader2.close();
            writer.close();
            // PrintWriter never throws on write; its error flag is the only trace of a failure.
            if (writer.checkError()) {
                throw new IOException("error while writing " + out);
            }
        } finally {
            // No-ops after a successful run (close is idempotent); releases everything on failure.
            closeQuietly(reader1);
            closeQuietly(reader2);
            closeQuietly(writer);
        }

        logger.info("merge done in " + df.format(System.currentTimeMillis() - mergeStart) + " ms");
        logger.info("file1:\t" + file1 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file1.length()) + ")");
        logger.info("file2:\t" + file2 + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file2.length()) + ")");
        logger.info("out:\t" + out + ParsedPageLink.START_SUFFIX_PATTERN + df.format(out.length()) + ", " + (size1 + size2) + ")");
    }

    /** Opens {@code source} as a UTF-8 line reader, through a Snappy stream when {@code compressed}. */
    private static LineNumberReader openReader(File source, boolean compressed) throws IOException {
        FileInputStream raw = new FileInputStream(source);
        boolean opened = false;
        try {
            LineNumberReader reader = new LineNumberReader(
                    new InputStreamReader(compressed ? new SnappyInputStream(raw) : raw, "UTF-8"));
            opened = true;
            return reader;
        } finally {
            if (!opened) {
                // Wrapping failed: do not leak the file handle.
                closeQuietly(raw);
            }
        }
    }

    /** Opens {@code target} as a buffered UTF-8 writer, through a Snappy stream when {@code compressed}. */
    private static PrintWriter openWriter(File target, boolean compressed) throws IOException {
        FileOutputStream raw = new FileOutputStream(target);
        boolean opened = false;
        try {
            PrintWriter writer = new PrintWriter(new BufferedWriter(
                    new OutputStreamWriter(compressed ? new SnappyOutputStream(raw) : raw, "UTF-8")));
            opened = true;
            return writer;
        } finally {
            if (!opened) {
                closeQuietly(raw);
            }
        }
    }

    /**
     * Logs one progress row: step, lines taken from file 1, lines taken from file 2, lines
     * written, sum of the two input counts, milliseconds since {@code intervalStart}, current date.
     */
    private static void logProgress(long step, long count1, long count2, long written, long intervalStart) {
        logger.info(df.format(step) + StringTable.HORIZONTAL_TABULATION
                + df.format(count1) + StringTable.HORIZONTAL_TABULATION
                + df.format(count2) + StringTable.HORIZONTAL_TABULATION
                + df.format(written) + StringTable.HORIZONTAL_TABULATION
                + df.format(count1 + count2) + StringTable.HORIZONTAL_TABULATION
                + df.format(System.currentTimeMillis() - intervalStart) + StringTable.HORIZONTAL_TABULATION
                + new Date());
    }

    /** Closes {@code resource} if non-null, logging (not propagating) any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            logger.warn("failed to close " + resource, e);
        }
    }

    /**
     * Command-line entry point: {@code file1 file2 out col compress}.
     *
     * <p>The log4j configuration is read from the file named by the {@code log-config} system
     * property, or {@code configuration/log-config.txt} when the property is not set.
     *
     * @param args exactly five arguments; any other count prints the usage and exits with -1
     * @throws Exception if the merge fails or {@code col} is not an integer
     */
    public static void main(String[] args) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (args.length != 5) {
            System.err.println("Wrong number of parameters " + args.length);
            System.err.println("Usage: java -mx512M org.fbk.cit.hlt.thewikimachine.csv.Merge file1 file2 out col compress");
            System.exit(-1);
        }
        new Merge(new File(args[0]), new File(args[1]), new File(args[2]), Integer.parseInt(args[3]), Boolean.parseBoolean(args[4]));
    }
}
