package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/CSort.class */
/**
 * External sort of a delimited text file on one column.
 *
 * <p>The input is read in chunks of at most {@code size} lines. Each chunk is sorted in
 * memory by the string value of column {@code col} (natural {@link String} ordering, via a
 * {@link TreeMap}) and written to a temporary file; temporary files are then merged pairwise
 * by a {@code Merge} class that is not shown in this file. The final merged file is renamed
 * to the output file.
 *
 * <p>Lines are split with {@code StringTable.HORIZONTAL_TABULATION} (presumably a tab
 * character, as the warning message below suggests - confirm against {@code StringTable}).
 * Lines that do not have a column {@code col} are dropped and only counted in a warning.
 */
public class CSort {
    static Logger logger = Logger.getLogger(CSort.class.getName());

    // Currently unused inside this class.
    private static final Pattern spacePattern = Pattern.compile(" ");
    private static final Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    /** Size in bytes of the buffer used by {@link #unzip(byte[])}. */
    public static final int BUFFER_SIZE = 1024;
    public static final int DEFAULT_SIZE = 10000000;
    public static final int DEFAULT_NOTIFICATION_POINT = 1000000;

    /** Once more than this many chunk files are pending they are merged into one. */
    private static final int MAX_PENDING_FILES = 5;

    /** {@link File#createTempFile(String, String, File)} rejects shorter prefixes. */
    private static final int MIN_TEMP_PREFIX_LENGTH = 3;

    private final File in;
    private final File out;
    private final int size;
    private final int col;
    private int notificationPoint;

    /**
     * Creates a sorter from file names; see {@link #CSort(File, File, int, int)}.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     * @param i    zero-based index of the column to sort on
     * @param i2   maximum number of lines sorted in memory at once
     * @throws IOException declared for compatibility with existing callers; not thrown here
     */
    public CSort(String str, String str2, int i, int i2) throws IOException {
        this(new File(str), new File(str2), i, i2);
    }

    /**
     * Creates a sorter. Nothing is read or written until {@link #run()} is called.
     *
     * @param file  input file
     * @param file2 output file
     * @param i     zero-based index of the column to sort on
     * @param i2    maximum number of lines sorted in memory at once
     * @throws IOException declared for compatibility with existing callers; not thrown here
     */
    public CSort(File file, File file2, int i, int i2) throws IOException {
        this.in = file;
        this.out = file2;
        this.size = i2;
        this.col = i;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
    }

    /** Returns the number of lines between two progress dots printed on standard output. */
    public int getNotificationPoint() {
        return this.notificationPoint;
    }

    /**
     * Sets the number of lines between two progress dots printed on standard output.
     * A value that is zero or negative disables the dots.
     */
    public void setNotificationPoint(int i) {
        this.notificationPoint = i;
    }

    /**
     * Sorts the input file into the output file.
     *
     * <p>I/O failures are logged rather than propagated, and temporary files still pending at
     * that point are deleted. If the input contains no line with the requested column, an
     * empty output file is written.
     */
    public void run() {
        long start = System.currentTimeMillis();
        logger.info("sorting " + this.in + ParsedPageLink.START_SUFFIX_PATTERN + this.out + ", " + df.format(this.size) + ", " + this.col + " " + new Date() + ")...");
        if (this.size <= 0 || this.col < 0) {
            // A non-positive chunk size would never consume input and a negative column
            // would index outside the split array.
            logger.error("invalid arguments: size must be positive and col must not be negative (size=" + this.size + ", col=" + this.col + ")");
            return;
        }
        Stack<File> pending = new Stack<>();
        boolean completed = false;
        try (LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(this.in), "UTF-8"))) {
            // Resolve the directory from the absolute path: with a bare relative name
            // getParentFile() is null and the temporary files would go to the default
            // temporary directory, from which the final rename may not be possible.
            File workDir = this.out.getAbsoluteFile().getParentFile();
            String prefix = tempPrefix(this.out.getName());
            for (int chunk = 0; ; chunk++) {
                int firstLine = reader.getLineNumber();
                Map<String, List<byte[]>> sorted = read(reader, this.size, this.col);
                if (reader.getLineNumber() == firstLine) {
                    // No line was consumed: end of input.
                    break;
                }
                if (sorted.isEmpty()) {
                    // Every line of this chunk lacked the key column; keep reading.
                    continue;
                }
                File chunkFile = File.createTempFile(prefix, Integer.toString(chunk), workDir);
                // Registered before writing so that a failed write is cleaned up as well.
                pending.push(chunkFile);
                write(sorted, chunkFile);
                if (pending.size() > MAX_PENDING_FILES) {
                    merge(pending, this.col);
                }
            }
            merge(pending, this.col);
            if (pending.isEmpty()) {
                logger.warn("no sortable line found in " + this.in + ", writing an empty " + this.out);
                new FileOutputStream(this.out).close();
            } else {
                File result = pending.pop();
                logger.info("renaming " + result + " to " + this.out + "...");
                if (!result.renameTo(this.out)) {
                    throw new IOException("unable to rename " + result + " to " + this.out);
                }
            }
            completed = true;
        } catch (IOException e) {
            logger.error("sorting " + this.in + " failed", e);
        } finally {
            if (!completed) {
                for (File leftover : pending) {
                    discard(leftover);
                }
            }
        }
        logger.info("process done in " + df.format(System.currentTimeMillis() - start) + " ms " + new Date());
    }

    /**
     * Merges the files on the stack pairwise until at most one is left; the inputs of each
     * merge are deleted and its result is pushed back.
     *
     * @param stack temporary files, most recent on top
     * @param i     zero-based index of the key column, passed on to {@code Merge}
     * @throws IOException if a temporary file cannot be created
     */
    private void merge(Stack<File> stack, int i) throws IOException {
        long start = System.currentTimeMillis();
        logger.info("merging " + stack.size() + " files... " + new Date());
        while (stack.size() > 1) {
            // Created before popping so that a failure here leaves the stack untouched.
            File merged = File.createTempFile("merge", "", stack.peek().getParentFile());
            File first = stack.pop();
            File second = stack.pop();
            boolean done = false;
            try {
                new Merge(first, second, merged, i);
                done = true;
            } finally {
                if (!done) {
                    // Put everything back so that the caller can clean up all files.
                    stack.push(second);
                    stack.push(first);
                    stack.push(merged);
                }
            }
            discard(first);
            discard(second);
            logger.debug("pushing " + merged + "...");
            stack.push(merged);
        }
        logger.info("merge done in " + df.format(System.currentTimeMillis() - start) + " ms " + new Date());
    }

    /**
     * Reads up to {@code i} lines and groups them by the value of column {@code i2}.
     *
     * <p>Lines are stored gzip-compressed; lines sharing a key keep their input order.
     * The returned map is empty both at end of input and when none of the lines read has
     * the key column, so callers that need to tell the two apart must check whether the
     * reader's line number advanced.
     *
     * @param lineNumberReader source of the lines
     * @param i                maximum number of lines to read
     * @param i2               zero-based index of the key column
     * @return the compressed lines grouped by key, iterated in ascending key order
     * @throws IOException if reading or compressing fails
     */
    private Map<String, List<byte[]>> read(LineNumberReader lineNumberReader, int i, int i2) throws IOException {
        long start = System.currentTimeMillis();
        logger.info("sorting " + df.format(i) + " lines starting from " + lineNumberReader.getLineNumber() + "... " + new Date());
        Map<String, List<byte[]>> byKey = new TreeMap<>();
        int lines = 0;
        int tooShort = 0;
        String line;
        while (lines < i && (line = lineNumberReader.readLine()) != null) {
            // Pattern.split drops trailing empty fields, so a line whose key column is
            // both empty and last is counted as too short.
            String[] fields = tabPattern.split(line);
            if (fields.length > i2) {
                List<byte[]> sameKey = byKey.get(fields[i2]);
                if (sameKey == null) {
                    sameKey = new ArrayList<>();
                    byKey.put(fields[i2], sameKey);
                }
                sameKey.add(zip(line));
            } else {
                tooShort++;
            }
            lines++;
            notifyProgress(lines);
        }
        System.out.print("\n");
        logger.info("sorted " + df.format(lines) + " lines in " + df.format(System.currentTimeMillis() - start) + " ms " + new Date());
        logger.info("found " + df.format(byKey.size()) + " unique keys");
        if (tooShort > 0) {
            logger.warn(df.format(tooShort) + " lines where the number of \\t is lower than " + i2);
        }
        return byKey;
    }

    /**
     * Writes all lines of the map to a UTF-8 file, one per line, in the map's key order.
     *
     * @param map  compressed lines grouped by key, as produced by {@code read}
     * @param file destination file
     * @throws IOException if the file cannot be opened, a line cannot be decompressed, or
     *                     the writer reports an error
     */
    private void write(Map<String, List<byte[]>> map, File file) throws IOException {
        long start = System.currentTimeMillis();
        logger.info("writing " + df.format(map.size()) + " unique keys in " + file + "...");
        int written = 0;
        try (PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")))) {
            for (List<byte[]> sameKey : map.values()) {
                for (byte[] packed : sameKey) {
                    writer.println(unzip(packed));
                    written++;
                    notifyProgress(written);
                }
            }
            // PrintWriter never throws on write errors; checkError() flushes and reports them.
            if (writer.checkError()) {
                throw new IOException("error while writing " + file);
            }
        }
        logger.info("wrote " + df.format(written) + " lines in " + df.format(System.currentTimeMillis() - start) + " ms");
    }

    /** Prints a progress dot every {@code notificationPoint} lines, if that value is positive. */
    private void notifyProgress(int count) {
        if (this.notificationPoint > 0 && count % this.notificationPoint == 0) {
            System.out.print(".");
        }
    }

    /** Pads a file name so that it is accepted as a prefix by {@code File.createTempFile}. */
    private static String tempPrefix(String name) {
        StringBuilder prefix = new StringBuilder(name);
        while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
            prefix.append('_');
        }
        return prefix.toString();
    }

    /** Deletes a temporary file and logs a warning if it is still there afterwards. */
    private static void discard(File file) {
        logger.debug("deleting " + file + "...");
        if (!file.delete() && file.exists()) {
            logger.warn("unable to delete " + file);
        }
    }

    /**
     * Compresses the UTF-8 encoding of a string with gzip.
     *
     * @param str text to compress
     * @return the gzip stream as a byte array
     * @throws IOException if compression fails
     */
    public static byte[] zip(String str) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (GZIPOutputStream gzip = new GZIPOutputStream(buffer)) {
            byte[] bytes = str.getBytes("UTF-8");
            gzip.write(bytes, 0, bytes.length);
        }
        // Read only after the gzip stream is closed, i.e. after the trailer is written.
        return buffer.toByteArray();
    }

    /**
     * Reverses {@link #zip(String)}.
     *
     * @param bArr gzip stream holding UTF-8 text
     * @return the decoded text
     * @throws IOException if the data is not a valid gzip stream
     */
    public static String unzip(byte[] bArr) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(bArr))) {
            byte[] chunk = new byte[BUFFER_SIZE];
            int n;
            while ((n = gzip.read(chunk, 0, chunk.length)) != -1) {
                buffer.write(chunk, 0, n);
            }
        }
        return buffer.toString("UTF-8");
    }

    /**
     * Command-line entry point: {@code in-file out-file col size}.
     *
     * <p>log4j is configured from the file named by the {@code log-config} system property,
     * or from {@code configuration/log-config.txt} when the property is not set. With a
     * wrong number of arguments the usage line is logged and the JVM exits with status 1.
     *
     * @param strArr command-line arguments
     * @throws Exception if an argument is not a valid integer or construction fails
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 4) {
            logger.info("java -mx1G org.fbk.cit.hlt.thewikimachine.csv.CSort in-file out-file col size");
            System.exit(1);
        }
        new CSort(new File(strArr[0]), new File(strArr[1]), Integer.parseInt(strArr[2]), Integer.parseInt(strArr[3])).run();
    }
}
