package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.xerial.snappy.SnappyInputStream;
import org.xerial.snappy.SnappyOutputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/Sort.class */
/**
 * Sorts the lines of a (optionally Snappy-compressed) UTF-8 text file on one column.
 *
 * <p>The input is consumed in chunks of at most {@code size} lines. Each chunk is grouped by the
 * value of column {@code col} (columns are obtained by splitting the line with
 * {@link StringTable#HORIZONTAL_TABULATION}) in a {@link TreeMap}, i.e. in natural
 * {@link String} order, and written to a {@code sort-N} file next to the output file. Chunk files
 * are combined pairwise by {@link Merge} and the last remaining file is renamed to the output.
 *
 * <p>Lines that have too few columns to contain column {@code col} are dropped; their number is
 * reported with a warning.
 *
 * <p>Instances are not designed for concurrent use (the shared {@link DecimalFormat} is not
 * thread-safe).
 */
public class Sort {
    static Logger logger = Logger.getLogger(Sort.class.getName());
    private static final Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    /** Number of pending chunk files that triggers an intermediate merge. */
    private static final int MERGE_THRESHOLD = 5;

    private final File in;
    private final File out;
    private final int size;
    public static final int DEFAULT_SIZE = 20000000;
    private final int col;
    private int notificationPoint;
    public static final int DEFAULT_NOTIFICATION_POINT = 1000000;
    private final boolean compress;

    /**
     * Creates a sorter for uncompressed files.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     * @param i    zero-based index of the column to sort on
     * @param i2   maximum number of lines held in memory per chunk
     * @throws IOException declared for compatibility; not thrown by this constructor
     */
    public Sort(String str, String str2, int i, int i2) throws IOException {
        this(new File(str), new File(str2), i, i2, false);
    }

    /**
     * Creates a sorter.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     * @param i    zero-based index of the column to sort on
     * @param i2   maximum number of lines held in memory per chunk
     * @param z    {@code true} if input, intermediate and output files are Snappy streams
     * @throws IOException declared for compatibility; not thrown by this constructor
     */
    public Sort(String str, String str2, int i, int i2, boolean z) throws IOException {
        this(new File(str), new File(str2), i, i2, z);
    }

    /**
     * Creates a sorter for uncompressed files.
     *
     * @param file  input file
     * @param file2 output file
     * @param i     zero-based index of the column to sort on
     * @param i2    maximum number of lines held in memory per chunk
     * @throws IOException declared for compatibility; not thrown by this constructor
     */
    public Sort(File file, File file2, int i, int i2) throws IOException {
        this(file, file2, i, i2, false);
    }

    /**
     * Creates a sorter.
     *
     * @param file  input file
     * @param file2 output file
     * @param i     zero-based index of the column to sort on
     * @param i2    maximum number of lines held in memory per chunk
     * @param z     {@code true} if input, intermediate and output files are Snappy streams
     * @throws IOException declared for compatibility; not thrown by this constructor
     */
    public Sort(File file, File file2, int i, int i2, boolean z) throws IOException {
        this.in = file;
        this.out = file2;
        this.size = i2;
        this.col = i;
        this.compress = z;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
    }

    /**
     * @return the number of processed lines between two progress log entries
     */
    public int getNotificationPoint() {
        return this.notificationPoint;
    }

    /**
     * Sets how often progress is logged.
     *
     * @param i number of lines between two progress log entries; a value {@code <= 0} disables
     *          progress logging
     */
    public void setNotificationPoint(int i) {
        this.notificationPoint = i;
    }

    /**
     * Performs the sort. I/O failures are logged and not propagated.
     *
     * <p>If the input contains no sortable line, an empty output file is written.
     */
    public void run() {
        long start = System.currentTimeMillis();
        logger.info("sorting a " + (this.compress ? "compressed" : "uncompressed") + " file (" + new Date() + ")...");
        logger.info("size:\t" + df.format(this.size));
        logger.info("in:\t" + this.in + ParsedPageLink.START_SUFFIX_PATTERN + df.format(this.in.length()) + ")");
        logger.info("out:\t" + this.out);
        Stack<File> pending = new Stack<>();
        try (LineNumberReader reader = openReader()) {
            // Resolve against the working directory first: a bare file name has no parent.
            File workDir = this.out.getAbsoluteFile().getParentFile();
            if (workDir == null) {
                throw new IOException("cannot determine the directory of " + this.out);
            }
            int chunkIndex = 0;
            while (true) {
                int linesBefore = reader.getLineNumber();
                Map<String, List<String>> chunk = read(reader, this.size, this.col, chunkIndex);
                if (reader.getLineNumber() == linesBefore) {
                    // Nothing was consumed: end of input (or a non-positive chunk size).
                    break;
                }
                if (chunk.isEmpty()) {
                    // Every line of this chunk lacked the sort column; keep reading.
                    continue;
                }
                File chunkFile = new File(workDir, "sort-" + chunkIndex);
                write(chunk, chunkFile);
                System.gc();
                pending.push(chunkFile);
                if (pending.size() >= MERGE_THRESHOLD) {
                    merge(pending, this.col, chunkIndex);
                }
                chunkIndex++;
            }
            System.gc();
            if (pending.isEmpty()) {
                write(new TreeMap<String, List<String>>(), this.out);
            } else {
                merge(pending, this.col, chunkIndex);
                File sorted = pending.pop();
                logger.info("renaming " + sorted + " to " + this.out + "...");
                if (!sorted.renameTo(this.out)) {
                    throw new IOException("cannot rename " + sorted + " to " + this.out);
                }
            }
        } catch (IOException e) {
            logger.error("sorting " + this.in + " failed", e);
        }
        logger.info("sorting done in " + df.format(System.currentTimeMillis() - start) + " ms " + new Date());
        logger.info("in:\t" + this.in + ParsedPageLink.START_SUFFIX_PATTERN + df.format(this.in.length()) + ")");
        logger.info("out:\t" + this.out + ParsedPageLink.START_SUFFIX_PATTERN + df.format(this.out.length()) + ")");
    }

    /**
     * Opens the input file as UTF-8 text, through a Snappy decoder when {@code compress} is set.
     * The file stream is closed if wrapping it fails.
     */
    private LineNumberReader openReader() throws IOException {
        FileInputStream raw = new FileInputStream(this.in);
        try {
            return new LineNumberReader(new InputStreamReader(this.compress ? new SnappyInputStream(raw) : raw, "UTF-8"));
        } catch (IOException | RuntimeException e) {
            raw.close();
            throw e;
        }
    }

    /**
     * Opens {@code file} for UTF-8 text output, through a Snappy encoder when {@code compress} is
     * set. The file stream is closed if wrapping it fails.
     */
    private PrintWriter openWriter(File file) throws IOException {
        FileOutputStream raw = new FileOutputStream(file);
        try {
            return new PrintWriter(new BufferedWriter(new OutputStreamWriter(this.compress ? new SnappyOutputStream(raw) : raw, "UTF-8")));
        } catch (IOException | RuntimeException e) {
            raw.close();
            throw e;
        }
    }

    /**
     * Reduces the stack of sorted files to a single file by repeatedly merging the two topmost
     * files into a new temporary file (created in the same directory) and deleting the inputs.
     * The merge itself is delegated to the {@link Merge} constructor.
     *
     * @param stack sorted files; on return it holds at most one file
     * @param i     zero-based index of the sort column
     * @param i2    current chunk index, used as suffix of the temporary file names
     * @throws IOException if a temporary file cannot be created or the merge fails
     */
    private void merge(Stack<File> stack, int i, int i2) throws IOException {
        long start = System.currentTimeMillis();
        logger.info("merging " + stack.size() + " " + (this.compress ? "compressed" : "uncompressed") + " files... " + new Date());
        while (stack.size() > 1) {
            File first = stack.pop();
            File second = stack.pop();
            File merged = File.createTempFile("merge", Integer.toString(i2), first.getParentFile());
            new Merge(first, second, merged, i, this.compress);
            logger.info("deleting " + first + "(" + first.length() + "...");
            if (!first.delete()) {
                logger.warn("cannot delete " + first);
            }
            logger.info("deleting " + second + "(" + second.length() + "...");
            if (!second.delete()) {
                logger.warn("cannot delete " + second);
            }
            logger.info("pushing " + merged + "(" + merged.length() + "...");
            stack.push(merged);
        }
        logger.info("merge done in " + df.format(System.currentTimeMillis() - start) + " ms " + new Date());
    }

    /**
     * Reads up to {@code i} lines and groups them by the value of column {@code i2}.
     *
     * @param lineNumberReader source of the lines
     * @param i                maximum number of lines to consume
     * @param i2               zero-based index of the sort column
     * @param i3               chunk index, used for logging only
     * @return key-ordered map from column value to the lines having that value, in input order;
     *         lines with too few columns are counted but not included
     * @throws IOException if reading fails
     */
    private Map<String, List<String>> read(LineNumberReader lineNumberReader, int i, int i2, int i3) throws IOException {
        long lastNotification = System.currentTimeMillis();
        long start = System.currentTimeMillis();
        logger.info("(" + i3 + ") sorting " + df.format(i) + " lines starting from " + df.format(lineNumberReader.getLineNumber()) + "... " + new Date());
        Map<String, List<String>> byKey = new TreeMap<>();
        int consumed = 0;
        int skipped = 0;
        logger.info("lines\tsize\ttime\tdate");
        String line;
        while (consumed < i && (line = lineNumberReader.readLine()) != null) {
            String[] columns = tabPattern.split(line);
            if (columns.length > i2) {
                String key = columns[i2];
                List<String> group = byKey.get(key);
                if (group == null) {
                    group = new ArrayList<>();
                    byKey.put(key, group);
                }
                group.add(line);
            } else {
                skipped++;
            }
            consumed++;
            if (this.notificationPoint > 0 && consumed % this.notificationPoint == 0) {
                logger.info(df.format(consumed) + StringTable.HORIZONTAL_TABULATION + df.format(byKey.size()) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - lastNotification) + StringTable.HORIZONTAL_TABULATION + new Date());
                lastNotification = System.currentTimeMillis();
            }
        }
        if (skipped > 0) {
            logger.warn(df.format(skipped) + " lines where the number of \\t is lower than " + i2);
        }
        // Report what was actually read; the last chunk is usually shorter than requested.
        logger.info(df.format(consumed) + " lines sorted in " + df.format(System.currentTimeMillis() - start) + " ms");
        return byKey;
    }

    /**
     * Writes all lines of {@code map} to {@code file}, one per line, in the map's iteration order.
     *
     * @param map  lines grouped by key
     * @param file destination; overwritten if it exists
     * @throws IOException if the file cannot be opened or a write error occurred
     */
    private void write(Map<String, List<String>> map, File file) throws IOException {
        long lastNotification = System.currentTimeMillis();
        long start = System.currentTimeMillis();
        logger.info("writing " + df.format(map.size()) + " unique keys in " + file + "...");
        PrintWriter writer = openWriter(file);
        int written = 0;
        try {
            logger.info("lines\ttime\tdate");
            for (List<String> group : map.values()) {
                for (String line : group) {
                    writer.println(line);
                    written++;
                    if (this.notificationPoint > 0 && written % this.notificationPoint == 0) {
                        logger.info(df.format(written) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - lastNotification) + StringTable.HORIZONTAL_TABULATION + new Date());
                        lastNotification = System.currentTimeMillis();
                    }
                }
            }
        } finally {
            writer.close();
        }
        // PrintWriter never throws on write or close; it only records that something failed.
        if (writer.checkError()) {
            throw new IOException("error writing " + file);
        }
        logger.info(df.format(this.size) + ParsedPageLink.START_SUFFIX_PATTERN + df.format(written) + ") lines wrote in " + (System.currentTimeMillis() - start) + " ms");
        logger.info("out:\t" + file + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file.length()) + ")");
    }

    /**
     * Command-line entry point: {@code in-file out-file col size compress}.
     *
     * <p>The log4j configuration is read from the file named by the {@code log-config} system
     * property, or from {@code configuration/log-config.txt} if the property is not set.
     *
     * @param strArr exactly five arguments; otherwise the usage is logged and the JVM exits with
     *               status 1
     * @throws Exception if an argument cannot be parsed
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 5) {
            logger.info("java -mx1G org.fbk.cit.hlt.thewikimachine.csv.Sort in-file out-file col size compress");
            System.exit(1);
        }
        File input = new File(strArr[0]);
        File output = new File(strArr[1]);
        int column = Integer.parseInt(strArr[2]);
        int chunkSize = Integer.parseInt(strArr[3]);
        boolean compressed = Boolean.parseBoolean(strArr[4]);
        new Sort(input, output, column, chunkSize, compressed).run();
    }
}
