package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.Date;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.xerial.snappy.Snappy;
import org.xerial.snappy.SnappyInputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/WC.class */
/**
 * Command-line utility that scans a UTF-8 text file line by line and logs running totals.
 *
 * <p>For every line it accumulates: the number of lines, the number of Java {@code char}s
 * ({@link String#length()}), the number of UTF-8 bytes, and the size of the line after
 * Snappy compression. Because lines are obtained with {@link LineNumberReader#readLine()},
 * line terminators are not part of any of these totals.
 */
public class WC {
    static Logger logger = Logger.getLogger(WC.class.getName());
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    /** Number of lines read between two progress reports. */
    private static final long PROGRESS_INTERVAL = 10000000L;

    /**
     * Reads {@code file} to the end, logging a progress line every {@link #PROGRESS_INTERVAL}
     * lines and a summary when the end of the input is reached.
     *
     * @param file the file to scan; its content is decoded as UTF-8
     * @param z    {@code true} if the file must be read through a {@link SnappyInputStream},
     *             {@code false} if it is read as-is
     * @throws IOException if the file cannot be opened or read, or if compressing a line fails
     */
    public WC(File file, boolean z) throws IOException {
        // Reset after every progress report, so each report shows the time spent on its own interval.
        long intervalStart = System.currentTimeMillis();
        // Never reset: used for the total elapsed time reported at the end.
        long begin = System.currentTimeMillis();
        logger.info("counting a " + (z ? "compressed" : "uncompressed") + " file (" + new Date() + ")...");
        // NOTE(review): START_SUFFIX_PATTERN is defined outside this file; presumably it supplies the
        // opening " (" that matches the closing ")" below - confirm.
        logger.info("in:\t" + file + ParsedPageLink.START_SUFFIX_PATTERN + df.format(file.length()) + ")");

        long lineCount = 0;
        long charCount = 0;
        long rawBytes = 0;
        long compressedBytes = 0;

        FileInputStream fileIn = new FileInputStream(file);
        LineNumberReader reader = null;
        try {
            // The SnappyInputStream constructor may throw (e.g. on a bad header); fileIn is then closed below.
            reader = new LineNumberReader(new InputStreamReader(z ? new SnappyInputStream(fileIn) : fileIn, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                byte[] bytes = line.getBytes("UTF-8");
                rawBytes += bytes.length;
                compressedBytes += Snappy.compress(bytes).length;
                charCount += line.length();
                lineCount++;
                if (lineCount % PROGRESS_INTERVAL == 0) {
                    logger.info(formatProgress(lineCount, charCount, rawBytes, compressedBytes, intervalStart));
                    intervalStart = System.currentTimeMillis();
                }
            }
            logger.info(formatProgress(lineCount, charCount, rawBytes, compressedBytes, intervalStart));
        } finally {
            // Release the file handle on every path, including read/compression failures.
            if (reader != null) {
                reader.close();
            } else {
                fileIn.close();
            }
        }
        logger.info("counting done in " + df.format(System.currentTimeMillis() - begin) + " ms " + new Date());
        logger.info(df.format(lineCount) + StringTable.HORIZONTAL_TABULATION + df.format(charCount));
    }

    /**
     * Builds one progress line: lines, chars, UTF-8 bytes, compressed bytes, compressed/raw
     * percentage, milliseconds elapsed since {@code intervalStart}, and the current date.
     */
    private static String formatProgress(long lineCount, long charCount, long rawBytes, long compressedBytes, long intervalStart) {
        // Floating-point division: the ratio is normally a fraction, and a zero byte count
        // (empty input or only empty lines) must not raise ArithmeticException.
        double percentage = rawBytes == 0 ? 0.0d : (100.0d * compressedBytes) / rawBytes;
        return df.format(lineCount) + StringTable.HORIZONTAL_TABULATION
                + df.format(charCount) + StringTable.HORIZONTAL_TABULATION
                + df.format(rawBytes) + StringTable.HORIZONTAL_TABULATION
                + df.format(compressedBytes) + StringTable.HORIZONTAL_TABULATION
                + df.format(percentage) + "%\t"
                + df.format(System.currentTimeMillis() - intervalStart) + StringTable.HORIZONTAL_TABULATION
                + new Date();
    }

    /**
     * Entry point. Configures log4j from the file named by the {@code log-config} system
     * property (falling back to {@code configuration/log-config.txt}), then scans the file.
     *
     * @param strArr exactly two arguments: the input file path, and a boolean
     *               ({@link Boolean#parseBoolean(String)}) telling whether the input is
     *               Snappy-compressed
     * @throws Exception if scanning the file fails
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(property);
        if (strArr.length != 2) {
            System.err.println("Wrong number of parameters " + strArr.length);
            System.err.println("Usage: java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.csv.WC in compress");
            System.exit(-1);
        }
        new WC(new File(strArr[0]), Boolean.parseBoolean(strArr[1]));
    }
}
