package it.unimi.dsi.law.warc.tool;

import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.law.warc.io.GZWarcRecord;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.stat.SummaryStats;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/warc/tool/GZWarcStats.class */
/**
 * Command-line tool that reads a gzipped WARC file record by record and reports
 * summary statistics (min, max, mean, standard deviation, sum) about the
 * compressed size, the uncompressed size and the per-record compression ratio.
 *
 * <p>Output is either plain text (the default) or a single HTML table row,
 * optionally wrapped in a table with header rows.
 */
public class GZWarcStats {
    private static final Logger LOGGER = LoggerFactory.getLogger(GZWarcStats.class);

    /** Size in bytes of the buffer wrapped around the input stream. */
    static final int IO_BUFFER_SIZE = 65536;

    /**
     * Reads every record from the given stream and accumulates per-record statistics.
     *
     * <p>For each record read, the {@code uncompressedSkipLength} and
     * {@code compressedSkipLength} fields of the record's GZ header are added to the
     * respective accumulators, and the ratio between them, expressed as a percentage
     * truncated to an integer, is added to the third accumulator.
     *
     * @param fastBufferedInputStream the stream the gzipped records are read from; it is
     *        not closed by this method.
     * @param summaryStats accumulator receiving the uncompressed length of each record.
     * @param summaryStats2 accumulator receiving the compressed length of each record.
     * @param summaryStats3 accumulator receiving the compression ratio (in percent,
     *        truncated to an integer) of each record.
     * @return the count reported by the progress logger, which is updated once per record.
     * @throws IOException if reading from the stream fails.
     * @throws WarcRecord.FormatException if a record cannot be parsed.
     */
    public static long run(FastBufferedInputStream fastBufferedInputStream, SummaryStats summaryStats, SummaryStats summaryStats2, SummaryStats summaryStats3) throws IOException, WarcRecord.FormatException {
        final GZWarcRecord record = new GZWarcRecord();
        final ProgressLogger progressLogger = new ProgressLogger(LOGGER, "records");
        progressLogger.logInterval = 10000L;
        progressLogger.start("Analyzing...");
        // read() returns -1 when no further record is available.
        while (record.read(fastBufferedInputStream) != -1) {
            final GZWarcRecord.GZHeader header = record.gzheader;
            summaryStats2.add(header.compressedSkipLength);
            summaryStats.add(header.uncompressedSkipLength);
            // NOTE(review): no guard against uncompressedSkipLength == 0; the division is
            // done in floating point, so it would not throw but would skew the statistics.
            summaryStats3.add((int) ((100.0d * header.compressedSkipLength) / header.uncompressedSkipLength));
            progressLogger.update();
        }
        progressLogger.done();
        return progressLogger.count;
    }

    /**
     * Parses the command line, analyzes the selected input and prints the report
     * to standard output.
     *
     * <p>The input is standard input when the basename is {@code -}; otherwise it is
     * the file named by the basename with {@code .warc.gz} appended.
     *
     * @param strArr the command-line arguments.
     * @throws Exception if argument parsing, reading or record parsing fails.
     */
    public static void main(String[] strArr) throws Exception {
        final SimpleJSAP jsap = new SimpleJSAP(GZWarcStats.class.getName(), "Compute some statistics about a gzipped warc file.", new Parameter[]{new Switch("html", 'h', "html", "Generate output in HTML format."), new Switch("headers", 'H', "header", "Generate HTML table headers format."), new UnflaggedOption("warcFile", JSAP.STRING_PARSER, "-", true, false, "The gzipped Warc file basename (if not present, or -, stdin/stdout will be used).")});
        final JSAPResult parsed = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }
        final String basename = parsed.getString("warcFile");
        final boolean html = parsed.getBoolean("html");
        final boolean headers = parsed.getBoolean("headers");

        final SummaryStats uncompressedSize = new SummaryStats();
        final SummaryStats compressedSize = new SummaryStats();
        final SummaryStats ratio = new SummaryStats();

        final FastBufferedInputStream input = new FastBufferedInputStream(basename.equals("-") ? System.in : new FileInputStream(new File(basename + ".warc.gz")), IO_BUFFER_SIZE);
        final long records;
        try {
            records = run(input, uncompressedSize, compressedSize, ratio);
        } finally {
            // Close the stream even when run() fails, so the file handle is not leaked.
            input.close();
        }

        if (html) {
            printHtml(basename, records, headers, compressedSize, uncompressedSize, ratio);
        } else {
            printText(records, compressedSize, uncompressedSize, ratio);
        }
    }

    /** Prints the plain-text report to standard output. */
    private static void printText(long records, SummaryStats compressedSize, SummaryStats uncompressedSize, SummaryStats ratio) {
        System.out.println("Records: " + records);
        System.out.println("Compressed size: min = " + ((long) compressedSize.min()) + ", max = " + ((long) compressedSize.max()) + ", avg = " + compressedSize.mean() + ", sd = " + compressedSize.standardDeviation() + ", sum = " + ((long) compressedSize.sum()));
        System.out.println("Uncompressed size: min = " + ((long) uncompressedSize.min()) + ", max = " + ((long) uncompressedSize.max()) + ", avg = " + uncompressedSize.mean() + ", sd = " + uncompressedSize.standardDeviation() + ", sum = " + ((long) uncompressedSize.sum()));
        System.out.println("Compression ratio: min = " + ((long) ratio.min()) + "%, max = " + ((long) ratio.max()) + "%, avg = " + ratio.mean() + "%, sd = " + ratio.standardDeviation() + "%");
    }

    /**
     * Prints the report as one HTML table row to standard output; when {@code headers}
     * is set, the row is wrapped in a table with two header rows.
     */
    private static void printHtml(String basename, long records, boolean headers, SummaryStats compressedSize, SummaryStats uncompressedSize, SummaryStats ratio) {
        if (headers) {
            System.out.println("<TABLE border='1'>");
            System.out.println("<TR><TH rowspan='2'>Name<TH rowspan='2'>Num.<br>Records<TH colspan='5'>Compressed byte size<TH colspan='5'>Uncompressed byte size<TH colspan='5'>Compression ratio (%)");
            System.out.println("<TR><TH>min<TH>max<TH>average<TH>stdev<TH>sum<TH>min<TH>max<TH>average<TH>stdev<TH>sum<TH>min<TH>max<TH>average<TH>stdev");
        }
        System.out.print("<tr><td>" + basename + "<td>" + records);
        System.out.print("<td>" + ((long) compressedSize.min()) + "<td>" + ((long) compressedSize.max()) + "<td>" + truncateToTwoDecimals(compressedSize.mean()) + "<td>" + truncateToTwoDecimals(compressedSize.standardDeviation()) + "<td>" + ((long) compressedSize.sum()));
        System.out.print("<td>" + ((long) uncompressedSize.min()) + "<td>" + ((long) uncompressedSize.max()) + "<td>" + truncateToTwoDecimals(uncompressedSize.mean()) + "<td>" + truncateToTwoDecimals(uncompressedSize.standardDeviation()) + "<td>" + ((long) uncompressedSize.sum()));
        System.out.print("<td>" + ((long) ratio.min()) + "<td>" + ((long) ratio.max()) + "<td>" + truncateToTwoDecimals(ratio.mean()) + "<td>" + truncateToTwoDecimals(ratio.standardDeviation()));
        System.out.println();
        if (headers) {
            System.out.println("</TABLE>");
        }
    }

    /**
     * Truncates a value to two decimal digits. The intermediate cast is to {@code long}
     * rather than {@code int} so that values above roughly 21.4 million are not clamped
     * to {@code Integer.MAX_VALUE / 100.0}.
     */
    private static double truncateToTwoDecimals(double value) {
        return ((long) (100.0d * value)) / 100.0d;
    }
}
