package org.fbk.cit.hlt.thewikimachine.csv;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.analysis.HardTokenizer;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.tukaani.xz.common.Util;
import org.xerial.snappy.SnappyInputStream;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/CSVExtractor.class */
/**
 * Base class for extractors that read a (optionally Snappy-compressed) UTF-8 text file line by
 * line and hand every non-empty line to {@link #processLine(String)} through a fixed-size thread
 * pool.
 *
 * <p>The pool is created once in the constructor and is shut down at the end of
 * {@link #read(File, boolean)}; an instance is therefore meant to perform a single read. Because
 * lines are dispatched to pool threads (and, when the queue is full, to the reading thread itself
 * through {@link ThreadPoolExecutor.CallerRunsPolicy}), {@link #processLine(String)} may be invoked
 * concurrently from several threads when more than one thread is configured.
 */
public abstract class CSVExtractor {
    /** Number of worker threads requested at construction time. */
    private int numThreads;

    /** Pool that runs one {@link LineProcessor} per non-empty input line. */
    private final ExecutorService myExecutor;

    public static final int DEFAULT_THREADS_NUMBER = 1;
    public static final int DEFAULT_QUEUE_SIZE = 10000;
    public static final int DEFAULT_NOTIFICATION_POINT = 10000;

    /** A progress line is logged every {@code notificationPoint} lines; 0 disables it. */
    private int notificationPoint;

    /** Line limit after which {@link #read(File, boolean)} stops and exits the JVM. */
    private int numPages;

    public static final int DEFAULT_NUM_PAGES = Integer.MAX_VALUE;
    static Logger logger = Logger.getLogger(CSVExtractor.class.getName());

    // NOTE: DecimalFormat is not thread-safe; it is only used from the thread that calls read().
    private static final DecimalFormat df = new DecimalFormat("###,###,###,###");

    /* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/csv/CSVExtractor$LineProcessor.class */
    /** Task that forwards a single input line to {@link CSVExtractor#processLine(String)}. */
    public class LineProcessor implements Runnable {
        private final String line;

        /**
         * @param str the raw line to process
         */
        public LineProcessor(String str) {
            this.line = str;
        }

        @Override // java.lang.Runnable
        public void run() {
            CSVExtractor.this.processLine(this.line);
        }
    }

    /** Creates an extractor with one worker thread and no practical line limit. */
    public CSVExtractor() {
        this(DEFAULT_THREADS_NUMBER, DEFAULT_NUM_PAGES);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates an extractor with the given number of worker threads and no practical line limit.
     *
     * @param i number of worker threads
     */
    public CSVExtractor(int i) {
        this(i, DEFAULT_NUM_PAGES);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates an extractor and its thread pool.
     *
     * @param i number of worker threads (used as both core and maximum pool size)
     * @param i2 line limit, see {@link #getNumPages()}
     * @throws IllegalArgumentException if {@code i} is not positive (raised by
     *     {@link ThreadPoolExecutor})
     */
    public CSVExtractor(int i, int i2) {
        this.numThreads = i;
        this.numPages = i2;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
        logger.info("creating the thread executor (" + i + ")");
        // Bounded queue + CallerRunsPolicy: when the queue is full the reading thread processes
        // the line itself, which throttles reading instead of growing memory without bound.
        this.myExecutor = new ThreadPoolExecutor(i, i, 1L, TimeUnit.MINUTES,
                new ArrayBlockingQueue<Runnable>(DEFAULT_QUEUE_SIZE),
                new ThreadPoolExecutor.CallerRunsPolicy());
    }

    /**
     * @return the line limit given at construction time
     */
    public int getNumPages() {
        return this.numPages;
    }

    /**
     * Handles one non-empty input line. May be called from several threads at once.
     *
     * @param str the raw line
     */
    public abstract void processLine(String str);

    /**
     * Hook declared for subclasses; it is not invoked by this class.
     *
     * @param extractorParameters extractor configuration
     */
    public abstract void start(ExtractorParameters extractorParameters);

    /** Called once by {@link #read(File, boolean)} after all dispatched lines were processed. */
    public abstract void end();

    /**
     * @return the stored thread count
     */
    public int getNumThreads() {
        return this.numThreads;
    }

    /**
     * Updates the stored thread count only; the thread pool created by the constructor is not
     * resized.
     *
     * @param i the new value
     */
    public void setNumThreads(int i) {
        this.numThreads = i;
    }

    /**
     * @return the number of lines between two progress notifications
     */
    public int getNotificationPoint() {
        return this.notificationPoint;
    }

    /**
     * @param i the number of lines between two progress notifications; 0 disables them
     */
    public void setNotificationPoint(int i) {
        this.notificationPoint = i;
    }

    /**
     * Reads an uncompressed file.
     *
     * @param str path of the file
     * @throws IOException if the file cannot be opened or read
     */
    public void read(String str) throws IOException {
        read(new File(str));
    }

    /**
     * Reads a file.
     *
     * @param str path of the file
     * @param z {@code true} if the file is Snappy-compressed
     * @throws IOException if the file cannot be opened or read
     */
    public void read(String str, boolean z) throws IOException {
        read(new File(str), z);
    }

    /**
     * Reads an uncompressed file.
     *
     * @param file the file
     * @throws IOException if the file cannot be opened or read
     */
    public void read(File file) throws IOException {
        read(file, false);
    }

    /**
     * Reads {@code file} as UTF-8 text, dispatches every non-empty line to the thread pool, waits
     * for all dispatched lines to be processed and finally calls {@link #end()}.
     *
     * <p>When the zero-based line index exceeds {@link #getNumPages()} (so {@code numPages + 1}
     * lines have been dispatched), reading stops, pending lines are awaited, {@link #end()} is
     * called and the JVM exits with status 0.
     *
     * <p>The thread pool is shut down by this method, so it should be called once per instance.
     *
     * @param file the file to read
     * @param z {@code true} if the file is Snappy-compressed
     * @throws IOException if the file cannot be opened or read
     */
    public void read(File file, boolean z) throws IOException {
        // NOTE(review): START_SUFFIX_PATTERN is a constant reference produced by decompilation;
        // presumably it is the literal " (" matching the closing ")" below - confirm.
        logger.info("reading a " + (z ? "compressed" : "uncompressed") + " csv from " + file + ParsedPageLink.START_SUFFIX_PATTERN + new Date() + ")...");
        long currentTimeMillis = System.currentTimeMillis();
        LineNumberReader lineNumberReader = openReader(file, z);
        int i = 0;
        boolean completed = false;
        try {
            // Return value is unused; presumably this forces the tokenizer singleton to be
            // initialized before worker threads need it - TODO confirm.
            HardTokenizer.getInstance();
            logger.info("totalFreq\tsize\ttime (ms)\tdate");
            String readLine;
            while ((readLine = lineNumberReader.readLine()) != null) {
                if (i > this.numPages) {
                    logger.info("Exit after " + i + " lines (" + this.numPages + ")");
                    // Let the already queued lines finish before end() finalizes the output.
                    awaitPendingLines();
                    end();
                    System.exit(0);
                }
                if (readLine.length() > 0) {
                    this.myExecutor.execute(new LineProcessor(readLine));
                }
                // A notification point of 0 would make the modulo throw; treat it as "disabled".
                if (this.notificationPoint != 0 && i % this.notificationPoint == 0) {
                    notification(i, currentTimeMillis, System.currentTimeMillis());
                    currentTimeMillis = System.currentTimeMillis();
                }
                i++;
            }
            completed = true;
        } finally {
            if (completed) {
                lineNumberReader.close();
            } else {
                // A failure is already propagating; do not let close() replace it.
                try {
                    lineNumberReader.close();
                } catch (IOException ignored) {
                    // intentionally ignored, the original exception is more informative
                }
            }
        }
        logger.info(df.format(i) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - currentTimeMillis) + StringTable.HORIZONTAL_TABULATION + new Date());
        awaitPendingLines();
        logger.info("ending the process " + new Date() + "...");
        end();
    }

    /**
     * Logs one progress line: the line count, the elapsed milliseconds and the current date.
     *
     * @param i number of lines read so far
     * @param j start of the measured interval, in milliseconds
     * @param j2 end of the measured interval, in milliseconds
     */
    public void notification(int i, long j, long j2) {
        logger.info(df.format(i) + StringTable.HORIZONTAL_TABULATION + df.format(j2 - j) + StringTable.HORIZONTAL_TABULATION + new Date());
    }

    /** Opens {@code file} as a UTF-8 line reader, closing the file stream if wrapping it fails. */
    private static LineNumberReader openReader(File file, boolean compressed) throws IOException {
        FileInputStream fileIn = new FileInputStream(file);
        try {
            if (compressed) {
                return new LineNumberReader(new InputStreamReader(new SnappyInputStream(fileIn), "UTF-8"));
            }
            return new LineNumberReader(new InputStreamReader(fileIn, "UTF-8"));
        } catch (IOException e) {
            try {
                fileIn.close();
            } catch (IOException ignored) {
                // intentionally ignored, the original exception is more informative
            }
            throw e;
        }
    }

    /** Stops accepting new lines and blocks until every dispatched line has been processed. */
    private void awaitPendingLines() {
        try {
            this.myExecutor.shutdown();
            logger.debug("waiting to end " + new Date() + "...");
            this.myExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            logger.error("interrupted while waiting for the queued lines to be processed", e);
            // Restore the flag so callers further up the stack can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }
}
