package org.fbk.cit.hlt.thewikimachine.classifier;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.tukaani.xz.common.Util;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/classifier/OneExamplePerSenseExtractor.class */
/**
 * Base class that reads a tab-separated file, groups consecutive rows sharing the same
 * form (column {@code 2}) and hands every group to {@link #buildExamples(Map, String)}
 * through a bounded thread pool.
 *
 * <p>Only rows with exactly nine tab-separated columns are considered; other rows are
 * skipped. Rows are grouped by <em>consecutive</em> equal forms, so the same form
 * appearing in two separate runs of the input produces two groups.
 *
 * <p>An instance supports a single call to {@code extract}: at the end of the call the
 * executor is shut down and {@code senseWriter} is closed.
 */
public abstract class OneExamplePerSenseExtractor {
    /** Form-count threshold checked by {@code extract} before processing each line. */
    protected int numForms;

    /** Number of worker threads the executor was created with. */
    protected int numThreads;

    private final ExecutorService myExecutor;

    public static final int DEFAULT_THREADS_NUMBER = 1;
    public static final int DEFAULT_NOTIFICATION_POINT = 100000;
    public static final int PAGE_COLUMN_INDEX = 1;
    public static final int DEFAULT_MINIMUM_FORM_FREQ = 1;
    public static final int DEFAULT_MINIMUM_PAGE_FREQ = 1;
    public static final boolean DEFAULT_NORMALIZE = false;

    /** A progress line is logged every {@code notificationPoint} input lines. */
    protected int notificationPoint;

    /** Writer on the output file given to the constructor; closed at the end of {@code extract}. */
    PrintWriter senseWriter;

    public static final int DEFAULT_NUM_FORMS = Integer.MAX_VALUE;
    public static final int DEFAULT_QUEUE_SIZE = 10000;

    /** Number of tab-separated columns a row must have to be processed. */
    private static final int EXPECTED_COLUMNS = 9;

    /** Column holding the form used to group consecutive rows. */
    private static final int FORM_COLUMN_INDEX = 2;

    /** Column used as the key of the map built by {@link #createSenseListMap(List)}. */
    private static final int SENSE_KEY_COLUMN_INDEX = 3;

    private int minimumFormFreq;
    private int minimumPageFreq;
    protected boolean normalized;
    protected int tfType;

    static Logger logger = Logger.getLogger(OneExamplePerSenseExtractor.class.getName());
    protected static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    protected static Pattern spacePattern = Pattern.compile(" ");
    protected static DecimalFormat df = new DecimalFormat("###,###,###,###");

    /**
     * Task that turns the rows collected for one form into a sense map and passes it to
     * {@link OneExamplePerSenseExtractor#buildExamples(Map, String)}.
     */
    public class ExampleBuilder implements Runnable {
        private List<String[]> list;
        private String form;

        /**
         * @param list rows (already split into columns) collected for {@code str}
         * @param str  the form shared by the rows
         */
        public ExampleBuilder(List<String[]> list, String str) {
            this.list = list;
            this.form = str;
        }

        @Override
        public void run() {
            Map<String, List<String[]>> senseMap = OneExamplePerSenseExtractor.this.createSenseListMap(this.list);
            OneExamplePerSenseExtractor.this.buildExamples(senseMap, this.form);
        }
    }

    /**
     * Same as {@link #OneExamplePerSenseExtractor(File, int)} with the output file given by name.
     *
     * @param str path of the output file
     * @param i   number of worker threads
     * @throws IOException if the output file cannot be opened
     */
    protected OneExamplePerSenseExtractor(String str, int i) throws IOException {
        this(new File(str), i);
    }

    /**
     * Creates the worker pool and opens the output file for writing in UTF-8.
     *
     * <p>The pool has {@code i} threads and a queue of {@link #DEFAULT_QUEUE_SIZE} tasks;
     * when the queue is full the submitting thread runs the task itself
     * ({@link ThreadPoolExecutor.CallerRunsPolicy}).
     *
     * @param file output file
     * @param i    number of worker threads
     * @throws IOException if the output file cannot be opened
     */
    public OneExamplePerSenseExtractor(File file, int i) throws IOException {
        this.numThreads = i;
        // Without this the field stays 0 and extraction stops right after the first form
        // unless setNumForms is called explicitly.
        this.numForms = DEFAULT_NUM_FORMS;
        this.normalized = DEFAULT_NORMALIZE;
        this.minimumFormFreq = DEFAULT_MINIMUM_FORM_FREQ;
        this.minimumPageFreq = DEFAULT_MINIMUM_PAGE_FREQ;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
        logger.info("creating the thread executor (" + i + ")");
        this.myExecutor = new ThreadPoolExecutor(i, i, 1L, TimeUnit.MINUTES,
                new ArrayBlockingQueue<Runnable>(DEFAULT_QUEUE_SIZE), new ThreadPoolExecutor.CallerRunsPolicy());
        this.senseWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")));
    }

    public int getTfType() {
        return this.tfType;
    }

    public void setTfType(int i) {
        this.tfType = i;
    }

    public void setNormalized(boolean z) {
        this.normalized = z;
    }

    public boolean isNormalized() {
        return this.normalized;
    }

    public int getMinimumFormFreq() {
        return this.minimumFormFreq;
    }

    public void setMinimumFormFreq(int i) {
        this.minimumFormFreq = i;
    }

    public int getMinimumPageFreq() {
        return this.minimumPageFreq;
    }

    public void setMinimumPageFreq(int i) {
        this.minimumPageFreq = i;
    }

    public int getNumForms() {
        return this.numForms;
    }

    public void setNumForms(int i) {
        this.numForms = i;
    }

    public int getNumThreads() {
        return this.numThreads;
    }

    /**
     * Updates the stored thread count only; the executor created by the constructor is
     * not resized.
     */
    public void setNumThreads(int i) {
        this.numThreads = i;
    }

    public int getNotificationPoint() {
        return this.notificationPoint;
    }

    public void setNotificationPoint(int i) {
        this.notificationPoint = i;
    }

    /**
     * Same as {@link #extract(File)} with the input file given by name.
     *
     * @param str path of the input file
     * @throws IOException if the input file cannot be read
     */
    public void extract(String str) throws IOException {
        extract(new File(str));
    }

    /**
     * Reads {@code file}, submits one {@link ExampleBuilder} per run of consecutive rows
     * with the same form, waits for all tasks to finish, closes {@code senseWriter} and
     * finally calls {@link #end()}.
     *
     * <p>The executor is shut down and the writer closed even when reading fails; in that
     * case {@link #end()} is not called and the exception is propagated.
     *
     * @param file UTF-8 encoded, tab-separated input file
     * @throws IOException if the input file cannot be read
     */
    public void extract(File file) throws IOException {
        logger.info("reading " + file + "...");
        try {
            readAndSubmit(file);
        } finally {
            shutdownAndAwait();
            logger.info("closing the streams...");
            this.senseWriter.close();
        }
        end();
        logger.info("done it");
    }

    /**
     * Streams the input file and submits a task for every completed group of rows.
     *
     * <p>NOTE(review): the limit is tested as {@code formCount > numForms} before each
     * line, so up to {@code numForms + 2} groups can be submitted (including the pending
     * group flushed after the loop). Kept as found; confirm the intended bound.
     */
    private void readAndSubmit(File file) throws IOException {
        long lapStart = System.currentTimeMillis();
        int formCount = 0;
        int lineCount = 0;
        String currentForm = null;
        List<String[]> rows = new ArrayList<String[]>();

        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            logger.info("totalFreq\tcount\ttime\tdate");
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount = reader.getLineNumber();
                if (formCount > this.numForms) {
                    logger.info("Exit after " + formCount + " forms (" + this.numForms + ")");
                    break;
                }
                try {
                    String[] columns = tabPattern.split(line);
                    if (columns.length == EXPECTED_COLUMNS) {
                        String form = columns[FORM_COLUMN_INDEX];
                        // A change of form closes the current group; never submit an empty one.
                        if (!rows.isEmpty() && !form.equals(currentForm)) {
                            this.myExecutor.execute(new ExampleBuilder(rows, currentForm));
                            rows = new ArrayList<String[]>();
                            formCount++;
                        }
                        rows.add(columns);
                        currentForm = form;
                    }
                } catch (Exception e) {
                    // Best effort: a failure on one line (including a task run in this thread
                    // by the caller-runs policy) must not abort the whole extraction.
                    logger.error("Error at line " + lineCount, e);
                }
                if (this.notificationPoint > 0 && lineCount % this.notificationPoint == 0) {
                    logProgress(lineCount, formCount, System.currentTimeMillis() - lapStart);
                    lapStart = System.currentTimeMillis();
                }
            }
        } finally {
            reader.close();
        }

        // Flush the group that was still being collected when the input ended.
        if (!rows.isEmpty()) {
            logger.debug("executing " + currentForm + ParsedPageLink.START_SUFFIX_PATTERN + rows.size() + ")...");
            this.myExecutor.execute(new ExampleBuilder(rows, currentForm));
        }
        logProgress(lineCount, formCount, System.currentTimeMillis() - lapStart);
    }

    /** Logs one tab-separated progress line: lines read, forms submitted, elapsed millis, date. */
    private static void logProgress(int lineCount, int formCount, long elapsedMillis) {
        logger.info(df.format(lineCount) + StringTable.HORIZONTAL_TABULATION + df.format(formCount)
                + StringTable.HORIZONTAL_TABULATION + df.format(elapsedMillis)
                + StringTable.HORIZONTAL_TABULATION + new Date());
    }

    /** Stops accepting tasks and blocks until the submitted ones have completed. */
    private void shutdownAndAwait() {
        this.myExecutor.shutdown();
        logger.info("waiting for execution...");
        try {
            this.myExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            logger.error(e);
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Processes the rows collected for one form. Invoked from {@link ExampleBuilder#run()},
     * i.e. from a worker thread or, when the task queue is full, from the thread calling
     * {@code extract}.
     *
     * @param map rows of the form grouped by the value of column {@code 3}
     * @param str the form
     */
    public abstract void buildExamples(Map<String, List<String[]>> map, String str);

    /** Called once at the end of a successful {@code extract}, after the writer is closed. */
    public abstract void end();

    /**
     * Groups rows by the value of column {@code 3}, keeping the input order inside each group.
     *
     * @param list rows already split into columns; each must have at least four columns
     * @return map from the column-3 value to the rows carrying it
     */
    Map<String, List<String[]>> createSenseListMap(List<String[]> list) {
        Map<String, List<String[]>> senseMap = new HashMap<String, List<String[]>>();
        for (String[] row : list) {
            String key = row[SENSE_KEY_COLUMN_INDEX];
            List<String[]> bucket = senseMap.get(key);
            if (bucket == null) {
                bucket = new ArrayList<String[]>();
                senseMap.put(key, bucket);
            }
            bucket.add(row);
        }
        return senseMap;
    }
}
