package org.fbk.cit.hlt.thewikimachine.xmldump.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.tukaani.xz.common.Util;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/util/CollectGoodTemplates.class */
/**
 * Filters a tab-separated input file against a set of known template names and a numeric
 * threshold, writing the accepted {@code field0 <TAB> field1} pairs to an output file and,
 * optionally, the distinct accepted template names to a second file.
 *
 * <p>Lines are analysed by a fixed-size thread pool; the output writer and the set of
 * accepted template names are shared between workers and guarded by {@code synchronized}
 * blocks on the respective objects.
 */
public class CollectGoodTemplates {
    static Logger logger = Logger.getLogger(CollectGoodTemplates.class.getName());

    /** Template names accepted as candidates; populated before any worker is started. */
    private HashSet<String> goodTemplates;

    /** A line is kept only if its fourth column is strictly greater than this value. */
    int threshold;

    /** Number of worker threads used by the four-argument constructor. */
    public static final int DEFAULT_THREADS_NUMBER = 1;

    private int numThreads;

    /** Capacity of the bounded queue that feeds the worker threads. */
    public static final int DEFAULT_QUEUE_SIZE = 10000;

    /** Number of worker threads used when the class is run from the command line. */
    private static final int MAIN_THREADS_NUMBER = 12;

    /**
     * Worker that examines one input line and, if it passes all filters, records it in the
     * shared writer and in the shared set of accepted template names.
     */
    public class AnalyzeString implements Runnable {
        private String toBeAnalyzed;
        BufferedWriter w;
        HashSet<String> veryGoodTemplates;

        /**
         * @param str            the raw, tab-separated input line
         * @param bufferedWriter shared output writer; access is synchronized on this object
         * @param hashSet        shared set collecting accepted template names; access is
         *                       synchronized on this object
         */
        public AnalyzeString(String str, BufferedWriter bufferedWriter, HashSet<String> hashSet) {
            this.toBeAnalyzed = str;
            this.w = bufferedWriter;
            this.veryGoodTemplates = hashSet;
        }

        /**
         * Splits the line on tabs and keeps it only when it has at least five fields, the
         * three numeric fields parse, and all filter conditions hold. Never throws: lines
         * that cannot be parsed are skipped, and unexpected failures are logged.
         */
        @Override // java.lang.Runnable
        public void run() {
            String[] fields = this.toBeAnalyzed.split(StringTable.HORIZONTAL_TABULATION);
            if (fields.length <= 4) {
                return;
            }
            try {
                analyze(fields);
            } catch (NumberFormatException e) {
                // Malformed numeric columns are expected in noisy input: skip the line.
                if (logger.isDebugEnabled()) {
                    logger.debug("Skipping line with non-numeric field: " + this.toBeAnalyzed);
                }
            } catch (RuntimeException e) {
                // Keep the worker alive, but do not hide the failure.
                logger.error("Unexpected error while analyzing line: " + this.toBeAnalyzed, e);
            }
        }

        /** Applies the filters to an already split line and records it when accepted. */
        private void analyze(String[] fields) {
            String key = fields[0];
            String templateName = fields[1];
            // NOTE(review): the meaning of the three numeric columns is not visible in
            // this file; only the arithmetic below is established here.
            int col2 = Integer.parseInt(fields[2]);
            int col3 = Integer.parseInt(fields[3]);
            int col4 = Integer.parseInt(fields[4]);

            boolean accepted = col2 - col4 <= 2
                    && CollectGoodTemplates.this.goodTemplates.contains(templateName)
                    && col3 > CollectGoodTemplates.this.threshold
                    && key.length() > 0
                    && templateName.length() > 0;
            if (!accepted) {
                return;
            }

            synchronized (this.veryGoodTemplates) {
                this.veryGoodTemplates.add(templateName);
            }
            synchronized (this.w) {
                try {
                    this.w.append(key).append('\t').append(templateName).append('\n');
                } catch (IOException e) {
                    logger.error("Unable to write line for template " + templateName, e);
                }
            }
        }
    }

    /**
     * Runs the filter with {@link #DEFAULT_THREADS_NUMBER} worker threads and without
     * writing the list of accepted template names.
     *
     * @param str  input file, one tab-separated record per line
     * @param str2 template file; the first column of each line with at least two columns
     *             is taken as a candidate template name
     * @param str3 output file (deleted and recreated)
     * @param i    threshold applied to the fourth column of each input line
     * @throws IOException if the output file cannot be created or an I/O error occurs
     *                     while processing the input
     */
    public CollectGoodTemplates(String str, String str2, String str3, int i) throws IOException {
        this(str, str2, str3, i, DEFAULT_THREADS_NUMBER, null);
    }

    /**
     * Validates the files, loads the candidate template names and runs the filter.
     *
     * <p>If the output file cannot be created, or the input or template file does not
     * exist, an error is logged and the JVM is terminated with exit status 1.
     *
     * @param str  input file, one tab-separated record per line
     * @param str2 template file; the first column of each line with at least two columns
     *             is taken as a candidate template name
     * @param str3 output file (deleted and recreated)
     * @param i    threshold applied to the fourth column of each input line
     * @param i2   number of worker threads
     * @param str4 file receiving the distinct accepted template names, or {@code null} to
     *             skip writing it
     * @throws IOException if the output file cannot be created or an I/O error occurs
     *                     while processing the input
     */
    public CollectGoodTemplates(String str, String str2, String str3, int i, int i2, String str4) throws IOException {
        this.goodTemplates = null;
        this.threshold = 0;
        this.numThreads = i2;

        File outputFile = new File(str3);
        if (outputFile.exists()) {
            outputFile.delete();
        }
        if (!outputFile.createNewFile()) {
            logger.error("File " + str3 + " not writeable!");
            System.exit(1);
        }
        if (!new File(str).exists()) {
            logger.error("File " + str + " does not exist!");
            System.exit(1);
        }
        if (!new File(str2).exists()) {
            logger.error("Template file " + str2 + " does not exist!");
            System.exit(1);
        }

        logger.info("Loading templates...");
        HashSet<String> templates = loadTemplateNames(str2);
        start(str, str3, templates, i, str4);
    }

    /**
     * Reads the template file and collects the first column of every line that has at
     * least two tab-separated columns. A read failure is logged and whatever was read up
     * to that point is returned (best effort, as before).
     */
    private static HashSet<String> loadTemplateNames(String templateFile) {
        HashSet<String> templates = new HashSet<>();
        // NOTE(review): FileReader uses the platform default charset — confirm this
        // matches the encoding of the template files.
        try (BufferedReader reader = new BufferedReader(new FileReader(templateFile))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(StringTable.HORIZONTAL_TABULATION);
                if (fields.length >= 2) {
                    templates.add(fields[0]);
                }
            }
        } catch (IOException e) {
            logger.error("Error while reading template file " + templateFile, e);
        }
        return templates;
    }

    /**
     * Streams the input file through the worker pool into the output file and then,
     * if requested, writes the distinct accepted template names.
     */
    private void start(String inputFile, String outputFile, HashSet<String> templates, int minimum,
            String goodTemplatesFile) throws IOException {
        this.goodTemplates = templates;
        this.threshold = minimum;
        logger.info("Loading file in memory and writing it to disk...");

        HashSet<String> veryGoodTemplates = new HashSet<>();
        // The writer is closed only after the pool has terminated, so that no worker
        // writes to an already closed stream in the normal case.
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile)));
                BufferedReader reader = new BufferedReader(new FileReader(inputFile))) {
            logger.info("creating the thread executor (" + this.numThreads + ")");
            // CallerRunsPolicy makes the reading thread execute the task itself when the
            // queue is full, which throttles reading instead of rejecting lines.
            ThreadPoolExecutor executor = new ThreadPoolExecutor(this.numThreads, this.numThreads, 1L,
                    TimeUnit.MINUTES, new ArrayBlockingQueue<Runnable>(DEFAULT_QUEUE_SIZE),
                    new ThreadPoolExecutor.CallerRunsPolicy());
            try {
                submitLines(reader, writer, executor, veryGoodTemplates);
            } finally {
                shutdownAndWait(executor);
            }
            writer.flush();
        }
        logger.info("Finished writing!");

        if (goodTemplatesFile != null) {
            logger.info("Write good templates to file");
            writeGoodTemplates(goodTemplatesFile, veryGoodTemplates);
        }
    }

    /** Submits one {@link AnalyzeString} task per input line, printing progress dots. */
    private void submitLines(BufferedReader reader, BufferedWriter writer, ThreadPoolExecutor executor,
            HashSet<String> veryGoodTemplates) throws IOException {
        long lineCount = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            try {
                executor.execute(new AnalyzeString(line, writer, veryGoodTemplates));
            } catch (RuntimeException e) {
                logger.error("Unable to process line " + (lineCount + 1), e);
            }
            lineCount++;
            if (lineCount % 100000 == 0) {
                System.out.print(".");
            }
            if (lineCount % 5000000 == 0) {
                System.out.println(" " + lineCount);
            }
        }
        System.out.println("");
    }

    /** Stops accepting tasks and blocks until all queued tasks have completed. */
    private static void shutdownAndWait(ThreadPoolExecutor executor) {
        executor.shutdown();
        logger.debug("waiting to end " + new Date() + "...");
        try {
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            logger.error(e);
            // Restore the flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /** Writes every accepted template name on its own line. */
    private static void writeGoodTemplates(String file, HashSet<String> templateNames) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file)))) {
            for (String templateName : templateNames) {
                writer.write(templateName + "\n");
            }
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr {@code in-wiki-xml out-global-folder threshold}
     * @throws Exception if the threshold is not an integer or processing fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 3) {
            logger.error("Wrong number of parameters " + strArr.length);
            System.out.println("java -mx6G org.fbk.cit.hlt.thewikimachine.xmldump.util.CollectGoodTemplates\n in-wiki-xml -- Input file\n out-global-folder -- Output global folder\n threshold -- Threshold\n");
            System.exit(-1);
        }
        String inputXml = strArr[0];
        String outputFolder = strArr[1];
        int minimum = Integer.parseInt(strArr[2]);

        String separator = System.getProperty("file.separator");
        if (!outputFolder.endsWith(separator)) {
            outputFolder = outputFolder + separator;
        }

        ExtractorParameters extractorParameters = new ExtractorParameters(inputXml, outputFolder);
        new CollectGoodTemplates(
                extractorParameters.getWikipediaTemplateFileNames().get("map-rep"),
                extractorParameters.getWikipediaTemplateFileNames().get("good"),
                extractorParameters.getWikipediaTemplateFileNames().get("pruned"),
                minimum,
                MAIN_THREADS_NUMBER,
                extractorParameters.getWikipediaTemplateFileNames().get("infoboxes"));
    }
}
