package org.fbk.cit.hlt.core.lsa.io;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Date;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/io/FileFreqFilter.class */
/**
 * Command-line tool that filters a frequency-annotated text file.
 *
 * <p>Each input line is split with the pattern built from
 * {@code StringTable.HORIZONTAL_TABULATION} (presumably a tab; confirm against
 * {@code StringTable}). Lines that yield exactly two fields are interpreted as
 * {@code <frequency> <separator> <value>}; every other line is ignored. The value of each
 * line whose frequency is greater than or equal to the cutoff is written to the output file,
 * and a small frequency histogram (1..10 and "above 10") is logged.
 */
public class FileFreqFilter {
    static Logger logger = Logger.getLogger(FileFreqFilter.class.getName());

    private static final Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);

    /** Highest frequency that gets its own histogram slot; larger ones share slot 0. */
    private static final int MAX_TRACKED_FREQ = 10;

    /**
     * Runs the filter: reads {@code input}, writes the surviving values to {@code output}
     * (UTF-8, preceded by four {@code #} header lines) and logs the statistics.
     *
     * @param input  UTF-8 file to read
     * @param output file to (over)write
     * @param cutoff minimum frequency (inclusive) a line needs in order to be kept
     * @throws Exception on I/O failure, or {@link NumberFormatException} when the first
     *                   field of a two-field line is not an integer
     */
    public FileFreqFilter(File input, File output, int cutoff) throws Exception {
        // Slot 0 counts frequencies above MAX_TRACKED_FREQ, slots 1..MAX_TRACKED_FREQ count exact values.
        int[] histogram = new int[MAX_TRACKED_FREQ + 1];
        int kept = 0;
        int total = 0;

        PrintWriter printWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output), "UTF-8")));
        try {
            printWriter.println("# Created by FileFreqFilter, " + new Date());
            printWriter.println("# input: " + input);
            printWriter.println("# output: " + output);
            printWriter.println("# cutoff: " + cutoff);
            printWriter.flush();

            LineNumberReader lineNumberReader = new LineNumberReader(new InputStreamReader(new FileInputStream(input), "UTF-8"));
            try {
                String line;
                while ((line = lineNumberReader.readLine()) != null) {
                    String[] fields = tabPattern.split(line);
                    if (fields.length != 2) {
                        // Not a "<frequency> <value>" record; skipped and not counted.
                        continue;
                    }
                    int frequency = Integer.parseInt(fields[0]);
                    if (frequency >= cutoff) {
                        kept++;
                        printWriter.println(fields[1]);
                    }
                    if (frequency > MAX_TRACKED_FREQ) {
                        histogram[0]++;
                    } else if (frequency >= 1) {
                        histogram[frequency]++;
                    }
                    // Frequencies <= 0 are deliberately left out of the histogram: slot 0 is
                    // reserved for "above MAX_TRACKED_FREQ" and a negative index would throw.
                    // They still count towards the total.

                    if (total % 100000 == 0) {
                        // Progress indicator on stdout.
                        System.out.print(".");
                    }
                    total++;
                }
            } finally {
                // Close the reader even when a line fails to parse.
                lineNumberReader.close();
            }
            printWriter.flush();
        } finally {
            // Close the writer on every path, including a missing input file.
            printWriter.close();
        }

        System.out.print("\n");
        logger.info(kept + "/" + total);
        logStatistics(histogram, total);
    }

    /**
     * Logs, for each non-empty histogram slot, its count together with the cumulative
     * fraction of counted lines seen so far.
     *
     * @param histogram slot 0 = frequencies above {@code MAX_TRACKED_FREQ}, slot k = frequency k
     * @param total     number of counted lines; only used as a divisor when some slot is
     *                  non-empty, in which case it is necessarily positive
     */
    private static void logStatistics(int[] histogram, int total) {
        double cumulative = 0.0d;
        for (int freq = 1; freq < histogram.length; freq++) {
            if (histogram[freq] > 0) {
                // Cast before dividing: int / int would truncate the fraction to 0.
                cumulative += (double) histogram[freq] / total;
                logger.info("F(" + freq + ")=" + histogram[freq] + ParsedPageLink.START_SUFFIX_PATTERN + cumulative + ")");
            }
        }
        if (histogram[0] > 0) {
            double overall = cumulative + ((double) histogram[0] / total);
            logger.info("F(freq>" + MAX_TRACKED_FREQ + ")=" + histogram[0] + ParsedPageLink.START_SUFFIX_PATTERN + overall + ")");
        }
    }

    /**
     * Command-line entry point. Expects three arguments: input file, output file and the
     * integer frequency cutoff. The log4j configuration file is taken from the
     * {@code log-config} system property, defaulting to {@code log-config.txt}.
     *
     * @param args command-line arguments
     * @throws Exception if the filter fails
     */
    public static void main(String[] args) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        long start = System.currentTimeMillis();
        PropertyConfigurator.configure(logConfig);
        if (args.length != 3) {
            System.out.println(getHelp());
            System.exit(1);
        }
        int cutoff;
        try {
            cutoff = Integer.parseInt(args[2]);
        } catch (NumberFormatException e) {
            // Report a bad cutoff as a usage error instead of a raw stack trace.
            System.err.println("Invalid frequency cutoff: " + args[2]);
            System.out.println(getHelp());
            System.exit(1);
            return; // not reached; keeps 'cutoff' definitely assigned for the compiler
        }
        new FileFreqFilter(new File(args[0]), new File(args[1]), cutoff);
        System.out.println("frequency filter applied in " + (System.currentTimeMillis() - start) + " ms");
    }

    /**
     * Builds the usage text printed when the command line is invalid.
     *
     * @return the multi-line usage message
     */
    private static String getHelp() {
        StringBuilder help = new StringBuilder();
        // Use the real class name so the printed command line can actually be run.
        help.append("Usage: java -mx1024M ").append(FileFreqFilter.class.getName()).append(" input output f\n\n");
        help.append("Arguments:\n");
        help.append("\tinput\t-> file from which to read the df file \n");
        help.append("\toutput\t-> output file\n");
        help.append("\tf\t-> frequency cutoff\n");
        return help.toString();
    }
}
