package org.maochen.nlp.utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.maochen.nlp.sentencetypeclassifier.SentenceTypeClassifier;

/* loaded from: input_file:org/maochen/nlp/utils/StopwordsGenerator.class */
/**
 * Builds a word-frequency table from text files and writes it out sorted by
 * descending frequency, as raw material for a stopword list.
 *
 * <p>Counts live in a {@link ConcurrentHashMap} and are updated with
 * {@link Map#merge}, so several files may be processed concurrently (as
 * {@link #main} does for a directory) without losing increments.
 */
public class StopwordsGenerator {
    /** Occurrence count per word; after {@link #normalize()} the values are relative frequencies. */
    private final Map<String, Double> wordCount = new ConcurrentHashMap<>();

    /** Number of counted units: sentences for {@link DocumentCount}, tokens for {@link WikiSingleWordCount}. */
    private final AtomicLong totalCount = new AtomicLong(0);

    /**
     * Sentence-level counting: each word is counted at most once per sentence,
     * and the total is the number of non-empty sentences.
     */
    static class DocumentCount {
        DocumentCount() {
        }

        /**
         * Normalizes one sentence and increments the count of every distinct word in it.
         * Sentences that normalize to the empty string are ignored.
         *
         * @param str                raw sentence text
         * @param stopwordsGenerator generator whose counters are updated
         */
        public static void addCount(String str, StopwordsGenerator stopwordsGenerator) {
            String sentence = stopwordsGenerator.stringNormalize(str);
            if (sentence.isEmpty()) {
                return;
            }
            stopwordsGenerator.totalCount.incrementAndGet();
            // distinct(): a word contributes once per sentence, however often it repeats.
            // merge(): atomic increment, safe when several threads update the same word.
            Arrays.stream(sentence.split("\\s"))
                    .distinct()
                    .forEach(word -> stopwordsGenerator.wordCount.merge(word, 1.0d, Double::sum));
        }

        /**
         * Reads a file whose paragraphs are separated by blank lines, splits each
         * paragraph on '.' and counts every resulting sentence via {@link #addCount}.
         * I/O errors are reported with a stack trace and otherwise ignored.
         *
         * @param file               input text file
         * @param stopwordsGenerator generator whose counters are updated
         */
        public static void generateFromFile(File file, StopwordsGenerator stopwordsGenerator) {
            try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                StringBuilder paragraph = new StringBuilder();
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    if (line.trim().isEmpty()) {
                        flushParagraph(paragraph, stopwordsGenerator);
                    } else {
                        paragraph.append(line).append(SentenceTypeClassifier.DELIMITER);
                    }
                }
                // The last paragraph is not necessarily followed by a blank line.
                flushParagraph(paragraph, stopwordsGenerator);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /** Counts every '.'-separated sentence in the buffer, then clears the buffer. */
        private static void flushParagraph(StringBuilder paragraph, StopwordsGenerator stopwordsGenerator) {
            for (String sentence : paragraph.toString().split("\\.")) {
                addCount(sentence, stopwordsGenerator);
            }
            paragraph.setLength(0);
        }
    }

    /**
     * Token-level counting over a stream of characters in which tokens are
     * separated by single space characters; every token occurrence is counted.
     */
    public static class WikiSingleWordCount {
        /** A progress line is printed each time the running total reaches a multiple of this. */
        private static final long PROGRESS_INTERVAL = 10_000_000L;

        WikiSingleWordCount() {
        }

        /**
         * Reads the file character by character, treating ' ' as the token
         * separator, and counts each normalized token. The final token is counted
         * even when the file does not end with a space. Tokens that normalize to
         * the empty string are skipped. I/O errors are reported with a stack trace
         * and otherwise ignored.
         *
         * @param file               input text file
         * @param stopwordsGenerator generator whose counters are updated
         */
        public static void generateFromFile(File file, StopwordsGenerator stopwordsGenerator) {
            try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                StringBuilder token = new StringBuilder();
                for (int ch = reader.read(); ch != -1; ch = reader.read()) {
                    if (ch == ' ') {
                        countToken(token, stopwordsGenerator);
                    } else {
                        token.append((char) ch);
                    }
                }
                // Flush whatever follows the last space.
                countToken(token, stopwordsGenerator);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

        /** Normalizes the buffered token, counts it if non-empty, and clears the buffer. */
        private static void countToken(StringBuilder token, StopwordsGenerator stopwordsGenerator) {
            String word = stopwordsGenerator.stringNormalize(token.toString());
            token.setLength(0);
            if (word.isEmpty()) {
                // Consecutive spaces or punctuation-only tokens are not words.
                return;
            }
            stopwordsGenerator.wordCount.merge(word, 1.0d, Double::sum);
            // Use the value returned by the increment so that, under concurrency,
            // each multiple of the interval is reported exactly once.
            long processed = stopwordsGenerator.totalCount.incrementAndGet();
            if (processed % PROGRESS_INTERVAL == 0) {
                System.out.println("Processed tokens: " + processed);
            }
        }
    }

    /**
     * Canonicalizes text before counting: removes double quotes, turns commas
     * and apostrophes into the delimiter, strips trailing punctuation and the
     * characters {@code ? : ; !}, removes {@code --}, expands a few contractions,
     * collapses whitespace runs to the delimiter, lower-cases and trims.
     *
     * <p>The contraction patterns are case-insensitive because lower-casing
     * happens last; otherwise a capitalised "I've" would never be expanded.
     *
     * @param str raw text; must not be null
     * @return the normalized text, possibly empty
     */
    public String stringNormalize(String str) {
        return str.replaceAll("\"", "")
                .replaceAll(",", SentenceTypeClassifier.DELIMITER)
                .replaceAll("\\p{Punct}+$", "")
                .replaceAll("[?:;!]", "")
                .replaceAll("--", "")
                .replaceAll("(?i)i've", "i have")
                .replaceAll("(?i)we'll", "we will")
                .replaceAll("(?i)he's", "he has")
                .replaceAll("'", SentenceTypeClassifier.DELIMITER)
                .replaceAll("\\s+", SentenceTypeClassifier.DELIMITER)
                .toLowerCase()
                .trim();
    }

    /**
     * Divides every count by the total number of counted units, turning the
     * counts into relative frequencies. Does nothing if nothing was counted.
     */
    public void normalize() {
        double total = this.totalCount.doubleValue();
        if (total == 0.0d) {
            return;
        }
        this.wordCount.replaceAll((word, count) -> count / total);
    }

    /**
     * Returns the live word table: raw counts before {@link #normalize()},
     * relative frequencies after it.
     *
     * @return the internal map (not a copy)
     */
    public Map<String, Double> getProbability() {
        return this.wordCount;
    }

    /**
     * Writes one {@code key=value} line per entry to the given file, in list
     * order. Writing stops at the first I/O error, which is reported with a
     * stack trace; the writer is closed in every case.
     *
     * @param str  path of the output file
     * @param list entries to write
     */
    public void writeFile(String str, List<Map.Entry<String, Double>> list) {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(str)))) {
            for (Map.Entry<String, Double> entry : list) {
                writer.write(entry.toString());
                writer.write(System.lineSeparator());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point. Counts tokens in a single file, or in every
     * regular file directly inside a directory (processed in parallel), then
     * writes the frequencies to the output file in descending order.
     *
     * @param strArr {@code [input file or directory, output file]}
     */
    public static void main(String[] strArr) {
        if (strArr.length != 2) {
            System.err.println("Please specify dir or filename | output file location");
            return;
        }
        StopwordsGenerator stopwordsGenerator = new StopwordsGenerator();
        File file = new File(strArr[0]);
        if (file.isFile()) {
            WikiSingleWordCount.generateFromFile(file, stopwordsGenerator);
        } else {
            // listFiles() returns null when the path is missing or cannot be read.
            File[] children = file.listFiles();
            if (children == null) {
                System.err.println("Cannot read input path: " + file);
                return;
            }
            Arrays.stream(children)
                    .parallel()
                    .filter(File::isFile)
                    .forEach(child -> WikiSingleWordCount.generateFromFile(child, stopwordsGenerator));
        }
        stopwordsGenerator.normalize();
        List<Map.Entry<String, Double>> ranked = stopwordsGenerator.getProbability().entrySet().stream()
                .sorted((left, right) -> right.getValue().compareTo(left.getValue()))
                .collect(Collectors.toList());
        stopwordsGenerator.writeFile(strArr[1], ranked);
    }
}
