package org.fnlp.nlp.similarity.train;

import gnu.trove.set.hash.THashSet;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.fnlp.nlp.cn.ChineseTrans;
import org.fnlp.nlp.similarity.ISimilarity;
import org.fnlp.nlp.similarity.JaccardSimilarity;

/* loaded from: input_file:org/fnlp/nlp/similarity/train/WordSimilarity.class */
/**
 * Builds per-character context sets from a text corpus and uses them to
 * compare and cluster characters.
 *
 * <p>For every known character the class keeps a set of two-character
 * "context" strings (the character immediately before plus the character
 * immediately after each occurrence). Those sets are compared with the
 * configured {@link ISimilarity} implementation.
 *
 * <p>The lists {@link #word} and {@link #hashlist} are parallel: index
 * {@code k} in {@code hashlist} holds the context set of {@code word.get(k)},
 * and {@link #cmap} maps a character back to that index.
 */
public class WordSimilarity {
    /** Character encoding used for every text file this class reads or writes. */
    private static final String ENCODING = "UTF-8";

    private static final ChineseTrans tc = new ChineseTrans();

    /** Measure used to compare two context sets. */
    ISimilarity is = new JaccardSimilarity();
    /** Context set of each character, indexed in parallel with {@link #word}. */
    ArrayList<THashSet<String>> hashlist = new ArrayList<>();
    /** Known characters, each stored as a one-character string. */
    ArrayList<String> word = new ArrayList<>();
    /** Distinct characters collected by {@link #countAllC(String)}. */
    HashSet<String> allC = new HashSet<>();
    /** Maps a character to its index in {@link #word} / {@link #hashlist}. */
    HashMap<String, Integer> cmap = new HashMap<>();
    /** Members of each cluster produced by {@link #cluster(int)}. */
    ArrayList<ArrayList<String>> clusterResult = new ArrayList<>();
    /** Union of the context sets of each cluster's members. */
    ArrayList<THashSet<String>> clusterHashList = new ArrayList<>();

    /** Replaces the per-character context sets. */
    public void setHashlist(ArrayList<THashSet<String>> arrayList) {
        this.hashlist = arrayList;
    }

    /** Replaces the list of known characters. */
    public void setWord(ArrayList<String> arrayList) {
        this.word = arrayList;
    }

    /** Replaces the character-to-index map. */
    public void setCmap(HashMap<String, Integer> hashMap) {
        this.cmap = hashMap;
    }

    /**
     * Collects every distinct {@code char} of the given UTF-8 file into
     * {@link #allC}, then rebuilds {@link #word} and {@link #cmap} from it.
     *
     * @param str path of the text file to scan
     * @throws Exception if the file cannot be read
     */
    private void countAllC(String str) throws Exception {
        // try-with-resources: the reader is closed even when reading fails.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(str), ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    this.allC.add(String.valueOf(line.charAt(i)));
                }
            }
        }
        set2List();
        System.out.println("Finished count all character");
        System.out.println("word size: " + this.word.size());
    }

    /**
     * Appends every character of {@link #allC} to {@link #word} and records
     * in {@link #cmap} a running index that starts at 0 on each call.
     */
    private void set2List() {
        int index = 0;
        for (String character : this.allC) {
            this.word.add(character);
            this.cmap.put(character, Integer.valueOf(index));
            index++;
        }
    }

    /**
     * Ensures {@link #hashlist} has one context set per entry of {@link #word}.
     *
     * <p>Only the missing sets are added, so calling this repeatedly (or after
     * a model was loaded) never makes {@code hashlist} longer than {@code word}.
     */
    private void initHashSet() {
        while (this.hashlist.size() < this.word.size()) {
            this.hashlist.add(new THashSet<String>());
        }
    }

    /**
     * Reads every file of a directory through {@code SougouCA}, normalizes
     * each item's text and writes the non-empty results, one per line, to a
     * UTF-8 output file.
     *
     * @param str  directory containing the input files
     * @param str2 path of the output file
     * @throws IOException if the directory cannot be listed or writing fails
     */
    public void dirSougouCAReader(String str, String str2) throws IOException {
        File[] listFiles = new File(str).listFiles();
        if (listFiles == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list files in directory: " + str);
        }
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str2), ENCODING))) {
            for (File file : listFiles) {
                SougouCA sougouCA = new SougouCA(file.getAbsolutePath());
                while (sougouCA.hasNext()) {
                    String normalize = tc.normalize((String) sougouCA.next().getData());
                    if (normalize.length() != 0) {
                        writer.write(normalize + "\n");
                    }
                }
            }
        }
        System.out.println("Done!");
    }

    /**
     * Reads a UTF-8 text file line by line and adds the contexts found in
     * each line to {@link #hashlist}. The running line count is printed
     * every 100000 lines.
     *
     * @param str path of the text file
     * @throws IOException if the file cannot be read
     */
    public void gramString(String str) throws IOException {
        initHashSet();
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(str), ENCODING))) {
            int lineCount = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineCount++;
                if (lineCount % 100000 == 0) {
                    System.out.println(lineCount);
                }
                gramPerString(line);
            }
        }
        System.out.println("Finished load file");
    }

    /**
     * Writes the full pairwise score matrix as a tab-separated UTF-8 file:
     * a header row of characters, then one row per character with scores
     * rounded to three decimals.
     *
     * @param str path of the output file
     * @throws Exception if writing fails or the similarity measure throws
     */
    public void calSimilarity(String str) throws Exception {
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str), ENCODING))) {
            int size = this.hashlist.size();
            for (int col = 0; col < size; col++) {
                writer.write("\t" + this.word.get(col));
            }
            writer.write("\n");
            for (int row = 0; row < size; row++) {
                writer.write(this.word.get(row) + "\t");
                for (int col = 0; col < size; col++) {
                    float score = this.is.calc(this.hashlist.get(row), this.hashlist.get(col));
                    writer.write((Math.round(score * 1000.0f) / 1000.0f) + "\t");
                }
                writer.write("\n");
            }
        }
    }

    /**
     * Records, for each known character of the line, the pair formed by its
     * left and right neighbours.
     *
     * <p>The line is wrapped as {@code "^" + str + "*"} so that the first and
     * last characters also have a neighbour on both sides.
     */
    private void gramPerString(String str) {
        String padded = "^" + str + "*";
        for (int i = 1; i < padded.length() - 1; i++) {
            String current = padded.substring(i, i + 1);
            Integer index = this.cmap.get(current);
            if (index != null) {
                String context = padded.substring(i - 1, i) + padded.substring(i + 1, i + 2);
                this.hashlist.get(index.intValue()).add(context);
            }
        }
    }

    /**
     * Seeds {@code i} clusters with the first {@code i} characters and merges
     * every remaining character into the cluster chosen by
     * {@link #clusterPerElement(int)}.
     *
     * @param i number of seed clusters; must satisfy {@code 0 < i < word.size()}
     * @throws Exception if the similarity measure throws
     */
    private void cluster(int i) throws Exception {
        if (!checkValid(i)) {
            System.out.println("Do not need cluster");
            return;
        }
        initclusterResult(i);
        for (int element = i; element < this.word.size(); element++) {
            merge(element, clusterPerElement(element));
            outputTerminal(element);
        }
    }

    /**
     * Returns the index of the cluster whose score against the given
     * character's context set is the lowest (0 when there are no clusters).
     *
     * <p>NOTE(review): this selects the minimum of {@code is.calc}. If
     * {@code calc} returns a similarity rather than a distance, the maximum
     * would be expected here -- confirm against the ISimilarity contract.
     */
    private int clusterPerElement(int i) throws Exception {
        THashSet<String> contexts = this.hashlist.get(i);
        float best = Float.MAX_VALUE;
        int bestIndex = 0;
        for (int c = 0; c < this.clusterHashList.size(); c++) {
            float calc = this.is.calc(contexts, this.clusterHashList.get(c));
            if (calc < best) {
                best = calc;
                bestIndex = c;
            }
        }
        return bestIndex;
    }

    /** Adds character {@code i} and its context set to cluster {@code i2}. */
    private void merge(int i, int i2) {
        this.clusterHashList.get(i2).addAll(this.hashlist.get(i));
        this.clusterResult.get(i2).add(this.word.get(i));
    }

    /**
     * Tells whether {@code i} is a usable seed count: at least one cluster
     * must exist to merge into, and at least one character must remain to
     * be clustered.
     */
    private boolean checkValid(int i) {
        return i > 0 && i < this.word.size();
    }

    /** Creates one single-member cluster for each of the first {@code i} characters. */
    private void initclusterResult(int i) {
        for (int seed = 0; seed < i; seed++) {
            ArrayList<String> members = new ArrayList<>();
            members.add(this.word.get(seed));
            this.clusterResult.add(members);
            // Copy the set so that merging into the cluster never alters hashlist.
            THashSet<String> contexts = new THashSet<>();
            contexts.addAll(this.hashlist.get(seed));
            this.clusterHashList.add(contexts);
        }
    }

    /** Prints the 1-based position every 1000 processed characters. */
    private void outputTerminal(int i) {
        int position = 1 + i;
        if (position % 1000 == 0) {
            System.out.println(position);
        }
    }

    /**
     * Writes {@link #clusterResult} to a UTF-8 file, one cluster per line
     * with each member followed by a space.
     *
     * @param str path of the output file
     * @throws IOException if writing fails
     */
    public void biList2File(String str) throws IOException {
        try (BufferedWriter writer =
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str), ENCODING))) {
            for (ArrayList<String> members : this.clusterResult) {
                for (String member : members) {
                    writer.write(member + " ");
                }
                writer.write("\n");
            }
        }
    }

    /**
     * Reads one gzip-compressed, Java-serialized object from a file.
     *
     * <p>NOTE(review): Java native deserialization is unsafe on untrusted
     * input; only load files produced by {@link #saveObject(String, Object)}.
     *
     * @param str path of the file
     * @return the deserialized object
     * @throws IOException            if the file cannot be read
     * @throws ClassNotFoundException if the serialized class is unavailable
     */
    protected Object loadObject(String str) throws IOException, ClassNotFoundException {
        try (ObjectInputStream in = new ObjectInputStream(
                new BufferedInputStream(new GZIPInputStream(new FileInputStream(str))))) {
            return in.readObject();
        }
    }

    /**
     * Writes one object to a file as gzip-compressed Java serialization.
     *
     * @param str path of the file
     * @param obj object to serialize
     * @throws IOException if writing fails
     */
    protected void saveObject(String str, Object obj) throws IOException {
        try (ObjectOutputStream out = new ObjectOutputStream(
                new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(str))))) {
            out.writeObject(obj);
        }
    }

    /**
     * Loads {@link #hashlist}, {@link #word} and {@link #cmap} from the files
     * {@code str + "hashlist"}, {@code str + "word"} and {@code str + "cmap"}.
     *
     * @param str common path prefix of the model files
     * @throws IOException            if a file cannot be read
     * @throws ClassNotFoundException if a serialized class is unavailable
     */
    public void read(String str) throws IOException, ClassNotFoundException {
        // The casts are unchecked by nature: the files are expected to hold
        // exactly what save(String) wrote.
        @SuppressWarnings("unchecked")
        ArrayList<THashSet<String>> loadedHashlist =
                (ArrayList<THashSet<String>>) loadObject(str + "hashlist");
        @SuppressWarnings("unchecked")
        ArrayList<String> loadedWord = (ArrayList<String>) loadObject(str + "word");
        @SuppressWarnings("unchecked")
        HashMap<String, Integer> loadedCmap = (HashMap<String, Integer>) loadObject(str + "cmap");
        setHashlist(loadedHashlist);
        setWord(loadedWord);
        setCmap(loadedCmap);
        System.out.println("Finished load model");
    }

    /**
     * Saves {@link #hashlist}, {@link #word} and {@link #cmap} to the files
     * {@code str + "hashlist"}, {@code str + "word"} and {@code str + "cmap"}.
     *
     * @param str common path prefix of the model files
     * @throws IOException if writing fails
     */
    public void save(String str) throws IOException {
        saveObject(str + "hashlist", this.hashlist);
        saveObject(str + "word", this.word);
        saveObject(str + "cmap", this.cmap);
        System.out.println("Finished save to disk");
    }

    /**
     * Loads the model from {@code ./tmpdata/model/}, clusters with 100 seed
     * clusters and writes the result to {@code ./tmpdata/clusterResult}.
     */
    public static void main(String[] strArr) {
        WordSimilarity wordSimilarity = new WordSimilarity();
        try {
            wordSimilarity.read("./tmpdata/model/");
            wordSimilarity.cluster(100);
            wordSimilarity.biList2File("./tmpdata/clusterResult");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
