package com.efficient.common.util;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import com.efficient.common.entity.HotWord;
import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.mining.word2vec.DocVectorModel;
import com.hankcs.hanlp.mining.word2vec.WordVectorModel;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;

/* loaded from: input_file:com/efficient/common/util/HanLPUtil.class */
/**
 * Static helpers built on HanLP: word-frequency ("hot word") analysis and two
 * text-similarity measures (document vectors and a bag-of-words cosine).
 *
 * <p>Configuration lives in public static fields that {@link #init} populates.
 * Nothing in this class synchronizes access to them.
 */
public class HanLPUtil {
    /** Punctuation and symbol characters. Not referenced by any method in this class. */
    private static final String FILTER_TERMS = "`~!@#$^&*()=|{}':;',\\[\\].<>/?~！@#￥……&*（）——|{}【】‘；：”“'。，、？";

    /**
     * Part-of-speech prefixes skipped when {@link #init} is given no nature list.
     * NOTE(review): in HanLP's tag set these presumably denote numerals, quantifiers,
     * time words and punctuation - confirm against the HanLP tag documentation.
     */
    private static final String[] DEFAULT_SKIP_NATURES = {"m", "q", "t", "w"};

    /** Words that {@link #hotAnalyse} never counts (exact match on the term's word). */
    public static List<String> skipDictList = new ArrayList<>();

    /** Part-of-speech prefixes; a term whose nature starts with any of them is ignored. */
    public static List<String> skipNatureList = new ArrayList<>();

    /** Document vector model used by {@link #vecSimilarity}; {@code null} until {@link #init} loads one. */
    public static DocVectorModel docVectorModel;

    /**
     * Segments the text, removes stop words, and counts how often each remaining word occurs.
     *
     * <p>A term is left out when its nature starts with an entry of {@link #skipNatureList}
     * or its word equals an entry of {@link #skipDictList}.
     *
     * @param str text to analyse
     * @return one {@link HotWord} per distinct word carrying the word, the nature of its last
     *         counted occurrence, and its count; {@code null} when {@code str} is blank
     */
    public static List<HotWord> hotAnalyse(String str) {
        if (StrUtil.isBlank(str)) {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }
        List<Term> terms = CoreStopWordDictionary.apply(StandardTokenizer.segment(str));
        HashMap<String, Integer> counts = new HashMap<>();
        HashMap<String, String> natures = new HashMap<>();
        for (Term term : terms) {
            if (hasSkippedNature(term) || skipDictList.contains(term.word)) {
                continue;
            }
            natures.put(term.word, term.nature == null ? null : term.nature.toString());
            counts.merge(term.word, 1, Integer::sum);
        }

        List<HotWord> hotWords = new ArrayList<>(counts.size());
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            HotWord hotWord = new HotWord();
            hotWord.setWord(entry.getKey());
            hotWord.setNature(natures.get(entry.getKey()));
            hotWord.setCount(entry.getValue());
            hotWords.add(hotWord);
        }
        return hotWords;
    }

    /**
     * Returns the similarity of two texts according to the loaded document vector model.
     *
     * @param str  first text
     * @param str2 second text
     * @return whatever {@link DocVectorModel#similarity(String, String)} reports
     * @throws IllegalStateException if {@link #init} has not loaded a word-vector model yet
     */
    public static double vecSimilarity(String str, String str2) {
        DocVectorModel model = docVectorModel;
        if (model == null) {
            throw new IllegalStateException(
                    "docVectorModel is not initialised; call HanLPUtil.init with a word-vector model path first");
        }
        return model.similarity(str, str2);
    }

    /**
     * Configures the utility.
     *
     * <p>Supplied lists are copied, so this class never mutates the caller's lists and later
     * changes to them are not observed here.
     *
     * @param list  words to register in HanLP's {@link CustomDictionary}; ignored when empty
     * @param list2 replacement for {@link #skipDictList}; the current list is kept when empty
     * @param list3 replacement for {@link #skipNatureList}; when empty the current list is kept
     *              and the default prefixes "m", "q", "t", "w" are added if missing
     * @param str   path handed to {@link WordVectorModel}; no model is loaded when blank
     * @throws IOException if the word-vector model cannot be read
     */
    public static void init(List<String> list, List<String> list2, List<String> list3, String str) throws IOException {
        if (CollUtil.isNotEmpty(list)) {
            list.forEach(CustomDictionary::add);
        }
        if (CollUtil.isNotEmpty(list2)) {
            skipDictList = new ArrayList<>(list2);
        }
        if (CollUtil.isNotEmpty(list3)) {
            skipNatureList = new ArrayList<>(list3);
        } else {
            // Work on a copy: the current list may be immutable or owned by someone else, and
            // repeated init calls must not pile up duplicate default entries.
            List<String> withDefaults = new ArrayList<>(skipNatureList);
            for (String nature : DEFAULT_SKIP_NATURES) {
                if (!withDefaults.contains(nature)) {
                    withDefaults.add(nature);
                }
            }
            skipNatureList = withDefaults;
        }
        if (StrUtil.isNotBlank(str)) {
            docVectorModel = new DocVectorModel(new WordVectorModel(str));
        }
    }

    /**
     * Computes the cosine similarity of two texts over binary word-presence vectors.
     *
     * <p>Both texts are segmented, terms with a skipped nature are dropped, and each text is
     * turned into a 0/1 vector over the union of the remaining words.
     *
     * @param str  first text
     * @param str2 second text
     * @return the cosine of the two vectors, or {@code 0.0} when either text has no usable words
     */
    public static double getSimilarity(String str, String str2) {
        List<String> firstWords = getSplitWords(str);
        List<String> secondWords = getSplitWords(str2);
        List<String> vocabulary = mergeList(firstWords, secondWords);
        return cosineSimilarity(vectorize(firstWords, vocabulary), vectorize(secondWords, vocabulary));
    }

    /** Reports whether the term's nature starts with any prefix in {@link #skipNatureList}. */
    private static boolean hasSkippedNature(Term term) {
        if (term.nature == null) {
            return false;
        }
        for (String prefix : skipNatureList) {
            if (term.nature.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /** Segments the text and returns the words of all terms whose nature is not skipped. */
    private static List<String> getSplitWords(String str) {
        if (StrUtil.isEmpty(str)) {
            return new ArrayList<>();
        }
        return HanLP.segment(str).stream()
                .filter(term -> !hasSkippedNature(term))
                .map(term -> term.word)
                .collect(Collectors.toList());
    }

    /** Returns the distinct elements of both lists, in order of first appearance. */
    private static List<String> mergeList(List<String> list, List<String> list2) {
        List<String> combined = new ArrayList<>(list.size() + list2.size());
        combined.addAll(list);
        combined.addAll(list2);
        return combined.stream().distinct().collect(Collectors.toList());
    }

    /** Builds a vector with 1.0 at every position of {@code list2} whose word occurs in {@code list}. */
    private static RealVector vectorize(List<String> list, List<String> list2) {
        // Set lookup keeps this linear instead of scanning `list` once per vocabulary word.
        Set<String> present = new HashSet<>(list);
        double[] components = new double[list2.size()];
        for (int i = 0; i < components.length; i++) {
            components[i] = present.contains(list2.get(i)) ? 1.0d : 0.0d;
        }
        return new ArrayRealVector(components);
    }

    /** Returns the cosine of the two vectors, or 0.0 when either has zero norm. */
    private static double cosineSimilarity(RealVector realVector, RealVector realVector2) {
        // RealVector.cosine throws MathArithmeticException for a zero-norm operand.
        if (realVector.getNorm() == 0.0d || realVector2.getNorm() == 0.0d) {
            return 0.0d;
        }
        return realVector.cosine(realVector2);
    }

    /** Counts, for each element of {@code list}, how many times it occurs in {@code list2}. */
    private static int[] statistic(List<String> list, List<String> list2) {
        int[] frequencies = new int[list.size()];
        for (int i = 0; i < frequencies.length; i++) {
            frequencies[i] = Collections.frequency(list2, list.get(i));
        }
        return frequencies;
    }
}
