package org.apdplat.word.dictionary;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apdplat.word.recognition.RecognitionTool;
import org.apdplat.word.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/dictionary/DictionaryTools.class */
/**
 * Static helpers for maintaining plain-text dictionary files (one entry per line, UTF-8).
 *
 * <p>Two operations are provided: {@link #merge(List, String)} combines several dictionary
 * files into one de-duplicated, sorted file, and {@link #removePhraseFromDic(String, String)}
 * strips from a dictionary every line that also appears as a key in a {@code key=value} file.
 */
public class DictionaryTools {
    private static final Logger LOGGER = LoggerFactory.getLogger(DictionaryTools.class);

    /**
     * Merges {@code src/main/resources/dic.txt} and {@code target/dic.txt} and writes the
     * result back to {@code src/main/resources/dic.txt}.
     *
     * @param strArr command-line arguments; not used
     * @throws IOException if any of the files cannot be read or written
     */
    public static void main(String[] strArr) throws IOException {
        List<String> sources = new ArrayList<>();
        sources.add("src/main/resources/dic.txt");
        sources.add("target/dic.txt");
        merge(sources, "src/main/resources/dic.txt");
    }

    /**
     * Removes from the dictionary file every line that exactly equals a key found in the
     * phrase file, then rewrites the dictionary file in place.
     *
     * <p>Only phrase-file lines that split on {@code '='} into exactly two parts contribute
     * a key (the part before the {@code '='}); all other lines are ignored. Dictionary lines
     * are compared as-is, without trimming.
     *
     * <p>This method is best-effort: any failure is logged and not propagated to the caller.
     *
     * @param str  path of the phrase file containing {@code key=value} lines
     * @param str2 path of the dictionary file to filter and overwrite
     */
    public static void removePhraseFromDic(String str, String str2) {
        try {
            Set<String> phrases = new HashSet<>();
            for (String line : Files.readAllLines(Paths.get(str), StandardCharsets.UTF_8)) {
                String[] parts = line.split("=");
                if (parts.length == 2) {
                    phrases.add(parts[0]);
                }
            }

            List<String> dictionaryLines = Files.readAllLines(Paths.get(str2), StandardCharsets.UTF_8);
            List<String> kept = new ArrayList<>(dictionaryLines.size());
            for (String entry : dictionaryLines) {
                if (!phrases.contains(entry)) {
                    kept.add(entry);
                }
            }

            Files.write(Paths.get(str2), kept, StandardCharsets.UTF_8);
            LOGGER.info("移除短语结构数目：{}", dictionaryLines.size() - kept.size());
        } catch (IOException | RuntimeException e) {
            // Deliberately swallowed after logging: callers rely on this method never throwing.
            LOGGER.error("移除短语结构失败：", e);
        }
    }

    /**
     * Reads every source file, keeps the trimmed lines that pass the word filter, removes
     * duplicates, sorts the survivors in natural {@code String} order and writes them to the
     * target file (replacing its previous content).
     *
     * <p>All sources are read completely before the target is written, so the target may be
     * one of the sources.
     *
     * @param list paths of the dictionary files to merge
     * @param str  path of the file that receives the merged dictionary
     * @throws IOException if a source cannot be read or the target cannot be written
     */
    public static void merge(List<String> list, String str) throws IOException {
        List<String> allLines = new ArrayList<>();
        for (String source : list) {
            allLines.addAll(Files.readAllLines(Paths.get(source), StandardCharsets.UTF_8));
        }

        Set<String> keptWords = new HashSet<>();
        for (String line : allLines) {
            String word = line.trim();
            if (isMergeable(word)) {
                keptWords.add(word);
            } else {
                LOGGER.debug("过滤：{}", word);
            }
        }
        LOGGER.info("合并词数：{}", allLines.size());
        LOGGER.info("保留词数：{}", keptWords.size());

        List<String> sortedWords = new ArrayList<>(keptWords);
        Collections.sort(sortedWords);
        Files.write(Paths.get(str), sortedWords, StandardCharsets.UTF_8);
    }

    /**
     * Decides whether a trimmed line is kept by {@link #merge(List, String)}.
     *
     * <p>A word is kept when it is 2 to 4 characters long, is accepted by
     * {@code Utils.isChineseCharAndLengthAtLeastTwo} and is not matched by
     * {@code RecognitionTool.recog}. The cheap length checks run first so the two
     * external checks are only invoked for plausible candidates.
     */
    private static boolean isMergeable(String word) {
        int length = word.length();
        if (length > 4 || length < 2) {
            return false;
        }
        // NOTE(review): presumably recog() flags tokens that can be recognised at runtime
        // without a dictionary entry - confirm against RecognitionTool.
        return Utils.isChineseCharAndLengthAtLeastTwo(word) && !RecognitionTool.recog(word);
    }
}
