package org.apdplat.word.segmentation.impl;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apdplat.word.corpus.Bigram;
import org.apdplat.word.corpus.Trigram;
import org.apdplat.word.dictionary.Dictionary;
import org.apdplat.word.dictionary.DictionaryFactory;
import org.apdplat.word.recognition.PersonName;
import org.apdplat.word.recognition.Punctuation;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/segmentation/impl/AbstractSegmentation.class */
/**
 * Skeleton shared by the concrete segmentation algorithms.
 *
 * <p>{@link #seg(String)} splits the input into sentences with {@link Punctuation#seg},
 * runs {@link #segImpl(String)} on each sentence (on a shared thread pool when there is
 * more than one sentence) and concatenates the per-sentence results in sentence order.
 * Subclasses only have to supply {@link #segImpl(String)}.
 *
 * <p>NOTE(review): the shared pool is created with {@link Executors#newFixedThreadPool(int)}
 * and is never shut down anywhere in this class — confirm that this is intended for the
 * hosting application's lifecycle.
 */
public abstract class AbstractSegmentation implements Segmentation {
    protected static final Logger LOGGER = LoggerFactory.getLogger(AbstractSegmentation.class);
    /** Dictionary shared by all segmentation implementations. */
    protected static final Dictionary DIC = DictionaryFactory.getDictionary();
    /** Config key {@code person.name.recognize} (default {@code "true"}). */
    protected static final boolean PERSON_NAME_RECOGNIZE = "true".equals(WordConfTools.get("person.name.recognize", "true"));
    /** Config key {@code keep.whitespace} (default {@code "false"}). */
    protected static final boolean KEEP_WHITESPACE = "true".equals(WordConfTools.get("keep.whitespace", "false"));
    /** Config key {@code keep.punctuation} (default {@code "false"}). */
    protected static final boolean KEEP_PUNCTUATION = "true".equals(WordConfTools.get("keep.punctuation", "false"));
    /** Config key {@code intercept.length} (default 16); lower bound used by {@link #getInterceptLength()}. */
    private static final int INTERCEPT_LENGTH = WordConfTools.getInt("intercept.length", 16);
    /** Config key {@code ngram} (default {@code "bigram"}); selects the model used by {@link #ngram}. */
    private static final String NGRAM = WordConfTools.get("ngram", "bigram");
    /** Pool on which the sentences of a multi-sentence text are segmented; size from {@code thread.pool.size} (default 4). */
    private static final ExecutorService EXECUTOR_SERVICE = Executors.newFixedThreadPool(WordConfTools.getInt("thread.pool.size", 4));

    /**
     * Segments a single sentence. Implemented by each concrete algorithm.
     *
     * @param str the sentence text; {@link #segSentence(String)} only passes text longer than one character
     * @return the words of the sentence; a {@code null} result is logged as an error by
     *         {@link #segSentence(String)} and then treated as "no words"
     */
    public abstract List<Word> segImpl(String str);

    /**
     * Tells whether the configured {@code ngram} value names a supported model.
     *
     * @return {@code true} when the configuration value is {@code "bigram"} or {@code "trigram"}
     */
    public boolean ngramEnabled() {
        return "bigram".equals(NGRAM) || "trigram".equals(NGRAM);
    }

    /**
     * Delegates the candidate segmentations to the configured n-gram model.
     *
     * @param listArr candidate segmentations of the same text
     * @return the map produced by {@link Bigram#bigram} or {@link Trigram#trigram}, or
     *         {@code null} when {@link #ngramEnabled()} is {@code false}
     */
    public Map<List<Word>, Float> ngram(List<Word>... listArr) {
        if ("bigram".equals(NGRAM)) {
            return Bigram.bigram(listArr);
        }
        if ("trigram".equals(NGRAM)) {
            return Trigram.trigram(listArr);
        }
        return null;
    }

    /**
     * Returns the larger of the dictionary's maximum length and the configured
     * {@code intercept.length}.
     *
     * @return the intercept length to use
     */
    public int getInterceptLength() {
        // Read the dictionary value once so the comparison and the result use the same number.
        return Math.max(DIC.getMaxLength(), INTERCEPT_LENGTH);
    }

    /**
     * Segments a text that may contain several sentences.
     *
     * <p>A text that splits into exactly one sentence is segmented on the calling thread and
     * the result of {@link #segSentence(String)} is returned as is (it may be {@code null}).
     * Otherwise every sentence is submitted to the shared pool and the results are appended
     * in sentence order; sentences that yield {@code null} or fail with an exception are
     * skipped after the failure has been logged.
     *
     * <p>If the calling thread is interrupted while waiting, its interrupt status is
     * restored, waiting stops and the words collected so far are returned.
     *
     * @param str the text to segment
     * @return the words of the text
     */
    @Override // org.apdplat.word.segmentation.Segmentation
    public List<Word> seg(String str) {
        List<String> sentences = Punctuation.seg(str, KEEP_PUNCTUATION, new char[0]);
        if (sentences.size() == 1) {
            return segSentence(sentences.get(0));
        }
        // Submit everything first so the sentences are processed concurrently ...
        List<Future<List<Word>>> futures = new ArrayList<>(sentences.size());
        for (String sentence : sentences) {
            futures.add(submit(sentence));
        }
        sentences.clear();
        // ... then collect in submission order so the output keeps the sentence order.
        List<Word> result = new ArrayList<>();
        for (Future<List<Word>> future : futures) {
            try {
                List<Word> words = future.get();
                if (words != null) {
                    result.addAll(words);
                }
            } catch (InterruptedException e) {
                // Restore the flag for the caller and stop waiting: with the flag set,
                // every further get() would fail immediately anyway.
                Thread.currentThread().interrupt();
                LOGGER.error("获取分词结果失败", e);
                break;
            } catch (ExecutionException e) {
                // Best effort: one failing sentence must not discard the others.
                LOGGER.error("获取分词结果失败", e);
            }
        }
        return result;
    }

    /**
     * Queues the segmentation of one sentence on the shared pool.
     *
     * @param str the sentence text
     * @return a future holding the result of {@link #segSentence(String)}
     */
    private Future<List<Word>> submit(final String str) {
        return EXECUTOR_SERVICE.submit(new Callable<List<Word>>() {
            @Override // java.util.concurrent.Callable
            public List<Word> call() {
                return AbstractSegmentation.this.segSentence(str);
            }
        });
    }

    /**
     * Segments one sentence.
     *
     * <p>A single-character sentence becomes a single word, unless the character is
     * whitespace and whitespace is not kept. Longer sentences go through
     * {@link #segImpl(String)} and, when enabled, {@link PersonName#recognize}.
     *
     * @param str the sentence text; must not be {@code null}
     * @return the words of the sentence, or {@code null} when the sentence is empty, is a
     *         dropped whitespace character, or {@link #segImpl(String)} produced nothing
     */
    public List<Word> segSentence(String str) {
        if (str.length() == 1) {
            if (KEEP_WHITESPACE || !isWhiteSpace(str.charAt(0))) {
                List<Word> single = new ArrayList<>(1);
                single.add(new Word(str));
                return single;
            }
            return null;
        }
        if (str.isEmpty()) {
            return null;
        }
        List<Word> words = segImpl(str);
        if (words == null) {
            LOGGER.error("文本 {} 没有获得分词结果", str);
            return null;
        }
        if (PERSON_NAME_RECOGNIZE) {
            words = PersonName.recognize(words);
        }
        return words;
    }

    /**
     * Appends the word cut from {@code str} to {@code list}, unless {@link #getWord}
     * rejects it.
     *
     * @param list the list to append to
     * @param str  the text being segmented
     * @param i    start index of the word in {@code str}
     * @param i2   length of the word
     */
    public void addWord(List<Word> list, String str, int i, int i2) {
        Word word = getWord(str, i, i2);
        if (word != null) {
            list.add(word);
        }
    }

    /**
     * Pushes the word cut from {@code str} onto {@code stack}, unless {@link #getWord}
     * rejects it.
     *
     * @param stack the stack to push onto
     * @param str   the text being segmented
     * @param i     start index of the word in {@code str}
     * @param i2    length of the word
     */
    public void addWord(Stack<Word> stack, String str, int i, int i2) {
        Word word = getWord(str, i, i2);
        if (word != null) {
            stack.push(word);
        }
    }

    /**
     * Cuts {@code i2} characters starting at {@code i} out of {@code str} and wraps the
     * lower-cased text in a {@link Word}.
     *
     * @param str the text being segmented
     * @param i   start index of the word in {@code str}
     * @param i2  length of the word
     * @return the word, or {@code null} when whitespace is not kept, {@code i2 <= 1} and
     *         the character at {@code i} is whitespace
     */
    protected Word getWord(String str, int i, int i2) {
        String text = str.substring(i, i + i2).toLowerCase();
        if (!KEEP_WHITESPACE && i2 <= 1 && isWhiteSpace(str, i, i2)) {
            return null;
        }
        // Allocate only once the word is known to be kept.
        return new Word(text);
    }

    /**
     * Tests whether the character at index {@code i} of {@code str} is whitespace.
     *
     * @param str the text
     * @param i   index of the character to test
     * @param i2  not used by this implementation
     * @return {@code true} when {@link #isWhiteSpace(char)} accepts the character
     */
    protected boolean isWhiteSpace(String str, int i, int i2) {
        return isWhiteSpace(str.charAt(i));
    }

    /**
     * Tests a character against the whitespace set used by the segmenter: space,
     * ideographic space (U+3000), tab and line feed.
     *
     * @param c the character to test
     * @return {@code true} when {@code c} is one of those four characters
     */
    protected boolean isWhiteSpace(char c) {
        return c == ' ' || c == '\u3000' || c == '\t' || c == '\n';
    }

    /**
     * Empty entry point; does nothing.
     *
     * @param strArr ignored
     */
    public static void main(String[] strArr) {
    }
}
