package com.atilika.kuromoji;

import com.atilika.kuromoji.dict.CharacterDefinitions;
import com.atilika.kuromoji.dict.ConnectionCosts;
import com.atilika.kuromoji.dict.Dictionary;
import com.atilika.kuromoji.dict.InsertedDictionary;
import com.atilika.kuromoji.dict.TokenInfoDictionary;
import com.atilika.kuromoji.dict.UnknownDictionary;
import com.atilika.kuromoji.dict.UserDictionary;
import com.atilika.kuromoji.trie.DoubleArrayTrie;
import com.atilika.kuromoji.util.ResourceResolver;
import com.atilika.kuromoji.viterbi.TokenFactory;
import com.atilika.kuromoji.viterbi.ViterbiBuilder;
import com.atilika.kuromoji.viterbi.ViterbiFormatter;
import com.atilika.kuromoji.viterbi.ViterbiLattice;
import com.atilika.kuromoji.viterbi.ViterbiNode;
import com.atilika.kuromoji.viterbi.ViterbiSearcher;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.List;

/* loaded from: input_file:WEB-INF/lib/kuromoji-core-0.9.0.jar:com/atilika/kuromoji/TokenizerBase.class */
/**
 * Base class for tokenizers: wires together the dictionaries and the Viterbi
 * builder/searcher/formatter taken from a {@link Builder}, and turns input text
 * into a list of tokens produced by the configured {@link TokenFactory}.
 *
 * <p>Subclasses are expected to call {@link #configure(Builder)} before any of the
 * tokenizing or debug methods are used; until then the internal collaborators are
 * {@code null}.
 */
public abstract class TokenizerBase {
    private ViterbiBuilder viterbiBuilder;
    private ViterbiSearcher viterbiSearcher;
    private ViterbiFormatter viterbiFormatter;
    /** When {@code true}, input is cut after each "。" / "、" and every piece is tokenized separately. */
    private boolean split;
    private TokenInfoDictionary tokenInfoDictionary;
    private UnknownDictionary unknownDictionary;
    /** Optional user dictionary; stays {@code null} when the builder was given none. */
    private UserDictionary userDictionary;
    private InsertedDictionary insertedDictionary;
    /** Factory used to materialise tokens; kept as a raw type to preserve the field's declared type for subclasses. */
    protected TokenFactory tokenFactory;
    /** Maps each Viterbi node type to the dictionary handed to the token factory for nodes of that type. */
    protected EnumMap<ViterbiNode.Type, Dictionary> dictionaryMap = new EnumMap<>(ViterbiNode.Type.class);

    /**
     * Collects the settings and dictionaries a {@link TokenizerBase} is configured from.
     *
     * <p>The feature indexes ({@link #totalFeatures}, {@link #readingFeature},
     * {@link #partOfSpeechFeature}) default to {@code -1}, and {@link #resolver} and
     * {@link #tokenFactory} default to {@code null}; nothing in this class assigns them,
     * so concrete builders presumably do (confirm against the subclasses).
     */
    public static abstract class Builder {
        protected DoubleArrayTrie doubleArrayTrie;
        protected ConnectionCosts connectionCosts;
        protected TokenInfoDictionary tokenInfoDictionary;
        protected UnknownDictionary unknownDictionary;
        protected CharacterDefinitions characterDefinitions;
        protected InsertedDictionary insertedDictionary;
        protected UserDictionary userDictionary = null;
        protected Mode mode = Mode.NORMAL;
        protected boolean split = true;
        protected List<Integer> penalties = Collections.emptyList();
        protected int totalFeatures = -1;
        protected int readingFeature = -1;
        protected int partOfSpeechFeature = -1;
        protected ResourceResolver resolver;
        protected TokenFactory tokenFactory;

        /**
         * Loads every built-in dictionary through {@link #resolver} and creates the
         * inserted dictionary sized by {@link #totalFeatures}.
         *
         * @throws RuntimeException wrapping any exception raised while loading
         */
        protected void loadDictionaries() {
            try {
                this.doubleArrayTrie = DoubleArrayTrie.newInstance(this.resolver);
                this.connectionCosts = ConnectionCosts.newInstance(this.resolver);
                this.tokenInfoDictionary = TokenInfoDictionary.newInstance(this.resolver);
                // The unknown dictionary needs the character definitions, so those are loaded first.
                this.characterDefinitions = CharacterDefinitions.newInstance(this.resolver);
                this.unknownDictionary = UnknownDictionary.newInstance(this.resolver, this.characterDefinitions, this.totalFeatures);
                this.insertedDictionary = new InsertedDictionary(this.totalFeatures);
            } catch (Exception e) {
                throw new RuntimeException("Could not load dictionaries.", e);
            }
        }

        /**
         * Creates the tokenizer described by this builder.
         *
         * @param <T> concrete tokenizer type
         * @return the built tokenizer
         */
        public abstract <T extends TokenizerBase> T build();

        /**
         * Reads a user dictionary from the given stream, using the feature indexes
         * currently set on this builder. The stream is not closed by this method.
         *
         * @param inputStream source of the user dictionary
         * @return this builder
         * @throws IOException if the user dictionary cannot be read
         */
        public Builder userDictionary(InputStream inputStream) throws IOException {
            this.userDictionary = new UserDictionary(inputStream, this.totalFeatures, this.readingFeature, this.partOfSpeechFeature);
            return this;
        }

        /**
         * Reads a user dictionary from the file at the given path.
         *
         * @param str path of the user dictionary file
         * @return this builder
         * @throws IOException if the file cannot be opened or read
         */
        public Builder userDictionary(String str) throws IOException {
            // try-with-resources closes the file even when parsing the dictionary fails.
            try (InputStream input = new BufferedInputStream(new FileInputStream(str))) {
                userDictionary(input);
            }
            return this;
        }
    }

    /** Tokenization modes; the selected mode is passed to the Viterbi builder and searcher. */
    public enum Mode {
        NORMAL,
        SEARCH,
        EXTENDED
    }

    /**
     * Loads the builder's dictionaries and initialises this tokenizer from them.
     *
     * <p>Note: the decompiler reported that this method was originally declared
     * {@code protected}; it is kept {@code public} here so the visible signature is unchanged.
     *
     * @param builder source of the dictionaries and settings
     * @throws RuntimeException if the dictionaries cannot be loaded
     */
    public void configure(Builder builder) {
        builder.loadDictionaries();
        this.tokenFactory = builder.tokenFactory;
        this.tokenInfoDictionary = builder.tokenInfoDictionary;
        this.unknownDictionary = builder.unknownDictionary;
        this.userDictionary = builder.userDictionary;
        this.insertedDictionary = builder.insertedDictionary;
        this.viterbiBuilder = new ViterbiBuilder(builder.doubleArrayTrie, this.tokenInfoDictionary, this.unknownDictionary, this.userDictionary, builder.mode);
        this.viterbiSearcher = new ViterbiSearcher(builder.mode, builder.connectionCosts, this.unknownDictionary, builder.penalties);
        this.viterbiFormatter = new ViterbiFormatter(builder.connectionCosts);
        this.split = builder.split;
        initDictionaryMap();
    }

    /** Registers the dictionary that backs each node type; the USER entry is {@code null} when no user dictionary is set. */
    private void initDictionaryMap() {
        this.dictionaryMap.put(ViterbiNode.Type.KNOWN, this.tokenInfoDictionary);
        this.dictionaryMap.put(ViterbiNode.Type.UNKNOWN, this.unknownDictionary);
        this.dictionaryMap.put(ViterbiNode.Type.USER, this.userDictionary);
        this.dictionaryMap.put(ViterbiNode.Type.INSERTED, this.insertedDictionary);
    }

    /**
     * Tokenizes the given text.
     *
     * @param str text to tokenize
     * @return the tokens, in the order they were produced
     */
    public List<? extends TokenBase> tokenize(String str) {
        return createTokenList(str);
    }

    /**
     * Tokenizes the given text, optionally splitting it at "。" / "、" first.
     *
     * <p>When splitting is enabled each piece ends with its punctuation mark and is
     * tokenized on its own; the piece's start offset in {@code str} is added to every
     * token position so positions refer to the full input.
     *
     * <p>Note: the decompiler reported that this method was originally declared
     * {@code protected}; it is kept {@code public} here so the visible signature is unchanged.
     *
     * @param str text to tokenize
     * @param <T> token type produced by the configured token factory
     * @return the tokens of all pieces, in input order
     */
    public <T extends TokenBase> List<T> createTokenList(String str) {
        if (!this.split) {
            return createTokenList(0, str);
        }
        List<Integer> splitPositions = getSplitPositions(str);
        if (splitPositions.isEmpty()) {
            return createTokenList(0, str);
        }
        List<T> tokens = new ArrayList<>();
        int offset = 0;
        for (int position : splitPositions) {
            // The punctuation mark at 'position' belongs to the piece it terminates.
            tokens.addAll(this.<T>createTokenList(offset, str.substring(offset, position + 1)));
            offset = position + 1;
        }
        // Trailing text after the last punctuation mark, if any.
        if (offset < str.length()) {
            tokens.addAll(this.<T>createTokenList(offset, str.substring(offset)));
        }
        return tokens;
    }

    /**
     * Writes the formatted lattice together with the best path found for the text
     * to the given stream as UTF-8, then flushes the stream. The text is not split first.
     *
     * @param outputStream destination of the formatted output; not closed by this method
     * @param str text to analyse
     * @throws IOException if writing fails
     */
    public void debugTokenize(OutputStream outputStream, String str) throws IOException {
        ViterbiLattice lattice = this.viterbiBuilder.build(str);
        String formatted = this.viterbiFormatter.format(lattice, this.viterbiSearcher.search(lattice));
        outputStream.write(formatted.getBytes(StandardCharsets.UTF_8));
        outputStream.flush();
    }

    /**
     * Writes the formatted lattice for the text (without a best path) to the given
     * stream as UTF-8, then flushes the stream.
     *
     * @param outputStream destination of the formatted output; not closed by this method
     * @param str text to analyse
     * @throws IOException if writing fails
     */
    public void debugLattice(OutputStream outputStream, String str) throws IOException {
        String formatted = this.viterbiFormatter.format(this.viterbiBuilder.build(str));
        outputStream.write(formatted.getBytes(StandardCharsets.UTF_8));
        outputStream.flush();
    }

    /**
     * Finds the index of every "。" and "、" in the text, in ascending order.
     *
     * @param str text to scan
     * @return indexes of the punctuation marks; empty when there are none
     */
    private List<Integer> getSplitPositions(String str) {
        List<Integer> positions = new ArrayList<>();
        int from = 0;
        while (true) {
            int fullStop = str.indexOf("。", from);
            int comma = str.indexOf("、", from);
            // If one mark is missing (-1) take the other (max); if both exist take the nearer (min).
            int next = (fullStop < 0 || comma < 0) ? Math.max(fullStop, comma) : Math.min(fullStop, comma);
            if (next < 0) {
                return positions;
            }
            positions.add(next);
            from = next + 1;
        }
    }

    /**
     * Tokenizes one piece of text and shifts token positions by the piece's offset.
     *
     * @param i offset of {@code str} within the original input
     * @param str piece of text to tokenize
     * @param <T> token type produced by the configured token factory
     * @return the tokens along the path returned by the Viterbi searcher
     */
    private <T extends TokenBase> List<T> createTokenList(int i, String str) {
        List<T> tokens = new ArrayList<>();
        ViterbiLattice lattice = this.viterbiBuilder.build(str);
        for (ViterbiNode node : this.viterbiSearcher.search(lattice)) {
            int wordId = node.getWordId();
            // KNOWN nodes with word id -1 are skipped; presumably these are the
            // begin/end-of-sentence sentinels (confirm against ViterbiNode/ViterbiLattice).
            if (node.getType() == ViterbiNode.Type.KNOWN && wordId == -1) {
                continue;
            }
            // The factory field is a raw type, so the token type cannot be checked at compile time.
            @SuppressWarnings("unchecked")
            T token = (T) this.tokenFactory.createToken(
                    wordId,
                    node.getSurface(),
                    node.getType(),
                    i + node.getStartIndex(),
                    this.dictionaryMap.get(node.getType()));
            tokens.add(token);
        }
        return tokens;
    }
}
