package org.carrot2.text.linguistic.lucene;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.ExceptionUtils;

/* loaded from: input_file:org/carrot2/text/linguistic/lucene/ChineseTokenizerAdapter.class */
/**
 * Adapts a Lucene {@link SentenceTokenizer} / {@link WordTokenFilter} chain to the
 * {@link ITokenizer} interface.
 *
 * <p>Usage: call {@link #reset(Reader)} to attach an input, then call {@link #nextToken()}
 * repeatedly until it returns {@code -1}; after each successful call,
 * {@link #setTermBuffer(MutableCharArray)} exposes the characters of the current token.
 *
 * <p>An instance keeps mutable per-stream state (the current filter and term attribute), and
 * a single {@link SentenceTokenizer} is reused across {@link #reset(Reader)} calls.
 */
public final class ChineseTokenizerAdapter implements ITokenizer {
    /** Matches number-like tokens: optional sign/currency prefix, digit groups, optional % or $. */
    private static final Pattern NUMERIC = Pattern.compile("[\\-+'$]?\\d+([:\\-/,.]?\\d+)*[%$]?");

    // Values returned by nextToken(). NOTE(review): these presumably mirror token-type
    // constants declared on ITokenizer -- confirm and reference those constants if so.
    /** Returned when the underlying stream has no more tokens. */
    private static final short END_OF_STREAM = -1;
    /** Returned for any token that is neither a lone comma nor number-like. */
    private static final short TYPE_WORD = 1;
    /** Returned for tokens fully matched by {@link #NUMERIC}. */
    private static final short TYPE_NUMERIC = 2;
    /** Returned for a token consisting of exactly one {@code ','} character. */
    private static final short TYPE_COMMA = 3;

    /** Sentence-level tokenizer, created once and re-targeted via {@code setReader}. */
    private final Tokenizer sentenceTokenizer = new SentenceTokenizer(new StringReader(""));

    /** Reusable view over the current term, used only for regex matching. */
    private final MutableCharArray tempCharSequence = new MutableCharArray(new char[0]);

    /** Word-level stream for the current input; {@code null} until {@link #reset(Reader)}. */
    private TokenStream wordTokenFilter;

    /** Term attribute of {@link #wordTokenFilter}; {@code null} until {@link #reset(Reader)}. */
    private CharTermAttribute term;

    /**
     * Advances to the next token and classifies it.
     *
     * @return {@code -1} when the stream is exhausted; {@code 3} when the token is a single
     *     {@code ','}; {@code 2} when the whole token matches the numeric pattern; {@code 1}
     *     otherwise
     * @throws IOException if the underlying token stream fails
     * @throws IllegalStateException if {@link #reset(Reader)} has not been called successfully
     */
    @Override
    public short nextToken() throws IOException {
        if (this.wordTokenFilter == null) {
            throw new IllegalStateException("reset(Reader) must be called before nextToken()");
        }
        if (!this.wordTokenFilter.incrementToken()) {
            return END_OF_STREAM;
        }

        final char[] buffer = this.term.buffer();
        final int length = this.term.length();
        // Point the reusable char sequence at the term so the regex can run without copying.
        this.tempCharSequence.reset(buffer, 0, length);

        if (length == 1 && buffer[0] == ',') {
            return TYPE_COMMA;
        }
        if (NUMERIC.matcher(this.tempCharSequence).matches()) {
            return TYPE_NUMERIC;
        }
        return TYPE_WORD;
    }

    /**
     * Points the given array at the current term's buffer (offset 0, current term length).
     * No characters are copied here; the array is reset onto the term attribute's own buffer.
     *
     * @param mutableCharArray the array to reset onto the current term
     * @throws IllegalStateException if {@link #reset(Reader)} has not been called successfully
     */
    @Override
    public void setTermBuffer(MutableCharArray mutableCharArray) {
        if (this.term == null) {
            throw new IllegalStateException("reset(Reader) must be called before setTermBuffer()");
        }
        mutableCharArray.reset(this.term.buffer(), 0, this.term.length());
    }

    /**
     * Finishes and closes the stream from any previous input, then attaches {@code reader} and
     * prepares a fresh word filter for consumption.
     *
     * @param reader the new input to tokenize
     * @throws IOException declared by the interface; any exception raised here (including
     *     {@code IOException}) is rethrown through
     *     {@link ExceptionUtils#wrapAsRuntimeException}
     */
    @Override
    public void reset(Reader reader) throws IOException {
        try {
            closeCurrentStream();
            this.sentenceTokenizer.setReader(reader);
            // Fields are assigned before reset() so that a failing reset() still leaves the
            // new filter reachable for end()/close() on the next call.
            this.wordTokenFilter = new WordTokenFilter(this.sentenceTokenizer);
            this.term = this.wordTokenFilter.addAttribute(CharTermAttribute.class);
            this.wordTokenFilter.reset();
        } catch (Exception e) {
            throw ExceptionUtils.wrapAsRuntimeException(e);
        }
    }

    /**
     * Ends and closes the current word filter, if any. The references are dropped first and
     * close() runs in a finally block, so a failing end() can neither skip close() nor leave
     * a broken stream behind to be retried by later reset calls.
     */
    private void closeCurrentStream() throws IOException {
        final TokenStream previous = this.wordTokenFilter;
        if (previous == null) {
            return;
        }
        this.wordTokenFilter = null;
        this.term = null;
        try {
            previous.end();
        } finally {
            previous.close();
        }
    }
}
