package org.codelibs.elasticsearch.vi.analysis;

import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.codelibs.elasticsearch.vi.nlp.tokenizer.tokens.TaggedWord;

/* loaded from: input_file:org/codelibs/elasticsearch/vi/analysis/VietnameseTokenizer.class */
public class VietnameseTokenizer extends Tokenizer {
    private final List<TaggedWord> pending = new CopyOnWriteArrayList();
    private int offset = 0;
    private int pos = 0;
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final TaggedWordTokenizer taggedWordTokenizer;
    private String inputText;

    public VietnameseTokenizer(TaggedWordTokenizer taggedWordTokenizer) {
        this.taggedWordTokenizer = taggedWordTokenizer;
    }

    private void tokenize() throws IOException {
        this.inputText = IOUtils.toString(this.input);
        List<TaggedWord> list = this.taggedWordTokenizer.tokenize(new StringReader(this.inputText));
        if (list != null) {
            this.pending.addAll(list);
        }
    }

    public final boolean incrementToken() throws IOException {
        while (this.pending.isEmpty()) {
            tokenize();
            if (this.pending.isEmpty()) {
                return false;
            }
        }
        clearAttributes();
        for (int i = this.pos; i < this.pending.size(); i++) {
            this.pos++;
            TaggedWord taggedWord = this.pending.get(i);
            if (accept(taggedWord)) {
                this.posIncrAtt.setPositionIncrement(1);
                int length = taggedWord.getText().length();
                this.typeAtt.setType(String.format("<%s>", taggedWord.getRule().getName().toUpperCase()));
                this.termAtt.copyBuffer(taggedWord.getText().toCharArray(), 0, length);
                int indexOf = this.inputText.indexOf(taggedWord.getText(), i);
                OffsetAttribute offsetAttribute = this.offsetAtt;
                int correctOffset = correctOffset(indexOf);
                int correctOffset2 = correctOffset(indexOf + length);
                this.offset = correctOffset2;
                offsetAttribute.setOffset(correctOffset, correctOffset2);
                return true;
            }
        }
        return false;
    }

    private final boolean accept(TaggedWord taggedWord) {
        String lowerCase = taggedWord.getRule().getName().toLowerCase();
        return ("punctuation".equals(lowerCase) || "special".equals(lowerCase)) ? false : true;
    }

    public final void end() throws IOException {
        super.end();
        int correctOffset = correctOffset(this.offset);
        this.offsetAtt.setOffset(correctOffset, correctOffset);
    }

    public void reset() throws IOException {
        super.reset();
        this.pos = 0;
        this.offset = 0;
        this.pending.clear();
    }
}
