package org.apache.lucene.analysis.opennlp;

import java.io.IOException;
import opennlp.tools.util.Span;
import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.class */
/**
 * Tokenizer that hands each sentence of the input to an {@link NLPTokenizerOp} and emits the
 * resulting spans as terms.
 *
 * <p>Sentence boundaries come from an {@link OpenNLPSentenceBreakIterator} built around the
 * supplied {@link NLPSentenceDetectorOp}. For every emitted term the term text and the
 * (corrected) start/end offsets are populated; the last term of each sentence additionally has
 * {@link #EOS_FLAG_BIT} set on its {@link FlagsAttribute}.
 */
public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
    /** Bit OR-ed into the flags of the final term of each sentence. */
    public static final int EOS_FLAG_BIT = 1;

    private final CharTermAttribute termAtt;
    private final FlagsAttribute flagsAtt;
    private final OffsetAttribute offsetAtt;

    /** Splits one sentence into term spans; never {@code null} after construction. */
    private final NLPTokenizerOp tokenizerOp;

    /** Term spans of the current sentence, relative to {@link #sentenceStart}; {@code null} when no sentence is loaded. */
    private Span[] termSpans;

    /** Index into {@link #termSpans} of the next term to emit. */
    private int termNum;

    /** Start of the current sentence as an index into the inherited {@code buffer}. */
    private int sentenceStart;

    /**
     * Creates a tokenizer backed by the given sentence detector and tokenizer operations.
     *
     * @param attributeFactory factory passed through to the superclass constructor
     * @param nLPSentenceDetectorOp sentence detector wrapped in an {@link OpenNLPSentenceBreakIterator}; must not be {@code null}
     * @param nLPTokenizerOp tokenizer applied to each sentence; must not be {@code null}
     * @throws IllegalArgumentException if either operation is {@code null}
     * @throws IOException declared for compatibility with existing callers
     */
    public OpenNLPTokenizer(AttributeFactory attributeFactory, NLPSentenceDetectorOp nLPSentenceDetectorOp, NLPTokenizerOp nLPTokenizerOp) throws IOException {
        // Validate inside the super(...) argument so that a null op is rejected before it is
        // handed to the break iterator or the superclass.
        super(attributeFactory, new OpenNLPSentenceBreakIterator(requireBothOps(nLPSentenceDetectorOp, nLPTokenizerOp)));
        this.termAtt = addAttribute(CharTermAttribute.class);
        this.flagsAtt = addAttribute(FlagsAttribute.class);
        this.offsetAtt = addAttribute(OffsetAttribute.class);
        this.tokenizerOp = nLPTokenizerOp;
    }

    /**
     * Verifies that both operations are present.
     *
     * @return {@code sentenceOp}, so the call can be used inline as a constructor argument
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    private static NLPSentenceDetectorOp requireBothOps(NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenOp) {
        if (sentenceOp == null || tokenOp == null) {
            throw new IllegalArgumentException("OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
        }
        return sentenceOp;
    }

    /**
     * Closes the superclass and then drops all per-sentence state.
     *
     * @throws IOException if the superclass {@code close()} fails
     */
    public void close() throws IOException {
        super.close();
        clearSentenceState();
    }

    /**
     * Loads a new sentence: tokenizes {@code buffer[i, i2)} and rewinds the term cursor.
     *
     * @param i index into the inherited {@code buffer} of the first character of the sentence
     * @param i2 index into the inherited {@code buffer} one past the last character of the sentence
     */
    protected void setNextSentence(int i, int i2) {
        this.sentenceStart = i;
        String sentence = new String(this.buffer, i, i2 - i);
        this.termSpans = this.tokenizerOp.getTerms(sentence);
        this.termNum = 0;
    }

    /**
     * Emits the next term of the current sentence, if any.
     *
     * @return {@code true} if a term was written to the attributes; {@code false} if no sentence
     *     is loaded or all of its terms have already been emitted
     */
    protected boolean incrementWord() {
        if (this.termSpans == null || this.termNum == this.termSpans.length) {
            return false;
        }
        clearAttributes();
        Span span = this.termSpans[this.termNum];
        // Span positions are relative to the sentence; shift them to buffer coordinates.
        int startInBuffer = this.sentenceStart + span.getStart();
        int endInBuffer = this.sentenceStart + span.getEnd();
        this.termAtt.copyBuffer(this.buffer, startInBuffer, span.length());
        // The inherited `offset` is added on top of the buffer position before correction.
        this.offsetAtt.setOffset(correctOffset(this.offset + startInBuffer), correctOffset(this.offset + endInBuffer));
        boolean lastTermOfSentence = this.termNum == this.termSpans.length - 1;
        if (lastTermOfSentence) {
            this.flagsAtt.setFlags(this.flagsAtt.getFlags() | EOS_FLAG_BIT);
        }
        this.termNum++;
        return true;
    }

    /**
     * Resets the superclass and then drops all per-sentence state.
     *
     * @throws IOException if the superclass {@code reset()} fails
     */
    public void reset() throws IOException {
        super.reset();
        clearSentenceState();
    }

    /** Forgets the current sentence so that {@link #incrementWord()} returns {@code false} until a new one is set. */
    private void clearSentenceState() {
        this.termSpans = null;
        this.sentenceStart = 0;
        this.termNum = 0;
    }
}
