package org.canova.nlp.vectorizer;

import java.util.Collection;
import java.util.HashSet;
import org.canova.api.conf.Configuration;
import org.canova.api.records.reader.RecordReader;
import org.canova.api.writable.Writable;
import org.canova.nlp.tokenization.tokenizer.Tokenizer;
import org.canova.nlp.tokenization.tokenizerfactory.DefaultTokenizerFactory;
import org.canova.nlp.tokenization.tokenizerfactory.TokenizerFactory;

/* loaded from: input_file:org/canova/nlp/vectorizer/TfidfVectorizer.class */
/**
 * Base class for TF-IDF style text vectorizers.
 *
 * <p>This class supplies the token-counting step ({@link #doWithTokens(Tokenizer)}) and a
 * default tokenizer factory; producing the actual vector representation is left to
 * subclasses through the abstract methods below.
 *
 * <p>Counts are written to the {@code cache} field, which is not declared in this class
 * (presumably inherited from {@code TextVectorizer} - confirm against the superclass).
 *
 * @param <VECTOR_TYPE> the vector representation produced by the concrete subclass
 */
public abstract class TfidfVectorizer<VECTOR_TYPE> extends TextVectorizer<VECTOR_TYPE> {
    /**
     * Consumes every token from the given tokenizer and updates the cache counts.
     *
     * <p>For each token the term count is incremented once per occurrence, while the
     * document count is incremented only the first time the token is seen during this
     * call. NOTE(review): this assumes one call corresponds to one document - confirm
     * against the caller in {@code TextVectorizer}.
     *
     * @param tokenizer source of tokens; drained until {@code hasMoreTokens()} is false
     */
    @Override // org.canova.nlp.vectorizer.TextVectorizer
    public void doWithTokens(Tokenizer tokenizer) {
        // Tokens already counted towards the document frequency during this call.
        HashSet<String> seen = new HashSet<String>();
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            this.cache.incrementCount(token);
            // add() returns true only when the token was not yet in the set, so the
            // document count is bumped at most once per distinct token. The previous
            // code never populated the set, which made the document count track the
            // raw term count instead.
            if (seen.add(token)) {
                this.cache.incrementDocCount(token);
            }
        }
    }

    /**
     * Returns the tokenizer factory used by this vectorizer.
     *
     * @param configuration the configuration; not consulted by this implementation
     * @return a new {@link DefaultTokenizerFactory}
     */
    @Override // org.canova.nlp.vectorizer.TextVectorizer
    public TokenizerFactory createTokenizerFactory(Configuration configuration) {
        return new DefaultTokenizerFactory();
    }

    /**
     * Builds a vector from the given arguments. The expected contents of the array are
     * defined by the implementing subclass.
     *
     * @param objArr implementation-specific arguments
     * @return the created vector
     */
    public abstract VECTOR_TYPE createVector(Object[] objArr);

    /**
     * Produces a vector from the records supplied by the given reader. Judging by the
     * name, implementations presumably fit the vocabulary statistics first and then
     * transform - the exact contract is defined by the subclass.
     *
     * @param recordReader the source of records
     * @return the resulting vector
     */
    public abstract VECTOR_TYPE fitTransform(RecordReader recordReader);

    /**
     * Converts a single record, given as a collection of writables, into a vector.
     *
     * @param collection the writables making up the record
     * @return the resulting vector
     */
    public abstract VECTOR_TYPE transform(Collection<Writable> collection);
}
