package org.deeplearning4j.spark.models.sequencevectors.functions;

import java.util.List;
import lombok.NonNull;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.broadcast.Broadcast;
import org.deeplearning4j.models.embeddings.loader.VectorsConfiguration;
import org.deeplearning4j.models.sequencevectors.sequence.Sequence;
import org.deeplearning4j.models.word2vec.VocabWord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/deeplearning4j/spark/models/sequencevectors/functions/TokenizerFunction.class */
/**
 * Spark {@link Function} that converts one input string into a {@link Sequence} of
 * {@link VocabWord} elements by running it through the inherited tokenizer factory.
 *
 * <p>The tokenizer factory is not created in the constructor; it is instantiated on the
 * first invocation of {@link #call(String)} if it is still {@code null} at that point.
 */
public class TokenizerFunction extends BaseTokenizerFunction implements Function<String, Sequence<VocabWord>> {
    private static final Logger log = LoggerFactory.getLogger(TokenizerFunction.class);

    /**
     * Creates the function from a broadcast configuration, which is handed to the superclass.
     *
     * @param broadcast broadcast wrapper around the vectors configuration; must not be {@code null}
     * @throws NullPointerException if {@code broadcast} is {@code null}
     */
    public TokenizerFunction(@NonNull Broadcast<VectorsConfiguration> broadcast) {
        super(broadcast);
        // The superclass constructor call has to come first, so validation happens afterwards.
        if (null == broadcast) {
            throw new NullPointerException("configurationBroadcast is marked non-null but is null");
        }
    }

    /**
     * Tokenizes the given string and wraps every usable token in a {@link VocabWord}.
     *
     * <p>Tokens that are {@code null} or empty are skipped; all others are appended to the
     * resulting sequence in the order the tokenizer returned them.
     *
     * @param str the text to tokenize
     * @return a new sequence holding one {@link VocabWord} per non-empty token
     * @throws Exception declared by the Spark {@link Function} contract
     */
    public Sequence<VocabWord> call(String str) throws Exception {
        // Lazily set up the tokenizer factory the first time this function is used.
        if (null == this.tokenizerFactory) {
            instantiateTokenizerFactory();
        }

        final List<String> words = this.tokenizerFactory.create(str).getTokens();
        final Sequence<VocabWord> result = new Sequence<>();
        for (final String word : words) {
            // Ignore tokens that carry no text.
            if (word == null || word.isEmpty()) {
                continue;
            }
            // First constructor argument is fixed at 1.0 (presumably the initial element frequency; confirm against VocabWord).
            final VocabWord element = new VocabWord(1.0d, word);
            result.addElement(element);
        }
        return result;
    }
}
