package org.apache.spark.examples.ml;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.ml.feature.RegexTokenizer;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/* loaded from: input_file:org/apache/spark/examples/ml/JavaTokenizerExample.class */
/**
 * Example that splits sentences into words with Spark ML's {@link Tokenizer} and
 * {@link RegexTokenizer}, printing the tokens produced by each.
 */
public class JavaTokenizerExample {
    /**
     * Builds a three-row DataFrame of labelled sentences, tokenizes the "sentence" column with
     * both tokenizers, and prints the resulting "words" column to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate();
        try {
            List<Row> data = Arrays.asList(
                RowFactory.create(0, "Hi I heard about Spark"),
                RowFactory.create(1, "I wish Java could use case classes"),
                RowFactory.create(2, "Logistic,regression,models,are,neat"));

            StructType schema = new StructType(new StructField[]{
                new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
                new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
            });

            Dataset<Row> sentenceDataFrame = spark.createDataFrame(data, schema);

            Tokenizer tokenizer = new Tokenizer()
                .setInputCol("sentence")
                .setOutputCol("words");
            printWords(tokenizer.transform(sentenceDataFrame));

            // Previously this tokenizer was configured but never applied; run it on the same
            // input so its output can be compared with the default tokenizer's.
            RegexTokenizer regexTokenizer = new RegexTokenizer()
                .setInputCol("sentence")
                .setOutputCol("words")
                .setPattern("\\W");
            printWords(regexTokenizer.transform(sentenceDataFrame));
        } finally {
            // Stop the session even when one of the Spark calls above throws.
            spark.stop();
        }
    }

    /**
     * Prints the "words" column of up to three rows, one row per line, with each word followed
     * by a single space.
     *
     * @param tokenized a DataFrame containing "words" and "label" columns
     */
    private static void printWords(Dataset<Row> tokenized) {
        for (Row row : tokenized.select("words", "label").takeAsList(3)) {
            List<String> words = row.getList(0);
            for (String word : words) {
                System.out.print(word + " ");
            }
            System.out.println();
        }
    }
}
