package com.datastax.insight.ml.spark.ml.feature.transformer;

import com.datastax.insight.spec.DataSetOperator;
import org.apache.spark.ml.feature.OneHotEncoder;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * One-hot encoding operator: static helpers that configure a Spark ML
 * {@link OneHotEncoder} and apply it to a dataset.
 */
public class OneHotEncoderWrapper implements DataSetOperator {
    /**
     * Creates a new one-hot encoder configured with the given settings.
     *
     * @param inputCol  value passed to {@code setInputCol}
     * @param outputCol value passed to {@code setOutputCol}
     * @param dropLast  value passed to {@code setDropLast}
     * @return the newly created, configured encoder
     */
    public static OneHotEncoder getOperator(String inputCol, String outputCol, boolean dropLast) {
        return new OneHotEncoder()
                .setInputCol(inputCol)
                .setOutputCol(outputCol)
                .setDropLast(dropLast);
    }

    /**
     * Applies the given one-hot encoder to a dataset.
     *
     * @param oneHotEncoder the encoder to apply
     * @param data          the dataset to transform
     * @return the dataset returned by {@code oneHotEncoder.transform(data)}
     */
    public static Dataset<Row> transform(OneHotEncoder oneHotEncoder, Dataset<Row> data) {
        return oneHotEncoder.transform(data);
    }
}
