package com.datastax.insight.ml.spark.ml.feature.transformer;

import com.datastax.insight.spec.DataSetOperator;
import org.apache.spark.ml.feature.MinMaxScaler;
import org.apache.spark.ml.feature.MinMaxScalerModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Static helpers around Spark ML's {@link MinMaxScaler} (min-max normalization):
 * building a configured estimator, fitting it to a dataset, and applying the
 * fitted model.
 */
public class MinMaxScalerWrapper implements DataSetOperator {

    /**
     * Builds a min-max scaler estimator with the given column names and bounds.
     *
     * @param inputCol  value passed to {@code setInputCol}
     * @param outputCol value passed to {@code setOutputCol}
     * @param min       value passed to {@code setMin}
     * @param max       value passed to {@code setMax}
     * @return the configured, not yet fitted, {@link MinMaxScaler}
     */
    public static MinMaxScaler getOperator(String inputCol, String outputCol, double min, double max) {
        return new MinMaxScaler()
                .setInputCol(inputCol)
                .setOutputCol(outputCol)
                .setMin(min)
                .setMax(max);
    }

    /**
     * Convenience overload: configures a scaler via
     * {@link #getOperator(String, String, double, double)} and fits it on {@code data}.
     *
     * @param data      dataset to fit the scaler on
     * @param inputCol  value passed to {@code setInputCol}
     * @param outputCol value passed to {@code setOutputCol}
     * @param min       value passed to {@code setMin}
     * @param max       value passed to {@code setMax}
     * @return the model produced by fitting the scaler on {@code data}
     */
    public static MinMaxScalerModel fit(Dataset<Row> data, String inputCol, String outputCol, double min, double max) {
        return getOperator(inputCol, outputCol, min, max).fit(data);
    }

    /**
     * Fits an already configured min-max scaler on the given dataset.
     *
     * @param minMaxScaler the estimator to fit
     * @param data         dataset to fit the scaler on
     * @return the model returned by {@code minMaxScaler.fit(data)}
     */
    public static MinMaxScalerModel fit(MinMaxScaler minMaxScaler, Dataset<Row> data) {
        return minMaxScaler.fit(data);
    }

    /**
     * Applies a fitted min-max scaler model to a dataset.
     *
     * @param model the fitted model
     * @param data  dataset to transform
     * @return the dataset returned by {@code model.transform(data)}
     */
    public static Dataset<Row> transform(MinMaxScalerModel model, Dataset<Row> data) {
        return model.transform(data);
    }
}
