package com.datastax.insight.ml.spark.ml.feature.transformer;

import com.datastax.insight.spec.DataSetOperator;
import com.datastax.insight.core.Consts;
import com.google.common.base.Strings;
import org.apache.spark.ml.feature.IndexToString;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Index-to-label conversion: static helpers that build and apply a Spark ML
 * {@link IndexToString} transformer.
 */
public class IndexToStringWrapper implements DataSetOperator {

    /**
     * Builds an {@link IndexToString} transformer from a delimited label string.
     *
     * <p>When {@code labels} is null or empty, no labels are set on the
     * transformer. Otherwise the string is split on {@code Consts.DELIMITER}
     * (which {@link String#split(String)} interprets as a regular expression)
     * and the resulting array is passed to {@code setLabels}.
     *
     * @param inputCol  name of the input column
     * @param outputCol name of the output column
     * @param labels    delimited list of labels; may be null or empty
     * @return the configured transformer
     */
    public static IndexToString getOperator(String inputCol,String outputCol,String labels){
        // A null array tells the array-based overload to skip setLabels.
        String[] parsedLabels = Strings.isNullOrEmpty(labels)
                ? null
                : labels.split(Consts.DELIMITER);
        return getOperator(inputCol, outputCol, parsedLabels);
    }

    /**
     * Builds an {@link IndexToString} transformer from an array of labels.
     *
     * @param inputCol  name of the input column
     * @param outputCol name of the output column
     * @param labels    labels to set on the transformer; when null, no labels are set
     * @return the configured transformer
     */
    public static IndexToString getOperator(String inputCol,String outputCol,String[] labels){
        IndexToString converter = new IndexToString();
        converter.setInputCol(inputCol);
        converter.setOutputCol(outputCol);

        if (labels == null) {
            return converter;
        }
        converter.setLabels(labels);
        return converter;
    }

    /**
     * Applies the given {@link IndexToString} transformer to a dataset.
     *
     * @param indexToString the transformer to apply
     * @param data          the dataset to transform
     * @return the dataset returned by {@code indexToString.transform(data)}
     */
    public static Dataset<Row> transform(IndexToString indexToString, Dataset<Row> data){
        return indexToString.transform(data);
    }
}
