package com.datastax.insight.ml.spark.ml.feature.selector;

import com.datastax.insight.spec.DataSetOperator;
import com.datastax.insight.core.Consts;
import org.apache.spark.ml.feature.VectorSlicer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Vector slicing: static helpers that build and apply a Spark ML
 * {@link VectorSlicer} from delimiter-separated string arguments.
 */
public class VectorSlicerWrapper implements DataSetOperator {

    /**
     * Builds a {@link VectorSlicer} configured with the given columns and selection.
     *
     * <p>Both {@code names} and {@code indices} are optional: a {@code null} or empty
     * string leaves the corresponding selection unset on the slicer. This method does
     * not check that at least one of them is provided; any such requirement is left
     * to {@link VectorSlicer} itself.
     *
     * @param inputCol  name of the input vector column
     * @param outputCol name of the output vector column
     * @param names     feature names to select, separated by {@link Consts#DELIMITER};
     *                  may be {@code null} or empty
     * @param indices   feature indices to select, separated by {@link Consts#DELIMITER};
     *                  may be {@code null} or empty
     * @return the configured slicer
     * @throws NumberFormatException if an entry of {@code indices} is not a valid integer
     */
    public static VectorSlicer getOperator(String inputCol, String outputCol, String names, String indices) {
        VectorSlicer vectorSlicer = new VectorSlicer()
                .setInputCol(inputCol)
                .setOutputCol(outputCol);

        // Only set names when some were supplied: splitting null would throw, and
        // splitting "" would yield a single empty name.
        String[] selectedNames = parseNames(names);
        if (selectedNames != null) {
            vectorSlicer.setNames(selectedNames);
        }

        int[] selectedIndices = parseIndices(indices);
        if (selectedIndices != null) {
            vectorSlicer.setIndices(selectedIndices);
        }
        return vectorSlicer;
    }

    /**
     * Builds a slicer via {@link #getOperator(String, String, String, String)} and
     * applies it to {@code data}.
     *
     * @param data      dataset to transform
     * @param inputCol  name of the input vector column
     * @param outputCol name of the output vector column
     * @param names     delimiter-separated feature names; may be {@code null} or empty
     * @param indices   delimiter-separated feature indices; may be {@code null} or empty
     * @return the dataset returned by {@link VectorSlicer#transform}
     */
    public static Dataset<Row> transform(Dataset<Row> data, String inputCol, String outputCol, String names, String indices) {
        return getOperator(inputCol, outputCol, names, indices).transform(data);
    }

    /**
     * Applies an already configured slicer to {@code data}.
     *
     * @param vectorSlicer the slicer to apply
     * @param data         dataset to transform
     * @return the dataset returned by {@link VectorSlicer#transform}
     */
    public static Dataset<Row> transform(VectorSlicer vectorSlicer, Dataset<Row> data) {
        return vectorSlicer.transform(data);
    }

    /** Splits the names string on the delimiter; returns {@code null} when nothing was supplied. */
    private static String[] parseNames(String names) {
        if (names == null || names.isEmpty()) {
            return null;
        }
        // Consts.DELIMITER is interpreted as a regular expression by String.split.
        return names.split(Consts.DELIMITER);
    }

    /** Parses the indices string into ints; returns {@code null} when nothing was supplied. */
    private static int[] parseIndices(String indices) {
        if (indices == null || indices.isEmpty()) {
            return null;
        }
        String[] tokens = indices.split(Consts.DELIMITER);
        int[] parsed = new int[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            // Trim so that input such as "1, 2" is accepted; Integer.parseInt rejects whitespace.
            parsed[i] = Integer.parseInt(tokens[i].trim());
        }
        return parsed;
    }
}
