package com.datastax.insight.ml.spark.ml.feature.selector;

import com.datastax.insight.spec.DataSetOperator;
import org.apache.spark.ml.feature.ChiSqSelector;
import org.apache.spark.ml.feature.ChiSqSelectorModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Chi-squared selector.
 *
 * <p>Static helpers for building a Spark ML {@link ChiSqSelector}, fitting it
 * to a dataset, and applying the fitted {@link ChiSqSelectorModel}.
 */
public class ChiSqFeatureSelector implements DataSetOperator {

    /**
     * Creates a {@link ChiSqSelector} configured with the given column names
     * and feature count.
     *
     * @param labelCol       value passed to {@code setLabelCol}
     * @param outputCol      value passed to {@code setOutputCol}
     * @param featuresCol    value passed to {@code setFeaturesCol}
     * @param numTopFeatures value passed to {@code setNumTopFeatures}
     * @return the configured, not yet fitted, selector
     */
    public static ChiSqSelector getOperator(String labelCol, String outputCol, String featuresCol, int numTopFeatures) {
        return new ChiSqSelector()
                .setFeaturesCol(featuresCol)
                .setLabelCol(labelCol)
                .setOutputCol(outputCol)
                .setNumTopFeatures(numTopFeatures);
    }

    /**
     * Builds a selector via
     * {@link #getOperator(String, String, String, int)} and fits it to
     * {@code data}.
     *
     * @param data           dataset the selector is fitted on
     * @param labelCol       value passed to {@code setLabelCol}
     * @param outputCol      value passed to {@code setOutputCol}
     * @param featuresCol    value passed to {@code setFeaturesCol}
     * @param numTopFeatures value passed to {@code setNumTopFeatures}
     * @return the model produced by {@code ChiSqSelector.fit}
     */
    public static ChiSqSelectorModel fit(Dataset<Row> data, String labelCol, String outputCol, String featuresCol, int numTopFeatures) {
        return fit(getOperator(labelCol, outputCol, featuresCol, numTopFeatures), data);
    }

    /**
     * Fits an already configured selector to {@code data}.
     *
     * @param selector the selector to fit
     * @param data     dataset the selector is fitted on
     * @return the model produced by {@code selector.fit(data)}
     */
    public static ChiSqSelectorModel fit(ChiSqSelector selector, Dataset<Row> data) {
        return selector.fit(data);
    }

    /**
     * Applies a fitted model to {@code data}.
     *
     * @param model the fitted selector model
     * @param data  dataset to transform
     * @return the dataset returned by {@code model.transform(data)}
     */
    public static Dataset<Row> transform(ChiSqSelectorModel model, Dataset<Row> data) {
        return model.transform(data);
    }
}
