package com.datastax.insight.ml.spark.ml.feature.selector;

import com.datastax.insight.spec.DataSetOperator;
import org.apache.spark.ml.feature.RFormula;
import org.apache.spark.ml.feature.RFormulaModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Static convenience helpers around Spark ML's {@link RFormula} estimator and
 * the {@link RFormulaModel} it produces.
 *
 * <p>Every method here is a thin pass-through to the underlying Spark API: no
 * argument validation is performed in this class, so whatever the Spark calls
 * do with their inputs (including throwing) is what callers observe.
 */
public class RFormulaWrapper implements DataSetOperator {

    /**
     * Creates a new {@link RFormula} and applies the supplied settings to it.
     *
     * @param labelCol    value passed to {@code setLabelCol}
     * @param featuresCol value passed to {@code setFeaturesCol}
     * @param f           formula string passed to {@code setFormula}
     * @return the configured, not yet fitted, {@link RFormula}
     */
    public static RFormula getOperator(String labelCol, String featuresCol, String f) {
        return new RFormula()
                .setLabelCol(labelCol)
                .setFeaturesCol(featuresCol)
                .setFormula(f);
    }

    /**
     * Builds an {@link RFormula} via {@link #getOperator(String, String, String)}
     * and fits it against the given dataset.
     *
     * @param data        dataset handed to {@code RFormula.fit}
     * @param labelCol    value passed to {@code setLabelCol}
     * @param featuresCol value passed to {@code setFeaturesCol}
     * @param f           formula string passed to {@code setFormula}
     * @return the model returned by {@code RFormula.fit}
     */
    public static RFormulaModel fit(Dataset<Row> data, String labelCol, String featuresCol, String f) {
        return fit(getOperator(labelCol, featuresCol, f), data);
    }

    /**
     * Fits an already configured {@link RFormula} against the given dataset.
     *
     * @param formula the estimator to fit
     * @param data    dataset handed to {@code RFormula.fit}
     * @return the model returned by {@code RFormula.fit}
     */
    public static RFormulaModel fit(RFormula formula, Dataset<Row> data) {
        return formula.fit(data);
    }

    /**
     * Applies a fitted {@link RFormulaModel} to the given dataset.
     *
     * @param model the fitted model
     * @param data  dataset handed to {@code RFormulaModel.transform}
     * @return the dataset returned by {@code RFormulaModel.transform}
     */
    public static Dataset<Row> transform(RFormulaModel model, Dataset<Row> data) {
        return model.transform(data);
    }
}
