package com.datastax.insight.ml.spark.ml.feature.transformer;

import com.datastax.insight.spec.DataSetOperator;
import org.apache.spark.ml.feature.PCA;
import org.apache.spark.ml.feature.PCAModel;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Principal component analysis (PCA) helpers built on Spark ML's
 * {@link PCA} estimator and {@link PCAModel}.
 */
public class PCAReducer implements DataSetOperator {
    /**
     * Creates an unfitted {@link PCA} estimator configured with the given
     * columns and component count.
     *
     * @param inputCol  name handed to {@code PCA.setInputCol}
     * @param outputCol name handed to {@code PCA.setOutputCol}
     * @param k         value handed to {@code PCA.setK} (number of principal components)
     * @return the configured estimator
     */
    public static PCA getOperator(String inputCol, String outputCol, int k) {
        return new PCA()
                .setInputCol(inputCol)
                .setOutputCol(outputCol)
                .setK(k);
    }

    /**
     * Configures a new {@link PCA} estimator via
     * {@link #getOperator(String, String, int)} and fits it on the given data.
     *
     * @param data      dataset the estimator is fitted on
     * @param inputCol  name handed to {@code PCA.setInputCol}
     * @param outputCol name handed to {@code PCA.setOutputCol}
     * @param k         value handed to {@code PCA.setK} (number of principal components)
     * @return the fitted model
     */
    public static PCAModel fit(Dataset<Row> data, String inputCol, String outputCol, int k) {
        return getOperator(inputCol, outputCol, k).fit(data);
    }

    /**
     * Fits an already configured {@link PCA} estimator on the given data.
     *
     * @param pca  the estimator to fit
     * @param data dataset the estimator is fitted on
     * @return the fitted model
     */
    public static PCAModel fit(PCA pca, Dataset<Row> data) {
        return pca.fit(data);
    }

    /**
     * Applies a fitted {@link PCAModel} to the given data.
     *
     * @param model the fitted PCA model
     * @param data  dataset to transform
     * @return the dataset produced by {@code model.transform(data)}
     */
    public static Dataset<Row> transform(PCAModel model, Dataset<Row> data) {
        return model.transform(data);
    }
}
