package com.datastax.insight.ml.spark.ml.regression;

import com.datastax.insight.spec.DataSetOperator;
import com.google.common.base.Strings;
import org.apache.spark.ml.regression.DecisionTreeRegressionModel;
import org.apache.spark.ml.regression.DecisionTreeRegressor;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Decision tree regression.
 *
 * <p>Static helpers that build, fit and apply a Spark ML
 * {@link DecisionTreeRegressor}. Every tuning argument is optional: when an
 * argument is {@code null} (or, for strings, empty) the corresponding setter
 * is not invoked and the regressor keeps the value it was constructed with.
 */
public class DecisionTreeRegressionWrapper implements DataSetOperator {

    /**
     * Builds a {@link DecisionTreeRegressor} and applies every argument that
     * was actually supplied.
     *
     * @param labelCol            label column name; skipped when null or empty
     * @param featuresCol         features column name; skipped when null or empty
     * @param maxDepth            passed to {@code setMaxDepth}; skipped when null
     * @param maxBins             passed to {@code setMaxBins}; skipped when null
     * @param minInstancesPerNode passed to {@code setMinInstancesPerNode}; skipped when null
     * @param minInfoGain         passed to {@code setMinInfoGain}; skipped when null
     * @param maxMemoryInMB       passed to {@code setMaxMemoryInMB}; skipped when null
     * @param cacheNodeIds        passed to {@code setCacheNodeIds}; skipped when null
     * @param checkpointInterval  passed to {@code setCheckpointInterval}; skipped when null
     * @param impurity            passed to {@code setImpurity}; skipped when null or empty
     * @param varianceCol         passed to {@code setVarianceCol}; skipped when null or empty
     * @return the configured, not yet fitted, regressor
     */
    public static DecisionTreeRegressor getOperator(String labelCol,
                                                    String featuresCol,
                                                    Integer maxDepth,
                                                    Integer maxBins,
                                                    Integer minInstancesPerNode,
                                                    Double minInfoGain,
                                                    Integer maxMemoryInMB,
                                                    Boolean cacheNodeIds,
                                                    Integer checkpointInterval,
                                                    String impurity,
                                                    String varianceCol) {

        DecisionTreeRegressor regressor = new DecisionTreeRegressor();

        if (!Strings.isNullOrEmpty(labelCol)) {
            regressor.setLabelCol(labelCol);
        }

        // Guard on featuresCol itself. The guard previously tested `impurity`,
        // which dropped a supplied featuresCol whenever impurity was absent and
        // pushed a null/empty featuresCol into the regressor whenever it was set.
        if (!Strings.isNullOrEmpty(featuresCol)) {
            regressor.setFeaturesCol(featuresCol);
        }

        if (maxDepth != null) {
            regressor.setMaxDepth(maxDepth);
        }

        if (maxBins != null) {
            regressor.setMaxBins(maxBins);
        }

        if (minInstancesPerNode != null) {
            regressor.setMinInstancesPerNode(minInstancesPerNode);
        }

        if (minInfoGain != null) {
            regressor.setMinInfoGain(minInfoGain);
        }

        if (maxMemoryInMB != null) {
            regressor.setMaxMemoryInMB(maxMemoryInMB);
        }

        if (cacheNodeIds != null) {
            regressor.setCacheNodeIds(cacheNodeIds);
        }

        if (checkpointInterval != null) {
            regressor.setCheckpointInterval(checkpointInterval);
        }

        if (!Strings.isNullOrEmpty(impurity)) {
            regressor.setImpurity(impurity);
        }

        if (!Strings.isNullOrEmpty(varianceCol)) {
            regressor.setVarianceCol(varianceCol);
        }

        return regressor;
    }

    /**
     * Configures a regressor via
     * {@link #getOperator(String, String, Integer, Integer, Integer, Double, Integer, Boolean, Integer, String, String)}
     * and fits it on {@code data}.
     *
     * @param data                training data set
     * @param labelCol            see {@code getOperator}
     * @param featuresCol         see {@code getOperator}
     * @param maxDepth            see {@code getOperator}
     * @param maxBins             see {@code getOperator}
     * @param minInstancesPerNode see {@code getOperator}
     * @param minInfoGain         see {@code getOperator}
     * @param maxMemoryInMB       see {@code getOperator}
     * @param cacheNodeIds        see {@code getOperator}
     * @param checkpointInterval  see {@code getOperator}
     * @param impurity            see {@code getOperator}
     * @param varianceCol         see {@code getOperator}
     * @return the model produced by fitting the configured regressor
     */
    public static DecisionTreeRegressionModel fit(Dataset<Row> data,
                                                  String labelCol,
                                                  String featuresCol,
                                                  Integer maxDepth,
                                                  Integer maxBins,
                                                  Integer minInstancesPerNode,
                                                  Double minInfoGain,
                                                  Integer maxMemoryInMB,
                                                  Boolean cacheNodeIds,
                                                  Integer checkpointInterval,
                                                  String impurity,
                                                  String varianceCol) {
        DecisionTreeRegressor regressor = getOperator(labelCol,
                featuresCol,
                maxDepth,
                maxBins,
                minInstancesPerNode,
                minInfoGain,
                maxMemoryInMB,
                cacheNodeIds,
                checkpointInterval,
                impurity,
                varianceCol);
        return regressor.fit(data);
    }

    /**
     * Fits an already configured regressor on {@code data}.
     *
     * @param regressor the regressor to fit
     * @param data      training data set
     * @return the fitted model
     */
    public static DecisionTreeRegressionModel fit(DecisionTreeRegressor regressor, Dataset<Row> data) {
        return regressor.fit(data);
    }

    /**
     * Applies a fitted model to {@code data}.
     *
     * @param model the fitted model
     * @param data  data set to transform
     * @return the data set returned by {@code model.transform(data)}
     */
    public static Dataset<Row> transform(DecisionTreeRegressionModel model, Dataset<Row> data) {
        return model.transform(data);
    }
}
