package com.datastax.insight.ml.spark.mllib.evaluator;

import com.alibaba.fastjson.JSON;
import com.datastax.insight.core.entity.Metrics;
import com.datastax.insight.spec.RDDOperator;
import com.datastax.insight.core.service.PersistService;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.classification.LogisticRegressionModel;
import org.apache.spark.mllib.evaluation.RegressionMetrics;
import org.apache.spark.mllib.regression.IsotonicRegressionModel;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.regression.RegressionModel;
import org.apache.spark.mllib.tree.model.DecisionTreeModel;
import org.apache.spark.mllib.tree.model.TreeEnsembleModel;
import org.apache.spark.mllib.util.Saveable;
import scala.Tuple2;
import scala.Tuple3;

/**
 * Created by huangping on 17-1-16.
 *
 * <p>Evaluates a trained MLlib model against a data set using
 * {@link RegressionMetrics} and persists the resulting {@link Metrics}
 * through {@link PersistService}.
 */
public class RegressionMetricsWrapper implements RDDOperator {

    /**
     * Evaluates an {@link IsotonicRegressionModel}.
     *
     * <p>For every tuple the second element is passed to
     * {@code predict} and the first element is used as the observed label.
     * The third element is not read by this method (presumably the sample
     * weight, as in MLlib's isotonic regression input -- confirm with callers).
     *
     * @param model the trained model; must be an {@code IsotonicRegressionModel}
     * @param data  the tuples to evaluate against
     * @return the computed metrics (also persisted, see {@link #evaluation(JavaRDD)})
     * @throws IllegalArgumentException if {@code model} is {@code null} or not an
     *                                  {@code IsotonicRegressionModel}
     */
    public Metrics evaluation4Isotonic(Saveable model, JavaRDD<Tuple3<Double, Double, Double>> data) {
        // Guard first so the happy path is not nested; instanceof is false for null.
        if (!(model instanceof IsotonicRegressionModel)) {
            String message = "[" + typeNameOf(model) + "] is not supported, currently supports: IsotonicRegressionModel";
            throw new IllegalArgumentException(message);
        }

        IsotonicRegressionModel isotonicRegressionModel = (IsotonicRegressionModel) model;
        JavaRDD<Tuple2<Object, Object>> scoreAndLabels =
                data.map(d -> new Tuple2<>(isotonicRegressionModel.predict(d._2()), d._1()));

        return evaluation(scoreAndLabels);
    }

    /**
     * Evaluates a model that predicts from a feature vector.
     *
     * <p>Supported model types are {@link LogisticRegressionModel},
     * {@link RegressionModel}, {@link DecisionTreeModel} and
     * {@link TreeEnsembleModel}. Isotonic models take a different input shape
     * and must go through {@link #evaluation4Isotonic(Saveable, JavaRDD)}.
     *
     * @param model the trained model
     * @param data  labeled points; each point's features are fed to the model and
     *              its label is used as the observed value
     * @return the computed metrics (also persisted, see {@link #evaluation(JavaRDD)})
     * @throws IllegalArgumentException if {@code model} is {@code null} or of an
     *                                  unsupported type
     */
    public Metrics evaluation(Saveable model, JavaRDD<LabeledPoint> data) {

        // Assigned exactly once in every non-throwing branch below.
        final JavaRDD<Tuple2<Object, Object>> scoreAndLabels;

        // The model types share no common predict(Vector) interface, so each
        // one needs its own cast before it can be captured by the lambda.
        if (model instanceof LogisticRegressionModel) {
            LogisticRegressionModel realModel = (LogisticRegressionModel) model;
            scoreAndLabels = data.map(d -> new Tuple2<>(realModel.predict(d.features()), d.label()));
        } else if (model instanceof RegressionModel) {
            RegressionModel realModel = (RegressionModel) model;
            scoreAndLabels = data.map(d -> new Tuple2<>(realModel.predict(d.features()), d.label()));
        } else if (model instanceof DecisionTreeModel) {
            DecisionTreeModel realModel = (DecisionTreeModel) model;
            scoreAndLabels = data.map(d -> new Tuple2<>(realModel.predict(d.features()), d.label()));
        } else if (model instanceof TreeEnsembleModel) {
            TreeEnsembleModel realModel = (TreeEnsembleModel) model;
            scoreAndLabels = data.map(d -> new Tuple2<>(realModel.predict(d.features()), d.label()));
        } else {
            // List only the types actually handled above, so the message does
            // not send callers in circles.
            String message = "[" + typeNameOf(model) + "] is not supported, currently supports: "
                    + "LogisticRegressionModel, RegressionModel, DecisionTreeModel, TreeEnsembleModel"
                    + " (use evaluation4Isotonic for IsotonicRegressionModel)";
            throw new IllegalArgumentException(message);
        }

        return evaluation(scoreAndLabels);
    }

    /**
     * Computes MSE, RMSE, MAE, R2 and explained variance from
     * (prediction, label) pairs, stores them on a new {@link Metrics}, and
     * hands the JSON form of that object to {@link PersistService}.
     *
     * @param scoreAndLabels pairs of (predicted value, observed label)
     * @return the populated metrics object
     */
    private Metrics evaluation(JavaRDD<Tuple2<Object, Object>> scoreAndLabels) {
        Metrics metrics = new Metrics();

        RegressionMetrics regressionMetrics = new RegressionMetrics(scoreAndLabels.rdd());

        metrics.getIndicator().setMse(regressionMetrics.meanSquaredError());
        metrics.getIndicator().setRmse(regressionMetrics.rootMeanSquaredError());
        metrics.getIndicator().setMae(regressionMetrics.meanAbsoluteError());
        metrics.getIndicator().setR2(regressionMetrics.r2());
        metrics.getIndicator().setExplainedVariance(regressionMetrics.explainedVariance());

        // Calls "saveModelMetrics" on the named DAO with the current flow id
        // and the metrics serialized as JSON. NOTE(review): presumably a
        // reflective dispatch -- confirm against PersistService.
        PersistService.invoke("com.datastax.insight.agent.dao.InsightDAO",
                "saveModelMetrics",
                new String[]{Long.class.getTypeName(), String.class.getTypeName()},
                new Object[]{PersistService.getFlowId(), JSON.toJSONString(metrics)});

        return metrics;
    }

    /**
     * Returns the type name of {@code model} for use in error messages,
     * or the literal {@code "null"} when no model was supplied.
     */
    private static String typeNameOf(Object model) {
        return model == null ? "null" : model.getClass().getTypeName();
    }
}
