package com.datastax.insight.ml.spark.mllib.statistics;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.stat.KernelDensity;
import org.apache.spark.mllib.stat.Statistics;

/**
 * Static convenience wrappers around Spark MLlib's {@link Statistics} and
 * {@link KernelDensity} routines.
 */
public class MLStatistics {

    /**
     * Computes a correlation matrix for the supplied vectors by delegating to
     * {@link Statistics#corr}.
     *
     * @param vectors the input vectors; the underlying Scala RDD is handed to MLlib
     * @param method  the correlation method name, passed through to MLlib unchanged
     *                (presumably values such as "pearson" or "spearman" -- confirm
     *                against the MLlib version in use)
     * @return the matrix produced by MLlib
     */
    public static Matrix correlation(JavaRDD<Vector> vectors, String method) {
        final Matrix correlationMatrix = Statistics.corr(vectors.rdd(), method);
        return correlationMatrix;
    }

    /**
     * Runs a kernel density estimate over textual sample data.
     *
     * <p>Each element of {@code data} is parsed as a double to form the sample,
     * and the resulting estimator is evaluated at every entry of {@code values}.
     *
     * @param data      the sample, one numeric string per element; an element that is
     *                  not a parsable double makes the parse step throw
     * @param values    the points at which the density is estimated
     * @param bandwidth the bandwidth given to the estimator, i.e. the standard
     *                  deviation used for the Gaussian kernels
     * @return the density estimates returned by MLlib for {@code values}
     */
    public static double[] kde(JavaRDD<String> data, double[] values, double bandwidth) {
        // Converter applied to every element of the sample.
        final Function<String, Double> toDouble = new Function<String, Double>() {
            @Override
            public Double call(String text) throws Exception {
                return Double.valueOf(text);
            }
        };
        final JavaRDD<Double> samples = data.map(toDouble);

        // Configure the estimator with the parsed sample and the kernel bandwidth.
        final KernelDensity estimator = new KernelDensity();
        estimator.setSample(samples);
        estimator.setBandwidth(bandwidth);

        // Evaluate the density at each requested point.
        return estimator.estimate(values);
    }
}
