package com.datastax.insight.ml.spark.mllib.association;

import com.datastax.insight.spec.RDDOperator;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.fpm.PrefixSpan;
import org.apache.spark.mllib.fpm.PrefixSpanModel;

import java.util.ArrayList;
import java.util.List;

/**
 * Static entry points for mining sequential patterns with Spark MLlib's {@link PrefixSpan}.
 */
public class PrefixSpanEvaluator implements RDDOperator {

    /**
     * Parses every text line of {@code rdd} into a sequence of integer itemsets and runs
     * PrefixSpan on the result.
     *
     * <p>Both delimiters are handed to {@link String#split(String)} and are therefore
     * interpreted as regular expressions; callers must escape regex metacharacters
     * (for example {@code "\\|"} for a literal pipe).
     *
     * <p>Whitespace around an item is ignored, blank tokens are skipped, and itemsets that
     * contain no items are dropped, so stray or repeated delimiters no longer abort the job.
     * Tokens that are non-blank but not valid integers still raise
     * {@link NumberFormatException} when the RDD is evaluated.
     *
     * @param rdd              lines of text, one sequence per line
     * @param deliOne          regex separating the itemsets within a line
     * @param deliTwo          regex separating the items within an itemset
     * @param minSupport       value passed to {@code PrefixSpan.setMinSupport}
     * @param maxPatternLength value passed to {@code PrefixSpan.setMaxPatternLength}
     * @return the model produced by {@code PrefixSpan.run}
     */
    public static PrefixSpanModel<Integer> evaluate(JavaRDD<String> rdd, String deliOne, String deliTwo, double minSupport, int maxPatternLength) {
        JavaRDD<List<List<Integer>>> sequences = rdd.map(line -> parseSequence(line, deliOne, deliTwo));
        return evaluate(sequences, minSupport, maxPatternLength);
    }

    /**
     * Runs PrefixSpan on sequences that are already parsed into itemsets.
     *
     * @param sequences        one element per sequence; each sequence is a list of itemsets
     * @param minSupport       value passed to {@code PrefixSpan.setMinSupport}
     * @param maxPatternLength value passed to {@code PrefixSpan.setMaxPatternLength}
     * @return the model produced by {@code PrefixSpan.run}
     */
    public static PrefixSpanModel<Integer> evaluate(JavaRDD<List<List<Integer>>> sequences, double minSupport, int maxPatternLength) {
        PrefixSpan prefixSpan = new PrefixSpan()
                .setMinSupport(minSupport)
                .setMaxPatternLength(maxPatternLength);
        return prefixSpan.run(sequences);
    }

    /**
     * Converts one text line into a list of itemsets.
     *
     * @throws NumberFormatException if a non-blank token is not a valid integer
     */
    private static List<List<Integer>> parseSequence(String line, String deliOne, String deliTwo) {
        List<List<Integer>> sequence = new ArrayList<>();
        for (String itemsetText : line.split(deliOne)) {
            List<Integer> itemset = new ArrayList<>();
            for (String token : itemsetText.split(deliTwo)) {
                // split() keeps leading/interior empty strings and never strips whitespace;
                // Integer.parseInt rejects both, so normalise before parsing.
                String trimmed = token.trim();
                if (!trimmed.isEmpty()) {
                    itemset.add(Integer.parseInt(trimmed));
                }
            }
            // An itemset with no items carries no information; drop it.
            if (!itemset.isEmpty()) {
                sequence.add(itemset);
            }
        }
        return sequence;
    }
}
