package com.datastax.insight.ml.spark.ml.recommendation.als;

import com.datastax.insight.spec.DataSetOperator;
import com.datastax.insight.ml.spark.data.dataset.DataSetLoader;
import com.google.common.base.Strings;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;

/**
 * Builds a {@code Dataset<Rating>} from a file loaded through {@link DataSetLoader}.
 *
 * <p>Each loaded {@link Row} is converted to a {@link Rating} by reading the
 * user, product, rating and (optionally) timestamp columns as strings and parsing them.
 *
 * Created by huangping on 13/05/2017.
 */
public class RatingLoader implements DataSetOperator {
    /**
     * Default field delimiter. NOTE: the delimiter is not applied by
     * {@link #fromTextFile} yet (see the TODO there); kept for when it is wired in.
     */
    private final static String DELIMITER="::";

    /** Canonical property names, in the order used when no explicit property list is given. */
    private final static String[] PROP_ORDERS=new String[]{"user","product","rating","timestamp"};

    /**
     * Loads a file and maps every row to a {@link Rating}.
     *
     * @param type      passed through unchanged to {@code DataSetLoader.file(type, path)}
     * @param path      passed through unchanged to {@code DataSetLoader.file(type, path)}
     * @param delimiter currently ignored (see TODO in the body); accepted for interface compatibility
     * @param propList  comma-separated property names giving the column order of the loaded rows,
     *                  e.g. {@code "user,product,rating,timestamp"}; surrounding whitespace around
     *                  each name is ignored. When {@code null} or empty the canonical order
     *                  {@code user,product,rating,timestamp} is assumed. {@code timestamp} is optional.
     * @return the dataset of parsed ratings
     * @throws IllegalArgumentException if {@code propList} does not name {@code user},
     *                                  {@code product} and {@code rating}
     */
    public static Dataset<Rating> fromTextFile(String type, String path,String delimiter,String propList) {
        // Fall back to the canonical column order instead of dereferencing a null array.
        final String[] columns = Strings.isNullOrEmpty(propList) ? PROP_ORDERS : propList.split(",");

        // Resolve the column positions eagerly, as primitives: a missing required column is
        // reported here with a clear message rather than as an unboxing NPE inside the map task.
        final int userIdx = requireColumn(columns, PROP_ORDERS[0], propList);
        final int productIdx = requireColumn(columns, PROP_ORDERS[1], propList);
        final int ratingIdx = requireColumn(columns, PROP_ORDERS[2], propList);
        // Timestamp is optional; -1 means "not present in the property list".
        final int timestampIdx = indexOf(columns, PROP_ORDERS[3]);

        // TODO: the delimiter is not applied yet, and some file-reading properties
        //       cannot be configured; rows are consumed as already-split columns.

        return DataSetLoader.file(type, path).map((MapFunction<Row, Rating>) row -> {
            // Columns are read with getString, so they are expected to hold string values.
            int user = Integer.parseInt(row.getString(userIdx));
            int product = Integer.parseInt(row.getString(productIdx));
            float rating = Float.parseFloat(row.getString(ratingIdx));

            // A three-column row carries no timestamp (original behavior); likewise when
            // the property list does not name a timestamp column at all.
            if(timestampIdx < 0 || row.size() == 3) {
                return new Rating(user, product, rating);
            } else {
                long timestamp = Long.parseLong(row.getString(timestampIdx));
                return new Rating(user, product, rating, timestamp);
            }
        }, Encoders$.MODULE$.bean(Rating.class));
    }

    /** Returns the position of the first entry equal to {@code name} after trimming, or -1 if absent. */
    private static int indexOf(String[] columns, String name) {
        for (int i = 0; i < columns.length; i++) {
            if (name.equals(columns[i].trim())) {
                return i;
            }
        }
        return -1;
    }

    /** Like {@link #indexOf} but rejects a missing column with an {@link IllegalArgumentException}. */
    private static int requireColumn(String[] columns, String name, String propList) {
        int idx = indexOf(columns, name);
        if (idx < 0) {
            throw new IllegalArgumentException(
                    "Required property '" + name + "' is missing from property list: " + propList);
        }
        return idx;
    }
}
