/*
 * Decompiled with CFR 0.152.
 */
package com.datastax.data.prepare.spark.dataset;

import com.datastax.data.prepare.spark.dataset.j;
import com.datastax.data.prepare.util.a;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.spec.Operator;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Operator exposing three outlier-detection components (distance based, density based and LOF)
 * over a Spark {@link Dataset}.
 *
 * <p>Each public entry point validates its arguments, resolves the usable columns through the
 * obfuscated helper class {@code j}, and then delegates the actual computation to that helper.
 * What the helper computes is not visible from this class; the component descriptions in the
 * annotations are the only statement of intent available here.
 *
 * <p>All entry points return the input dataset unchanged (and log at INFO level) when none of the
 * selected columns is usable or when the dataset has {@value #MAX_SUPPORTED_ROWS} rows or more.
 */
public class OutlierDetectionOperator implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(OutlierDetectionOperator.class);

    /**
     * Row count at or above which detection is skipped and the input is returned unchanged.
     * NOTE(review): the log message says "greater than 10000 rows" but the comparison is
     * inclusive, so exactly 10000 rows is also skipped; kept as-is for compatibility.
     */
    private static final long MAX_SUPPORTED_ROWS = 10000L;

    /**
     * Distance-based outlier detection.
     *
     * @param data dataset to inspect; must not be {@code null}
     * @param columns semicolon-separated column names; must not be {@code null} or empty
     * @param neighbors the k used for the k-nearest-neighbour distance; must be positive
     * @param outliers number of outliers to select; must be positive
     * @param type name of the distance method, passed through to the helper untouched
     * @return the helper's result, or {@code data} itself when no selected column is usable or
     *     the dataset is too large
     * @throws a if any argument fails validation
     */
    @InsightComponent(name="\u79bb\u7fa4\u70b9\u68c0\u6d4b(\u8ddd\u79bb)", type="com.datastax.insight.dataprprocess.detectOutlier.distance", description="\u901a\u8fc7\u7b2ck\u90bb\u8fd1\u8ddd\u79bb\u5224\u65ad\u6570\u636e\u70b9\uff0c\u53d6k\u90bb\u8fd1\u8ddd\u79bb\u7684\u6240\u6709\u70b9\u7684\u5e73\u5747\u8ddd\u79bb\u7684\u524dn\u4e2a\u70b9\u4f5c\u4e3a\u79bb\u7fa4\u70b9")
    public static <T> Dataset<T> distanceOutlier(@InsightComponentArg(externalInput=true, name="\u6570\u636e\u96c6", description="\u6570\u636e\u96c6") Dataset<T> data, @InsightComponentArg(name="\u5217\u540d", description="\u9009\u62e9\u79bb\u7fa4\u70b9\u7684\u5217\u540d\uff0c\u7528\u5206\u53f7\u9694\u5f00") String columns, @InsightComponentArg(name="\u8fd1\u90bb", description="\u901a\u8fc7\u8bbe\u7f6e\u8fd1\u90bb\u70b9\u7684\u6570\u91cf\u6765\u5f97\u5230k\u90bb\u8fd1\u8ddd\u79bb") int neighbors, @InsightComponentArg(name="\u79bb\u7fa4\u70b9\u6570\u91cf", description="\u8bbe\u7f6e\u79bb\u7fa4\u70b9\u6570\u91cf, \u53d6\u8ddd\u79bb\u524dn\u4e2a\u70b9") int outliers, @InsightComponentArg(name="\u8ddd\u79bb\u65b9\u6cd5", description="\u8ddd\u79bb\u8ba1\u7b97\u65b9\u6cd5", defaultValue="\u6b27\u5f0f\u8ddd\u79bb", items="\u6b27\u5f0f\u8ddd\u79bb;\u5e73\u65b9\u8ddd\u79bb;\u4f59\u5f26\u8ddd\u79bb;\u53cd\u4f59\u5f26\u8ddd\u79bb") String type) {
        // The first two messages previously carried the "densities" prefix (copy-paste slip);
        // they now use the same "distances" prefix as the rest of this method.
        if (data == null) {
            throw new a("distances\u7684\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u6570\u636e\u96c6\u4e3a\u7a7a");
        }
        if (neighbors <= 0) {
            throw new a("distances\u7684\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u8ddd\u79bb\u67d0\u70b9\u7684\u7b2ck\u70b9\u8ddd\u79bb\u7684k\u5c0f\u4e8e\u6216\u8005\u7b49\u4e8e0");
        }
        if (outliers <= 0) {
            throw new a("distances\u7684\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u79bb\u7fa4\u70b9\u6570\u91cfn\u5c0f\u4e8e\u6216\u8005\u7b49\u4e8e0");
        }
        if (columns == null || columns.isEmpty()) {
            throw new a("distances\u7684\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u62e9\u7684\u5217\u540d\u4e3a\u7a7a");
        }
        String[] cols = columns.split(";");
        // Helper narrows the requested names against the schema; an empty result means there is
        // nothing to compute on.
        String[] result = j.a(data.schema(), cols);
        if (result.length == 0) {
            logger.info("distances\u7684\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u4e2d\u7684\u5217\u4e2d\u6ca1\u6709\u53ef\u7528\u4e8e\u79bb\u7fa4\u70b9\u8ba1\u7b97, \u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        if (isTooLarge(data.count())) {
            return data;
        }
        return j.b((Dataset<Row>)data.toDF(), result, neighbors, outliers, type);
    }

    /**
     * Density-based outlier detection.
     *
     * @param data dataset to inspect; must not be {@code null}
     * @param columns semicolon-separated column names; must not be {@code null} or empty
     * @param distance neighbourhood distance; must be a number greater than zero
     * @param proportion density threshold; must lie strictly between 0 and 1
     * @param type name of the distance method, passed through to the helper untouched
     * @return the helper's result, or {@code data} itself when no selected column is usable or
     *     the dataset is too large
     * @throws a if any argument fails validation
     */
    @InsightComponent(name="\u79bb\u7fa4\u70b9\u68c0\u6d4b(\u5bc6\u5ea6)", type="com.datastax.insight.dataprprocess.detectOutlier.densities", description="\u901a\u8fc7\u67d0\u4e00\u70b9\u5728\u8ddd\u79bbd\u8303\u56f4\u5185\u7684\u70b9\u548c\u6240\u6709\u70b9\u7684\u6bd4\u4f8b\u5f97\u51fa\u70b9\u7684\u5bc6\u5ea6, \u518d\u548c\u8bbe\u7f6e\u7684\u6982\u7387\u76f8\u6bd4\u5f97\u51fa\u79bb\u7fa4\u70b9")
    public static <T> Dataset<T> densitiesOutlier(@InsightComponentArg(externalInput=true, name="\u6570\u636e\u96c6", description="\u6570\u636e\u96c6") Dataset<T> data, @InsightComponentArg(name="\u5217\u540d", description="\u9009\u62e9\u79bb\u7fa4\u70b9\u7684\u5217\u540d\uff0c\u7528\u5206\u53f7\u9694\u5f00") String columns, @InsightComponentArg(name="\u8ddd\u79bb", description="\u8ddd\u79bb") double distance, @InsightComponentArg(name="\u6bd4\u4f8b", description="\u5c0f\u4e8e\u8be5\u6bd4\u4f8b\u7684\u70b9\u5c06\u8bbe\u4e3a\u79bb\u7fa4\u70b9") double proportion, @InsightComponentArg(name="\u8ddd\u79bb\u65b9\u6cd5", description="\u8ddd\u79bb\u8ba1\u7b97\u65b9\u6cd5", defaultValue="\u6b27\u5f0f\u8ddd\u79bb", items="\u6b27\u5f0f\u8ddd\u79bb;\u5e73\u65b9\u8ddd\u79bb;\u4f59\u5f26\u8ddd\u79bb;\u53cd\u4f59\u5f26\u8ddd\u79bb") String type) {
        if (data == null) {
            throw new a("densities\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u6570\u636e\u96c6\u4e3a\u7a7a");
        }
        // Written as a negated "greater than" so that NaN, for which every ordered comparison
        // is false, is rejected instead of slipping through.
        if (!(distance > 0.0)) {
            throw new a("densities\u79bb\u7fa4\u70b9\u68c0\u6d4b--distance\u503c\u5c0f\u4e8e\u7b49\u4e8e0");
        }
        // Same NaN-safe form; the message now states the actual constraint (strictly between
        // 0 and 1) instead of claiming the value is empty.
        if (!(proportion > 0.0 && proportion < 1.0)) {
            throw new a("densities\u79bb\u7fa4\u70b9\u68c0\u6d4b--proportion\u503c\u5fc5\u987b\u5927\u4e8e0\u4e14\u5c0f\u4e8e1");
        }
        if (columns == null || columns.isEmpty()) {
            throw new a("densities\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u62e9\u7684\u5217\u540d\u4e3a\u7a7a");
        }
        String[] cols = columns.split(";");
        String[] result = j.a(data.schema(), cols);
        if (result.length == 0) {
            logger.info("densities\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u4e2d\u7684\u5217\u4e2d\u6ca1\u6709\u53ef\u7528\u4e8e\u79bb\u7fa4\u70b9\u8ba1\u7b97, \u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        if (isTooLarge(data.count())) {
            return data;
        }
        return j.a((Dataset<Row>)data.toDF(), result, distance, proportion, type);
    }

    /**
     * LOF-based outlier detection over a range of k values.
     *
     * <p>If {@code upper} is smaller than {@code lower} the two are swapped. The upper bound is
     * then capped at the number of rows in the dataset.
     *
     * @param data dataset to inspect; must not be {@code null}
     * @param columns semicolon-separated column names; must not be {@code null} or empty
     * @param lower lower bound of k; must be positive
     * @param upper upper bound of k; must be positive
     * @param type name of the distance method, passed through to the helper untouched
     * @return the helper's result, or {@code data} itself when no selected column is usable or
     *     the dataset is too large
     * @throws a if any argument fails validation, or if the (post-swap) lower bound exceeds the
     *     number of rows
     */
    @InsightComponent(name="\u79bb\u7fa4\u70b9\u68c0\u6d4b(LOF)", type="com.datastax.insight.dataprprocess.detectOutlier.lof", description="\u901a\u8fc7LOF\u5224\u65ad\u79bb\u7fa4\u70b9")
    public static <T> Dataset<T> LOFOutlier(@InsightComponentArg(externalInput=true, name="\u6570\u636e\u96c6", description="\u6570\u636e\u96c6") Dataset<T> data, @InsightComponentArg(name="\u5217\u540d", description="\u9009\u62e9\u79bb\u7fa4\u70b9\u7684\u5217\u540d\uff0c\u7528\u5206\u53f7\u9694\u5f00") String columns, @InsightComponentArg(name="\u4e0b\u9650", description="\u7b2ck\u90bb\u8fd1\u70b9\u7684k\u7684\u4e0b\u9650") int lower, @InsightComponentArg(name="\u4e0a\u9650", description="\u7b2ck\u90bb\u8fd1\u70b9\u7684k\u7684\u4e0a\u9650") int upper, @InsightComponentArg(name="\u8ddd\u79bb\u65b9\u6cd5", description="\u8ddd\u79bb\u8ba1\u7b97\u65b9\u6cd5", defaultValue="\u6b27\u5f0f\u8ddd\u79bb", items="\u6b27\u5f0f\u8ddd\u79bb;\u5e73\u65b9\u8ddd\u79bb;\u4f59\u5f26\u8ddd\u79bb;\u53cd\u4f59\u5f26\u8ddd\u79bb") String type) {
        if (data == null) {
            throw new a("LOF\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u6570\u636e\u96c6\u4e3a\u7a7a");
        }
        if (lower <= 0 || upper <= 0) {
            throw new a("LOF\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u4e0a\u9650\u6216\u8005\u4e0b\u9650\u5c0f\u4e8e\u6216\u7b49\u4e8e0");
        }
        if (columns == null || columns.isEmpty()) {
            throw new a("LOF\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u62e9\u7684\u5217\u540d\u4e3a\u7a7a");
        }
        String[] cols = columns.split(";");
        String[] result = j.a(data.schema(), cols);
        if (result.length == 0) {
            logger.info("LOF\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u9009\u4e2d\u7684\u5217\u4e2d\u6ca1\u6709\u53ef\u7528\u4e8e\u79bb\u7fa4\u70b9\u8ba1\u7b97, \u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return data;
        }
        // Tolerate bounds given in the wrong order.
        if (upper < lower) {
            int temp = upper;
            upper = lower;
            lower = temp;
        }
        // count() is a Spark action; run it once and reuse the value for every check below.
        long count = data.count();
        if (count < (long)lower) {
            throw new a("LOF\u79bb\u7fa4\u70b9\u68c0\u6d4b--\u4e0b\u9650\u5927\u4e8e\u6570\u636e\u96c6\u7684\u884c\u6570");
        }
        // Cap the upper bound at the row count. The previous condition was inverted
        // (count > upper), which overwrote the caller's upper bound with the row count for any
        // dataset larger than it. The cast is safe because count < upper <= Integer.MAX_VALUE.
        if ((long)upper > count) {
            upper = (int)count;
        }
        if (isTooLarge(count)) {
            return data;
        }
        return j.a((Dataset<Row>)data.toDF(), result, lower, upper, type);
    }

    /**
     * Placeholder that performs no detection and returns its input unchanged; the remaining
     * parameters are ignored.
     *
     * @return {@code data}, untouched
     */
    protected static <T> Dataset<T> a(Dataset<T> data, String columns, int neighbors, int classOutliers) {
        return data;
    }

    /**
     * Reports whether a dataset with the given row count is too large to process, logging the
     * shared "unsupported size" message when it is.
     */
    private static boolean isTooLarge(long rowCount) {
        if (rowCount >= MAX_SUPPORTED_ROWS) {
            logger.info("\u6570\u636e\u96c6\u5927\u4e8e10000\u884c\uff0c\u6682\u65f6\u4e0d\u652f\u6301\u4e8e\u79bb\u7fa4\u70b9\u68c0\u6d4b\uff0c\u8fd4\u56de\u539f\u6570\u636e\u96c6");
            return true;
        }
        return false;
    }
}

