package org.datacleaner.spark;

import java.io.InputStream;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.csv.CsvDataContext;
import org.apache.metamodel.util.Func;
import org.apache.metamodel.util.HdfsResource;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.JaxbConfigurationReader;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.JaxbJobReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/datacleaner/spark/SparkJobLauncher.class */
public class SparkJobLauncher {
    private static final Logger logger = LoggerFactory.getLogger(SparkJobLauncher.class);
    private final DataCleanerConfiguration _dataCleanerConfiguration;

    public SparkJobLauncher(DataCleanerConfiguration dataCleanerConfiguration) {
        this._dataCleanerConfiguration = dataCleanerConfiguration;
    }

    public SparkJobLauncher(String str) {
        this._dataCleanerConfiguration = readDataCleanerConfiguration(new HdfsResource(str));
    }

    public void launchJob(AnalysisJob analysisJob) {
        String name = analysisJob.getDatastore().getName();
        Datastore datastore = this._dataCleanerConfiguration.getDatastoreCatalog().getDatastore(name);
        if (datastore == null) {
            throw new IllegalStateException("Datastore referred by the job (" + name + ") has not been found in the specified DataCleanerConfiguration");
        }
        JavaSparkContext javaSparkContext = null;
        try {
            CsvDataContext dataContext = datastore.openConnection().getDataContext();
            if (dataContext instanceof CsvDataContext) {
                String qualifiedPath = dataContext.getResource().getQualifiedPath();
                javaSparkContext = new JavaSparkContext(new SparkConf().setAppName("DataCleaner-spark"));
                logger.info("Line count: " + javaSparkContext.textFile(qualifiedPath).count());
            }
        } finally {
            if (javaSparkContext != null) {
                javaSparkContext.stop();
            }
        }
    }

    public void launchJob(String str) {
        launchJob(readAnalysisJob(new HdfsResource(str)));
    }

    private static DataCleanerConfiguration readDataCleanerConfiguration(HdfsResource hdfsResource) {
        return new JaxbConfigurationReader().create(hdfsResource.read());
    }

    private AnalysisJob readAnalysisJob(HdfsResource hdfsResource) {
        return (AnalysisJob) hdfsResource.read(new Func<InputStream, AnalysisJob>() { // from class: org.datacleaner.spark.SparkJobLauncher.1
            public AnalysisJob eval(InputStream inputStream) {
                return new JaxbJobReader(SparkJobLauncher.this._dataCleanerConfiguration).read(inputStream);
            }
        });
    }

    public DataCleanerConfiguration getDataCleanerConfiguration() {
        return this._dataCleanerConfiguration;
    }

    public static void main(String[] strArr) {
        if (strArr.length < 2) {
            throw new IllegalArgumentException("The number of arguments is incorrect. Usage:\n <path_to_conf_xml_file_in_hdfs> <path_to_analysis_job_xml_in_hdfs>");
        }
        String str = strArr[0];
        new SparkJobLauncher(str).launchJob(strArr[1]);
    }
}
