package org.datacleaner.cluster;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.SharedExecutorService;
import org.datacleaner.api.InputColumn;
import org.datacleaner.cluster.virtual.VirtualClusterManager;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.configuration.AnalyzerBeansConfiguration;
import org.datacleaner.data.MetaModelInputColumn;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.ComponentJob;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.FilterComponentBuilder;
import org.datacleaner.job.concurrent.SingleThreadedTaskRunner;
import org.datacleaner.job.concurrent.TaskListener;
import org.datacleaner.job.runner.AnalysisJobMetrics;
import org.datacleaner.job.runner.AnalysisListener;
import org.datacleaner.job.runner.AnalysisResultFuture;
import org.datacleaner.job.runner.AnalysisRunner;
import org.datacleaner.job.runner.CompositeAnalysisListener;
import org.datacleaner.job.runner.RowProcessingMetrics;
import org.datacleaner.job.runner.RowProcessingPublisher;
import org.datacleaner.job.runner.RowProcessingPublishers;
import org.datacleaner.job.tasks.Task;
import org.datacleaner.lifecycle.LifeCycleHelper;
import org.datacleaner.util.SourceColumnFinder;
import org.datacleaner.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/datacleaner/cluster/DistributedAnalysisRunner.class */
/**
 * An {@link AnalysisRunner} that splits a job into row-range chunks ("slave
 * jobs") and dispatches them through a {@link ClusterManager}. The individual
 * slave results are combined by a {@link DistributedAnalysisResultReducer}.
 *
 * <p>
 * Only jobs that read from a single source table and whose filters,
 * transformers and analyzers are all marked as distributable by their
 * descriptors can be run; see {@link #isDistributable(AnalysisJob)}.
 */
public final class DistributedAnalysisRunner implements AnalysisRunner {

    private static final Logger logger = LoggerFactory.getLogger(DistributedAnalysisRunner.class);

    private final ClusterManager _clusterManager;
    private final AnalyzerBeansConfiguration _configuration;
    private final CompositeAnalysisListener _analysisListener;

    /**
     * Creates a runner without any analysis listeners.
     *
     * @param analyzerBeansConfiguration the configuration used to build and run jobs
     * @param clusterManager the manager that slave jobs are dispatched to
     */
    public DistributedAnalysisRunner(AnalyzerBeansConfiguration analyzerBeansConfiguration, ClusterManager clusterManager) {
        this(analyzerBeansConfiguration, clusterManager, new AnalysisListener[0]);
    }

    /**
     * Creates a runner that reports progress to the given listeners.
     *
     * @param analyzerBeansConfiguration the configuration used to build and run jobs
     * @param clusterManager the manager that slave jobs are dispatched to
     * @param analysisListenerArr listeners to be informed about job begin, success and errors
     */
    public DistributedAnalysisRunner(AnalyzerBeansConfiguration analyzerBeansConfiguration, ClusterManager clusterManager, AnalysisListener... analysisListenerArr) {
        this._configuration = analyzerBeansConfiguration;
        this._clusterManager = clusterManager;
        this._analysisListener = new CompositeAnalysisListener(analysisListenerArr);
    }

    /**
     * Determines whether a job passes the component checks that {@link #run(AnalysisJob)}
     * performs before distributing it.
     *
     * @param analysisJob the job to check
     * @return {@code true} if every component of the job is distributable,
     *         {@code false} if the check failed for any reason
     */
    public boolean isDistributable(AnalysisJob analysisJob) {
        try {
            failIfJobIsUnsupported(analysisJob);
            return true;
        } catch (Throwable th) {
            // Deliberately best-effort: any failure while inspecting the job
            // means "not distributable". Keep the cause visible for debugging.
            logger.debug("Job is not distributable: {}", analysisJob, th);
            return false;
        }
    }

    /**
     * Validates the job, initializes the consumers on the master node, splits
     * the job into slave jobs and dispatches them.
     *
     * <p>
     * If the expected row count is zero, the job is not chunked; it is run as a
     * single job on a local {@link VirtualClusterManager} instead.
     *
     * @param analysisJob the job to run
     * @return a future representing the combined result of all slave jobs
     * @throws UnsupportedOperationException if the job contains a component that is not
     *             distributable, or does not have exactly one source table
     */
    @Override
    public AnalysisResultFuture run(AnalysisJob analysisJob) throws UnsupportedOperationException {
        logger.info("Validating distributed job: {}", analysisJob);
        failIfJobIsUnsupported(analysisJob);

        final LifeCycleHelper lifeCycleHelper = new LifeCycleHelper(_configuration.getInjectionManager(analysisJob), true);
        final RowProcessingPublishers publishers = getRowProcessingPublishers(analysisJob, lifeCycleHelper);
        final RowProcessingPublisher publisher = getRowProcessingPublisher(publishers);

        publisher.initializeConsumers(new TaskListener() {
            @Override
            public void onError(Task task, Throwable th) {
                logger.error("Failed to initialize consumers at master node!", th);
            }

            @Override
            public void onComplete(Task task) {
                // nothing to do
            }

            @Override
            public void onBegin(Task task) {
                // nothing to do
            }
        });

        logger.info("Validation passed! Chunking job for distribution amongst slaves: {}", analysisJob);

        final AnalysisJobMetrics analysisJobMetrics = publishers.getAnalysisJobMetrics();
        _analysisListener.jobBegin(analysisJob, analysisJobMetrics);

        final RowProcessingMetrics rowProcessingMetrics = publisher.getRowProcessingMetrics();
        _analysisListener.rowProcessingBegin(analysisJob, rowProcessingMetrics);

        try {
            final AnalysisResultFuture resultFuture;
            final int expectedRows = rowProcessingMetrics.getExpectedRows();
            if (expectedRows == 0) {
                logger.info("Expected rows of the job was zero. Job will run on a local virtual slave.");
                final DistributedJobContextImpl context = new DistributedJobContextImpl(_configuration, analysisJob, 0, 1);
                resultFuture = new VirtualClusterManager(_configuration, 1).dispatchJob(analysisJob, context);
            } else {
                final int chunks = _clusterManager.getJobDivisionManager().calculateDivisionCount(analysisJob, expectedRows);
                final int rowsPerChunk = (expectedRows + 1) / chunks;
                logger.info("Expected rows was {}. A total number of {} slave jobs will be built, each of approx. {} rows.",
                        new Object[] { expectedRows, chunks, rowsPerChunk });
                final List<AnalysisResultFuture> slaveResults = dispatchJobs(analysisJob, chunks, rowsPerChunk, publisher);
                final DistributedAnalysisResultReducer reducer = new DistributedAnalysisResultReducer(analysisJob,
                        lifeCycleHelper, publisher, _analysisListener);
                resultFuture = new DistributedAnalysisResultFuture(slaveResults, reducer);
            }

            if (!_analysisListener.isEmpty()) {
                awaitAndInformListener(analysisJob, analysisJobMetrics, resultFuture);
            }
            return resultFuture;
        } catch (RuntimeException e) {
            _analysisListener.errorUknown(analysisJob, e);
            throw e;
        }
    }

    /**
     * Waits for the result on a shared executor thread and reports success to
     * the listeners. Nothing is reported here when the result is unsuccessful.
     */
    private void awaitAndInformListener(final AnalysisJob analysisJob, final AnalysisJobMetrics analysisJobMetrics,
            final AnalysisResultFuture analysisResultFuture) {
        SharedExecutorService.get().execute(new Runnable() {
            @Override
            public void run() {
                analysisResultFuture.await();
                if (analysisResultFuture.isSuccessful()) {
                    _analysisListener.jobSuccess(analysisJob, analysisJobMetrics);
                }
            }
        });
    }

    /**
     * Builds one slave job per chunk and dispatches each to the cluster manager.
     *
     * <p>
     * If dispatching a chunk fails, the listeners are informed, a
     * {@link FailedAnalysisResultFuture} is inserted as the first element of
     * the returned list, and no further chunks are dispatched.
     *
     * @param analysisJob the original (master) job
     * @param chunks the number of slave jobs to build
     * @param rowsPerChunk the number of rows assigned to every chunk except the last
     * @param rowProcessingPublisher the publisher of the master job (not used by this method)
     * @return the futures of the dispatched slave jobs, in dispatch order, possibly
     *         preceded by a failure future
     */
    public List<AnalysisResultFuture> dispatchJobs(AnalysisJob analysisJob, int chunks, int rowsPerChunk, RowProcessingPublisher rowProcessingPublisher) {
        final List<AnalysisResultFuture> results = new ArrayList<>();
        for (int chunk = 0; chunk < chunks; chunk++) {
            final int firstRow = (chunk * rowsPerChunk) + 1;
            // The last chunk is left effectively unbounded so that rows beyond
            // the expected count are still processed; the bound is chosen so
            // that firstRow + maxRows stays within int range.
            final int maxRows = (chunk == chunks - 1) ? (Integer.MAX_VALUE - firstRow) - 1 : rowsPerChunk;

            final AnalysisJob slaveJob = buildSlaveJob(analysisJob, chunk, firstRow, maxRows);
            final DistributedJobContextImpl context = new DistributedJobContextImpl(_configuration, analysisJob, chunk, chunks);

            try {
                logger.info("Dispatching slave job {} of {}", chunk + 1, chunks);
                results.add(_clusterManager.dispatchJob(slaveJob, context));
            } catch (Exception e) {
                _analysisListener.errorUknown(analysisJob, e);
                // Dispatch errors go first among the job's results, and the
                // remaining chunks are abandoned. Without this break the same
                // chunk would be retried forever.
                results.add(0, new FailedAnalysisResultFuture(e));
                break;
            }
        }
        return results;
    }

    /**
     * Creates a copy of the job that is restricted to a row range by a
     * {@link MaxRowsFilter} installed as the default requirement.
     *
     * @param analysisJob the original job
     * @param slaveJobIndex zero-based index of the slave job (used for logging only)
     * @param firstRow the first row the slave job should process
     * @param maxRows the maximum number of rows the slave job should process
     * @return the slave job
     */
    private AnalysisJob buildSlaveJob(AnalysisJob analysisJob, int slaveJobIndex, int firstRow, int maxRows) {
        logger.info("Building slave job {} with firstRow={} and maxRow={}",
                new Object[] { slaveJobIndex + 1, firstRow, maxRows });

        try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(_configuration, analysisJob)) {
            final FilterComponentBuilder<MaxRowsFilter, MaxRowsFilter.Category> maxRowsFilter = jobBuilder
                    .addFilter(MaxRowsFilter.class);
            final MaxRowsFilter filter = maxRowsFilter.getComponentInstance();
            filter.setFirstRow(firstRow);
            filter.setMaxRows(maxRows);

            // Without a consistent natural record order, an explicit ordering
            // column is set on the filter.
            if (!jobBuilder.getDatastore().getPerformanceCharacteristics().isNaturalRecordOrderConsistent()) {
                filter.setOrderColumn(findOrderByColumn(jobBuilder));
            }

            jobBuilder.setDefaultRequirement(maxRowsFilter, MaxRowsFilter.Category.VALID);

            assert jobBuilder.isConfigured(true);

            return jobBuilder.toAnalysisJob();
        }
    }

    /**
     * Picks the column that slave jobs should order by. Preference order:
     * <ol>
     * <li>the single primary key of the first source table (added as a source
     * column if it is not one already),</li>
     * <li>a source column whose normalized name is "id" or the table name
     * followed by "id", "number" or "key",</li>
     * <li>the first source column.</li>
     * </ol>
     *
     * @param analysisJobBuilder the builder of the slave job; may get a source column added
     * @return the column to order by
     */
    private InputColumn<?> findOrderByColumn(AnalysisJobBuilder analysisJobBuilder) {
        final Table table = analysisJobBuilder.getSourceTables().get(0);
        final Column[] primaryKeys = table.getPrimaryKeys();
        if (primaryKeys.length == 1) {
            final Column primaryKey = primaryKeys[0];
            final InputColumn<?> existingColumn = analysisJobBuilder.getSourceColumnByName(primaryKey.getName());
            if (existingColumn != null) {
                logger.info("Using existing PK source column for ORDER BY clause on slave jobs: {}", existingColumn);
                return existingColumn;
            }
            analysisJobBuilder.addSourceColumn(primaryKey);
            // Look the column up after adding it, so that the log message shows
            // the actual column rather than the earlier null lookup result.
            final InputColumn<?> addedColumn = analysisJobBuilder.getSourceColumnByName(primaryKey.getName());
            logger.info("Added PK source column for ORDER BY clause on slave jobs: {}", addedColumn);
            return addedColumn;
        }

        if (logger.isDebugEnabled()) {
            logger.debug("Found {} primary keys, cannot select a single for ORDER BY clause on slave jobs: {}",
                    primaryKeys.length, Arrays.toString(primaryKeys));
        }

        final List<MetaModelInputColumn> sourceColumns = analysisJobBuilder.getSourceColumns();
        // Locale.ROOT keeps the matching independent of the default locale
        // (e.g. Turkish lower-casing of 'I').
        final String tableName = table.getName().toLowerCase(Locale.ROOT);
        for (MetaModelInputColumn sourceColumn : sourceColumns) {
            final String name = sourceColumn.getName();
            if (name == null) {
                continue;
            }
            // Normalize by stripping whitespace, underscores and dashes.
            String normalized = StringUtils.replaceWhitespaces(name, "");
            normalized = StringUtils.replaceAll(normalized, "_", "");
            normalized = StringUtils.replaceAll(normalized, "-", "");
            normalized = normalized.toLowerCase(Locale.ROOT);

            if ("id".equals(normalized) || (tableName + "id").equals(normalized)
                    || (tableName + "number").equals(normalized) || (tableName + "key").equals(normalized)) {
                logger.info("Using existing source column for ORDER BY clause on slave jobs: {}", sourceColumn);
                return sourceColumn;
            }
        }

        final MetaModelInputColumn firstColumn = sourceColumns.get(0);
        logger.warn("Couldn't pick a good source column for ORDER BY clause on slave jobs. Picking the first column: {}",
                firstColumn);
        return firstColumn;
    }

    /**
     * Creates the row processing publishers for the job, using a
     * single-threaded task runner and no analysis listener.
     */
    private RowProcessingPublishers getRowProcessingPublishers(AnalysisJob analysisJob, LifeCycleHelper lifeCycleHelper) {
        final SourceColumnFinder sourceColumnFinder = new SourceColumnFinder();
        sourceColumnFinder.addSources(analysisJob);
        return new RowProcessingPublishers(analysisJob, (AnalysisListener) null, new SingleThreadedTaskRunner(),
                lifeCycleHelper, sourceColumnFinder);
    }

    /**
     * Returns the publisher of the job's only source table.
     *
     * @throws UnsupportedOperationException if the job does not have exactly one source table
     */
    private RowProcessingPublisher getRowProcessingPublisher(RowProcessingPublishers rowProcessingPublishers) {
        final Table[] tables = rowProcessingPublishers.getTables();
        if (tables.length != 1) {
            throw new UnsupportedOperationException("Jobs with multiple source tables are not distributable");
        }
        return rowProcessingPublishers.getRowProcessingPublisher(tables[0]);
    }

    /**
     * Checks the job's filters, transformers and analyzers.
     *
     * @throws UnsupportedOperationException if any of them is not distributable
     */
    private void failIfJobIsUnsupported(AnalysisJob analysisJob) throws UnsupportedOperationException {
        failIfComponentsAreUnsupported(analysisJob.getFilterJobs());
        failIfComponentsAreUnsupported(analysisJob.getTransformerJobs());
        failIfComponentsAreUnsupported(analysisJob.getAnalyzerJobs());
    }

    /**
     * Checks that the descriptor of every given component reports it as distributable.
     *
     * @throws UnsupportedOperationException naming the first component that is not distributable
     */
    private void failIfComponentsAreUnsupported(Collection<? extends ComponentJob> collection) throws UnsupportedOperationException {
        for (ComponentJob componentJob : collection) {
            if (!componentJob.getDescriptor().isDistributable()) {
                throw new UnsupportedOperationException("Component is not distributable: " + componentJob);
            }
        }
    }
}
