package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;

import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.privileged.PrivilegedOperationExecutor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.CGroupsHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourceHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceAllocator;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.gpu.GpuResourceHandlerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.DockerCommandPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.NodeResourceUpdaterPlugin;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.ResourcePlugin;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.NMResourceInfo;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.NMGpuResourceInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/gpu/GpuResourcePlugin.class */
/**
 * {@link ResourcePlugin} implementation for GPU resources.
 *
 * <p>The plugin is handed a {@link GpuDiscoverer} and a
 * {@link GpuNodeResourceUpdateHandler} at construction time. The
 * {@link GpuResourceHandlerImpl} is created on the first call to
 * {@link #createResourceHandler} and reused afterwards.
 *
 * <p>{@link #getNMResourceInfo()} keeps a count of consecutive failures while
 * querying GPU device information. Once that count reaches
 * {@link #MAX_REPEATED_ERROR_ALLOWED}, further calls fail immediately without
 * querying the discoverer again.
 */
public class GpuResourcePlugin implements ResourcePlugin {
    private static final Logger LOG = LoggerFactory.getLogger(GpuResourcePlugin.class);

    /** Number of consecutive failed device-information queries after which querying stops. */
    public static final int MAX_REPEATED_ERROR_ALLOWED = 10;

    /** Reported (logged and thrown) when no GPU resource handler has been created yet. */
    private static final String HANDLER_NOT_CREATED_MESSAGE =
        "Linux Container Executor is not configured for the NodeManager. To fully enable GPU feature on the node also set yarn.nodemanager.container-executor.class properly.";

    private final GpuNodeResourceUpdateHandler resourceDiscoverHandler;
    private final GpuDiscoverer gpuDiscoverer;

    /** Consecutive failures of {@link #getNMResourceInfo()}; reset to 0 on success. */
    private int numOfErrorExecutionSinceLastSucceed = 0;

    /** Created lazily by {@link #createResourceHandler}; {@code null} until then. */
    private GpuResourceHandlerImpl gpuResourceHandler = null;

    /** Assigned by {@link #initialize}; {@code null} until then. */
    private DockerCommandPlugin dockerCommandPlugin = null;

    /**
     * Creates the plugin around the given collaborators.
     *
     * @param gpuNodeResourceUpdateHandler handler returned by
     *        {@link #getNodeResourceHandlerInstance()}
     * @param gpuDiscoverer discoverer used to obtain GPU device information
     */
    public GpuResourcePlugin(GpuNodeResourceUpdateHandler gpuNodeResourceUpdateHandler, GpuDiscoverer gpuDiscoverer) {
        this.resourceDiscoverHandler = gpuNodeResourceUpdateHandler;
        this.gpuDiscoverer = gpuDiscoverer;
    }

    /**
     * Initializes the GPU discoverer with the context's configuration and a new
     * {@link NvidiaBinaryHelper}, then creates the Docker command plugin from
     * the same configuration.
     *
     * @param context NodeManager context supplying the configuration
     * @throws YarnException declared by the interface; may be propagated from
     *         the discoverer or the Docker command plugin factory
     */
    @Override
    public void initialize(Context context) throws YarnException {
        this.gpuDiscoverer.initialize(context.getConf(), new NvidiaBinaryHelper());
        this.dockerCommandPlugin = GpuDockerCommandPluginFactory.createGpuDockerCommandPlugin(context.getConf());
    }

    /**
     * Returns the GPU resource handler, creating it on the first call.
     *
     * <p>Subsequent calls return the instance created first; the arguments of
     * those later calls are not used.
     *
     * @param context NodeManager context passed to the handler
     * @param cGroupsHandler cgroups handler passed to the handler
     * @param privilegedOperationExecutor executor passed to the handler
     * @return the single {@link GpuResourceHandlerImpl} held by this plugin
     */
    @Override
    public ResourceHandler createResourceHandler(Context context, CGroupsHandler cGroupsHandler, PrivilegedOperationExecutor privilegedOperationExecutor) {
        if (this.gpuResourceHandler == null) {
            this.gpuResourceHandler = new GpuResourceHandlerImpl(context, cGroupsHandler, privilegedOperationExecutor, this.gpuDiscoverer);
        }
        return this.gpuResourceHandler;
    }

    /**
     * @return the node resource update handler supplied to the constructor
     */
    @Override
    public NodeResourceUpdaterPlugin getNodeResourceHandlerInstance() {
        return this.resourceDiscoverHandler;
    }

    /**
     * No-op: this plugin performs no work on cleanup.
     */
    @Override
    public void cleanup() throws YarnException {
    }

    /**
     * @return the Docker command plugin created by {@link #initialize}, or
     *         {@code null} if {@code initialize} has not assigned one
     */
    @Override
    public DockerCommandPlugin getDockerCommandPluginInstance() {
        return this.dockerCommandPlugin;
    }

    /**
     * Builds a snapshot of GPU information for this node: the device
     * information reported by the discoverer plus the allowed and assigned
     * GPUs reported by the resource handler's allocator.
     *
     * <p>A successful device query resets the consecutive-failure counter. A
     * {@link YarnException} raised inside the query is logged, counted and
     * rethrown unchanged.
     *
     * @return an {@link NMGpuResourceInfo} describing the node's GPUs
     * @throws YarnException if no resource handler has been created, if the
     *         failure limit has been reached, or if the query itself fails
     */
    @Override
    public synchronized NMResourceInfo getNMResourceInfo() throws YarnException {
        checkGpuResourceHandler();
        checkErrorCount();
        try {
            GpuDeviceInformation gpuDeviceInformation = this.gpuDiscoverer.getGpuDeviceInformation();
            // The device query succeeded, so the failure streak is over.
            this.numOfErrorExecutionSinceLastSucceed = 0;
            GpuResourceAllocator gpuAllocator = this.gpuResourceHandler.getGpuAllocator();
            return new NMGpuResourceInfo(gpuDeviceInformation, gpuAllocator.getAllowedGpus(), gpuAllocator.getAssignedGpus());
        } catch (YarnException e) {
            LOG.error(e.getMessage(), e);
            this.numOfErrorExecutionSinceLastSucceed++;
            throw e;
        }
    }

    /**
     * Fails if {@link #createResourceHandler} has not yet created the handler.
     *
     * @throws YarnException if the GPU resource handler is still {@code null}
     */
    private void checkGpuResourceHandler() throws YarnException {
        if (this.gpuResourceHandler == null) {
            LOG.warn(HANDLER_NOT_CREATED_MESSAGE);
            throw new YarnException(HANDLER_NOT_CREATED_MESSAGE);
        }
    }

    /**
     * Fails once the consecutive-failure counter has reached
     * {@link #MAX_REPEATED_ERROR_ALLOWED}.
     *
     * @throws YarnException if the failure limit has been reached
     */
    private void checkErrorCount() throws YarnException {
        // Compare against the named constant (not a literal) so the limit and
        // the message stay in sync with MAX_REPEATED_ERROR_ALLOWED; ">=" is
        // purely defensive since the counter only grows by one per call.
        if (this.numOfErrorExecutionSinceLastSucceed >= MAX_REPEATED_ERROR_ALLOWED) {
            String msg = "Failed to execute GPU device information detection script for "
                + MAX_REPEATED_ERROR_ALLOWED + " times, skip following executions.";
            LOG.error(msg);
            throw new YarnException(msg);
        }
    }

    /**
     * @return the fully qualified name of {@code GpuResourcePlugin}
     */
    @Override
    public String toString() {
        return GpuResourcePlugin.class.getName();
    }
}
