package org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.gpu;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.shaded.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.shaded.com.google.common.collect.ImmutableSet;
import org.apache.hadoop.shaded.com.google.common.collect.Lists;
import org.apache.hadoop.shaded.com.google.common.collect.Sets;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.ResourcesExceptionUtil;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformation;
import org.apache.hadoop.yarn.server.nodemanager.webapp.dao.gpu.GpuDeviceInformationParser;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.placement.converter.LegacyMappingRuleToJson;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Determines which GPU devices on this node may be used by YARN.
 *
 * <p>Two modes are supported, selected by the value of
 * {@code yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices}:
 * <ul>
 *   <li>{@code auto}: the {@code nvidia-smi} binary is located during
 *       {@link #initialize} and queried through the supplied
 *       {@link NvidiaBinaryHelper};</li>
 *   <li>anything else: the value is parsed as a comma separated list of
 *       {@code index:minor_number} pairs.</li>
 * </ul>
 *
 * <p>The public entry points are {@code synchronized}, so the mutable state of
 * an instance is only touched by one thread at a time.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GpuDiscoverer extends Configured {

    public static final Logger LOG = LoggerFactory.getLogger(GpuDiscoverer.class);

    @VisibleForTesting
    static final String DEFAULT_BINARY_NAME = "nvidia-smi";

    /** Configuration key holding either {@code auto} or an explicit device list. */
    private static final String ALLOWED_GPU_DEVICES_KEY =
            "yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices";

    /** Configuration key holding the path (file or directory) of the discovery binary. */
    private static final String PATH_TO_DISCOVERY_EXECUTABLES_KEY =
            "yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables";

    /** Value of {@link #ALLOWED_GPU_DEVICES_KEY} that turns on automatic discovery. */
    private static final String AUTO_DISCOVERY_VALUE = "auto";

    /** Separates the index from the minor number inside one user-defined device entry. */
    private static final String DEVICE_PART_DELIMITER = ":";

    /** Number of consecutive failed executions after which the binary is no longer run. */
    private static final int MAX_REPEATED_ERROR_ALLOWED = 10;

    /** Directories searched for the binary when the configured path does not exist. */
    private static final Set<String> DEFAULT_BINARY_SEARCH_DIRS =
            ImmutableSet.of("/usr/bin", "/bin", "/usr/local/nvidia/bin");

    private NvidiaBinaryHelper nvidiaBinaryHelper;
    private String pathOfGpuBinary = null;
    private Map<String, String> environment = new HashMap<>();
    private GpuDeviceInformationParser parser = new GpuDeviceInformationParser();
    private int numOfErrorExecutionSinceLastSucceed = 0;
    private GpuDeviceInformation lastDiscoveredGpuInformation = null;
    /** Lazily parsed result of the user-defined device list; {@code null} until first use. */
    private List<GpuDevice> gpuDevicesFromUser;

    /**
     * Ensures a configuration has been set on this instance.
     *
     * @throws YarnException if {@link #getConf()} returns {@code null}
     */
    private void validateConfOrThrowException() throws YarnException {
        if (getConf() == null) {
            throw new YarnException("Please initialize (call initialize) before use "
                    + GpuDiscoverer.class.getSimpleName());
        }
    }

    private String getErrorMessageOfScriptExecution(String msg) {
        return getFailedToExecuteScriptMessage() + "! Exception message: " + msg;
    }

    private String getErrorMessageOfScriptExecutionThresholdReached() {
        return getFailedToExecuteScriptMessage() + " for " + MAX_REPEATED_ERROR_ALLOWED
                + " times, skipping following executions!";
    }

    private String getFailedToExecuteScriptMessage() {
        return "Failed to execute GPU device detection script (" + this.pathOfGpuBinary + ")";
    }

    private String getFailedToParseErrorMessage(String msg) {
        return "Failed to parse XML output of GPU device detection script( "
                + this.pathOfGpuBinary + ")" + msg;
    }

    /**
     * Queries the GPU binary through the {@link NvidiaBinaryHelper} and caches the result.
     *
     * <p>Every failed call increments a failure counter; a successful call resets it.
     * Once {@link #MAX_REPEATED_ERROR_ALLOWED} consecutive failures have been recorded,
     * the binary is no longer executed and every call fails immediately.
     *
     * @return the freshly discovered GPU information
     * @throws YarnException if this instance has no configuration, if the failure
     *         threshold has been reached, or if executing the binary or processing
     *         its output failed
     */
    public synchronized GpuDeviceInformation getGpuDeviceInformation() throws YarnException {
        validateConfOrThrowException();

        if (this.numOfErrorExecutionSinceLastSucceed >= MAX_REPEATED_ERROR_ALLOWED) {
            String msg = getErrorMessageOfScriptExecutionThresholdReached();
            LOG.error(msg);
            throw new YarnException(msg);
        }

        try {
            this.lastDiscoveredGpuInformation =
                    this.nvidiaBinaryHelper.getGpuDeviceInformation(this.pathOfGpuBinary);
        } catch (IOException e) {
            this.numOfErrorExecutionSinceLastSucceed++;
            String msg = getErrorMessageOfScriptExecution(e.getMessage());
            LOG.debug(msg);
            throw new YarnException(msg, e);
        } catch (YarnException e) {
            this.numOfErrorExecutionSinceLastSucceed++;
            LOG.debug(getFailedToParseErrorMessage(e.getMessage()), e);
            throw e;
        }

        // The counter tracks failures "since last succeed", so a success clears it.
        this.numOfErrorExecutionSinceLastSucceed = 0;
        return this.lastDiscoveredGpuInformation;
    }

    /**
     * @return {@code true} if the allowed-devices setting is absent or equal to {@code auto}
     */
    boolean isAutoDiscoveryEnabled() {
        return AUTO_DISCOVERY_VALUE.equals(
                getConf().get(ALLOWED_GPU_DEVICES_KEY, AUTO_DISCOVERY_VALUE));
    }

    /**
     * Returns the GPU devices YARN may use.
     *
     * <p>With auto discovery the list is rebuilt on every call from the last
     * successfully discovered GPU information. Otherwise the user-defined list is
     * parsed once and the cached list is returned on later calls.
     *
     * @return the usable GPU devices, possibly empty
     * @throws YarnException if this instance has no configuration, if auto discovery
     *         is enabled but no GPU information has been discovered yet, or if the
     *         user-defined value is rejected
     */
    public synchronized List<GpuDevice> getGpusUsableByYarn() throws YarnException {
        validateConfOrThrowException();
        if (isAutoDiscoveryEnabled()) {
            return parseGpuDevicesFromAutoDiscoveredGpuInfo();
        }
        if (this.gpuDevicesFromUser == null) {
            this.gpuDevicesFromUser = parseGpuDevicesFromUserDefinedValues();
        }
        return this.gpuDevicesFromUser;
    }

    /**
     * Builds the device list from the cached discovery result, using the position of
     * each GPU in that result as its index.
     *
     * @throws YarnException if no discovery result has been cached
     */
    private List<GpuDevice> parseGpuDevicesFromAutoDiscoveredGpuInfo() throws YarnException {
        if (this.lastDiscoveredGpuInformation == null) {
            String msg = ALLOWED_GPU_DEVICES_KEY + " is set to " + AUTO_DISCOVERY_VALUE
                    + ", however automatically discovering GPU information failed, please check"
                    + " NodeManager log for more details, as an alternative, admin can specify "
                    + ALLOWED_GPU_DEVICES_KEY + " manually to enable GPU isolation.";
            LOG.error(msg);
            throw new YarnException(msg);
        }

        List<GpuDevice> gpuDevices = new ArrayList<>();
        if (this.lastDiscoveredGpuInformation.getGpus() != null) {
            int numberOfGpus = this.lastDiscoveredGpuInformation.getGpus().size();
            LOG.debug("Found {} GPU devices", numberOfGpus);
            for (int i = 0; i < numberOfGpus; i++) {
                gpuDevices.add(new GpuDevice(i,
                        this.lastDiscoveredGpuInformation.getGpus().get(i).getMinorNumber()));
            }
        }
        return gpuDevices;
    }

    /**
     * Parses the comma separated {@code index:minor_number} list from configuration.
     *
     * <p>For every malformed, unparsable or duplicated entry,
     * {@link ResourcesExceptionUtil#throwIfNecessary} is given the chance to fail the
     * whole operation; if it returns normally the entry is logged and skipped.
     *
     * @return the accepted devices in the order they were specified
     * @throws YarnException if the value is blank, or if an invalid entry is treated
     *         as fatal by {@code throwIfNecessary}
     */
    private List<GpuDevice> parseGpuDevicesFromUserDefinedValues() throws YarnException {
        String devices = getConf().get(ALLOWED_GPU_DEVICES_KEY, AUTO_DISCOVERY_VALUE);
        if (devices.trim().isEmpty()) {
            throw GpuDeviceSpecificationException.createWithEmptyValueSpecified();
        }

        List<GpuDevice> gpuDevices = new ArrayList<>();
        for (String device : devices.split(",")) {
            String trimmed = device.trim();
            if (trimmed.isEmpty()) {
                continue;
            }

            String[] parts = trimmed.split(DEVICE_PART_DELIMITER);
            if (parts.length != 2) {
                ResourcesExceptionUtil.throwIfNecessary(
                        GpuDeviceSpecificationException.createWithWrongValueSpecified(device, devices),
                        getConf());
                LOG.warn("Wrong GPU specification string {}, ignored", device);
                // Really ignore the entry: parsing it anyway would either read past the
                // end of the array or silently accept only its first two parts.
                continue;
            }

            GpuDevice gpuDevice;
            try {
                gpuDevice = parseGpuDevice(parts);
            } catch (NumberFormatException e) {
                ResourcesExceptionUtil.throwIfNecessary(
                        GpuDeviceSpecificationException.createWithWrongValueSpecified(device, devices, e),
                        getConf());
                LOG.warn("Cannot parse GPU device numbers: {}", device);
                continue;
            }

            if (gpuDevices.contains(gpuDevice)) {
                ResourcesExceptionUtil.throwIfNecessary(
                        GpuDeviceSpecificationException.createWithDuplicateValueSpecified(device, devices),
                        getConf());
                LOG.warn("GPU device is duplicated: {}", device);
            } else {
                gpuDevices.add(gpuDevice);
            }
        }
        LOG.info("Allowed GPU devices:{}", gpuDevices);
        return gpuDevices;
    }

    /**
     * @param parts exactly two elements: the device index and the minor number
     * @throws NumberFormatException if either element is not a valid integer
     */
    private GpuDevice parseGpuDevice(String[] parts) {
        return new GpuDevice(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
    }

    /**
     * Stores the configuration and helper and, when auto discovery is enabled, locates
     * the discovery binary and performs a first discovery attempt.
     *
     * <p>A failure of that first discovery attempt is only logged; a failure to locate
     * the binary is propagated.
     *
     * @param configuration the configuration to read GPU settings from
     * @param nvidiaBinaryHelper the helper used to run the binary
     * @throws YarnException if the discovery binary cannot be located
     */
    public synchronized void initialize(Configuration configuration, NvidiaBinaryHelper nvidiaBinaryHelper) throws YarnException {
        setConf(configuration);
        this.nvidiaBinaryHelper = nvidiaBinaryHelper;
        if (!isAutoDiscoveryEnabled()) {
            return;
        }

        this.numOfErrorExecutionSinceLastSucceed = 0;
        lookUpAutoDiscoveryBinary(configuration);
        try {
            LOG.info("Trying to discover GPU information ...");
            LOG.info("Discovered GPU information: {}", getGpuDeviceInformation());
        } catch (YarnException e) {
            // Best effort: a later call may still succeed.
            LOG.warn("Failed to discover GPU information from system, exception message:{} continue...",
                    e.getMessage());
        }
    }

    /**
     * Resolves the absolute path of the discovery binary from the configured value,
     * which may be missing, a directory, or a file.
     *
     * @throws YarnException if no binary can be found, or if the configured file has an
     *         unexpected name and {@code throwIfNecessary} treats that as fatal
     */
    private void lookUpAutoDiscoveryBinary(Configuration configuration) throws YarnException {
        String configuredPath = configuration.get(PATH_TO_DISCOVERY_EXECUTABLES_KEY, DEFAULT_BINARY_NAME);
        if (configuredPath.isEmpty()) {
            configuredPath = DEFAULT_BINARY_NAME;
        }

        File configuredFile = new File(configuredPath);
        File binary;
        if (!configuredFile.exists()) {
            binary = lookupBinaryInDefaultDirs();
        } else if (configuredFile.isDirectory()) {
            binary = handleConfiguredBinaryPathIsDirectory(configuredFile);
        } else {
            binary = configuredFile;
            String binaryName = binary.getName();
            if (!DEFAULT_BINARY_NAME.equals(binaryName)) {
                String msg = String.format(
                        "Please check the configuration value of %s. It should point to an %s binary, which is now %s",
                        PATH_TO_DISCOVERY_EXECUTABLES_KEY, DEFAULT_BINARY_NAME, binaryName);
                ResourcesExceptionUtil.throwIfNecessary(new YarnException(msg), configuration);
                LOG.warn(msg);
            }
        }
        this.pathOfGpuBinary = binary.getAbsolutePath();
    }

    /**
     * @param dir an existing directory taken from configuration
     * @return the {@code nvidia-smi} file inside {@code dir}
     * @throws YarnException if {@code dir} contains no such file
     */
    private File handleConfiguredBinaryPathIsDirectory(File dir) throws YarnException {
        File binary = new File(dir, DEFAULT_BINARY_NAME);
        if (!binary.exists()) {
            throw new YarnException("Failed to find GPU discovery executable, please double check "
                    + PATH_TO_DISCOVERY_EXECUTABLES_KEY
                    + " setting. The setting points to a directory but no file found in the"
                    + " directory with name:" + DEFAULT_BINARY_NAME);
        }
        LOG.warn("Specified path is a directory, use {} under the directory, updated path-to-executable:{}",
                DEFAULT_BINARY_NAME, binary.getAbsolutePath());
        return binary;
    }

    /**
     * @return the binary found in one of the default directories
     * @throws YarnException if none of the default directories contains the binary
     */
    private File lookupBinaryInDefaultDirs() throws YarnException {
        File binary = lookupBinaryInDefaultDirsInternal();
        if (binary == null) {
            throw new YarnException("Failed to find GPU discovery executable, please double check "
                    + PATH_TO_DISCOVERY_EXECUTABLES_KEY
                    + " setting. Also tried to find the executable in the default directories: "
                    + DEFAULT_BINARY_SEARCH_DIRS);
        }
        return binary;
    }

    /**
     * @return the first existing {@code nvidia-smi} in the default directories, or
     *         {@code null} (after logging the tried paths) if there is none
     */
    private File lookupBinaryInDefaultDirsInternal() {
        Set<String> triedBinaryPaths = new HashSet<>();
        for (String dir : DEFAULT_BINARY_SEARCH_DIRS) {
            File candidate = new File(dir, DEFAULT_BINARY_NAME);
            if (candidate.exists()) {
                return candidate;
            }
            triedBinaryPaths.add(candidate.getAbsolutePath());
        }
        // No fallback binary is used from here: the caller turns null into an exception.
        LOG.warn("Failed to locate GPU device discovery binary, tried paths: {}!"
                + " Please double check the value of config {}.",
                triedBinaryPaths, PATH_TO_DISCOVERY_EXECUTABLES_KEY);
        return null;
    }

    @VisibleForTesting
    Map<String, String> getEnvironmentToRunCommand() {
        return this.environment;
    }

    @VisibleForTesting
    String getPathOfGpuBinary() {
        return this.pathOfGpuBinary;
    }
}
