解决多张gpu卡情况下测试失败问题
This commit is contained in:
parent
7b80b7d64f
commit
b8c3356c93
36
oec-hardware-1.1.5-6-fix-bug-gpu.patch
Normal file
36
oec-hardware-1.1.5-6-fix-bug-gpu.patch
Normal file
@@ -0,0 +1,36 @@
|
||||
diff -Naur rpm/tests/compatible/gpu/nvidia_gpu.py oech/tests/compatible/gpu/nvidia_gpu.py
|
||||
--- rpm/tests/compatible/gpu/nvidia_gpu.py 2024-08-29 19:48:19.472522063 +0800
|
||||
+++ oech/tests/compatible/gpu/nvidia_gpu.py 2024-08-30 16:21:51.821261220 +0800
|
||||
@@ -73,6 +73,11 @@
|
||||
|
||||
self.logger.info("Set default test gpu as %s." % id_num)
|
||||
|
||||
+ def clean_default_gpu(self):
|
||||
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
|
||||
+ del os.environ['CUDA_VISIBLE_DEVICES']
|
||||
+ self.logger.info("Clean default test gpu.")
|
||||
+
|
||||
def test_pressure(self):
|
||||
"""
|
||||
Set pressure for gpu to test
|
||||
@@ -87,10 +92,7 @@
|
||||
|
||||
os.chdir("/opt/gpu-burn")
|
||||
cmd = self.command.run_cmd(
|
||||
- "nvidia-smi -q | grep -i -A1 '%s' | grep 'Product Name' | cut -d ':' -f 2" % pci_num)
|
||||
- device_name = cmd[0].strip()
|
||||
- cmd = self.command.run_cmd(
|
||||
- "./gpu_burn -l | grep -i '%s' | cut -d ':' -f 1 | awk '{print $2}'" % device_name)
|
||||
+ "nvidia-smi -q | grep -i -A20 '%s' | grep 'Minor Number' | cut -d ':' -f 2" % pci_num)
|
||||
run_id = cmd[0].strip()
|
||||
cmd = getstatusoutput(
|
||||
'nohup ./gpu_burn -i%s 10 &> %s &' % (run_id, self.gpu_burn))
|
||||
@@ -185,6 +187,8 @@
|
||||
result = False
|
||||
self.logger.error("Test Vulkan failed.")
|
||||
|
||||
+ self.clean_default_gpu()
|
||||
+
|
||||
except Exception as e:
|
||||
self.logger.error(
|
||||
"Failed to run the script because compiling or setting variables: %s" % e)
|
||||
@@ -6,7 +6,7 @@
|
||||
Name: oec-hardware
|
||||
Summary: openEuler Hardware Compatibility Test Suite
|
||||
Version: 1.1.5
|
||||
Release: 5
|
||||
Release: 6
|
||||
Group: Development/Tools
|
||||
License: Mulan PSL v2
|
||||
URL: https://gitee.com/openeuler/oec-hardware
|
||||
@@ -18,6 +18,7 @@ Patch0002: oec-hardware-1.1.5-2-fix-bug.patch
|
||||
Patch0003: oec-hardware-1.1.5-3-functional-optimization-fix-bug.patch
|
||||
Patch0004: oec-hardware-1.1.5-4-fix-gpu-testcase-bug.patch
|
||||
Patch0005: oec-hardware-1.1.5-5-fix-cpufreq-userspace.patch
|
||||
Patch0006: oec-hardware-1.1.5-6-fix-bug-gpu.patch
|
||||
|
||||
Buildroot: %{_tmppath}/%{name}-%{version}-root
|
||||
BuildRequires: gcc
|
||||
@@ -46,6 +47,7 @@ openEuler Hardware Compatibility Test Server
|
||||
%patch3 -p1
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
|
||||
%build
|
||||
%ifarch x86_64 aarch64
|
||||
@@ -93,6 +95,10 @@ sed -i 's#grep openeulerversion /etc/openEuler-latest#grep %{vendor_lowercase}ve
|
||||
rm -rf /var/lock/oech.lock
|
||||
|
||||
%changelog
|
||||
* Fri Aug 30 2024 gaochuanji <gaochuanji@inspur.com> - 1.1.5-6
|
||||
- Resolve the issue of the gpu_burn test failing when multiple GPUs are present
|
||||
- Resolve the issue of the first GPU test succeeding and the others failing when multiple GPUs are present
|
||||
|
||||
* Tue Aug 13 2024 gaochuanji <gaochuanji@inspur.com> - 1.1.5-5
|
||||
- Resolve the issue of failed userspace testing for cpufreq on certain models
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user