解决多张gpu卡情况下测试失败问题
This commit is contained in:
parent
7b80b7d64f
commit
b8c3356c93
36
oec-hardware-1.1.5-6-fix-bug-gpu.patch
Normal file
36
oec-hardware-1.1.5-6-fix-bug-gpu.patch
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
diff -Naur rpm/tests/compatible/gpu/nvidia_gpu.py oech/tests/compatible/gpu/nvidia_gpu.py
|
||||||
|
--- rpm/tests/compatible/gpu/nvidia_gpu.py 2024-08-29 19:48:19.472522063 +0800
|
||||||
|
+++ oech/tests/compatible/gpu/nvidia_gpu.py 2024-08-30 16:21:51.821261220 +0800
|
||||||
|
@@ -73,6 +73,11 @@
|
||||||
|
|
||||||
|
self.logger.info("Set default test gpu as %s." % id_num)
|
||||||
|
|
||||||
|
+ def clean_default_gpu(self):
|
||||||
|
+ if 'CUDA_VISIBLE_DEVICES' in os.environ:
|
||||||
|
+ del os.environ['CUDA_VISIBLE_DEVICES']
|
||||||
|
+ self.logger.info("Clean default test gpu.")
|
||||||
|
+
|
||||||
|
def test_pressure(self):
|
||||||
|
"""
|
||||||
|
Set pressure for gpu to test
|
||||||
|
@@ -87,10 +92,7 @@
|
||||||
|
|
||||||
|
os.chdir("/opt/gpu-burn")
|
||||||
|
cmd = self.command.run_cmd(
|
||||||
|
- "nvidia-smi -q | grep -i -A1 '%s' | grep 'Product Name' | cut -d ':' -f 2" % pci_num)
|
||||||
|
- device_name = cmd[0].strip()
|
||||||
|
- cmd = self.command.run_cmd(
|
||||||
|
- "./gpu_burn -l | grep -i '%s' | cut -d ':' -f 1 | awk '{print $2}'" % device_name)
|
||||||
|
+ "nvidia-smi -q | grep -i -A20 '%s' | grep 'Minor Number' | cut -d ':' -f 2" % pci_num)
|
||||||
|
run_id = cmd[0].strip()
|
||||||
|
cmd = getstatusoutput(
|
||||||
|
'nohup ./gpu_burn -i%s 10 &> %s &' % (run_id, self.gpu_burn))
|
||||||
|
@@ -185,6 +187,8 @@
|
||||||
|
result = False
|
||||||
|
self.logger.error("Test Vulkan failed.")
|
||||||
|
|
||||||
|
+ self.clean_default_gpu()
|
||||||
|
+
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(
|
||||||
|
"Failed to run the script because compiling or setting variables: %s" % e)
|
||||||
@ -6,7 +6,7 @@
|
|||||||
Name: oec-hardware
|
Name: oec-hardware
|
||||||
Summary: openEuler Hardware Compatibility Test Suite
|
Summary: openEuler Hardware Compatibility Test Suite
|
||||||
Version: 1.1.5
|
Version: 1.1.5
|
||||||
Release: 5
|
Release: 6
|
||||||
Group: Development/Tools
|
Group: Development/Tools
|
||||||
License: Mulan PSL v2
|
License: Mulan PSL v2
|
||||||
URL: https://gitee.com/openeuler/oec-hardware
|
URL: https://gitee.com/openeuler/oec-hardware
|
||||||
@ -18,6 +18,7 @@ Patch0002: oec-hardware-1.1.5-2-fix-bug.patch
|
|||||||
Patch0003: oec-hardware-1.1.5-3-functional-optimization-fix-bug.patch
|
Patch0003: oec-hardware-1.1.5-3-functional-optimization-fix-bug.patch
|
||||||
Patch0004: oec-hardware-1.1.5-4-fix-gpu-testcase-bug.patch
|
Patch0004: oec-hardware-1.1.5-4-fix-gpu-testcase-bug.patch
|
||||||
Patch0005: oec-hardware-1.1.5-5-fix-cpufreq-userspace.patch
|
Patch0005: oec-hardware-1.1.5-5-fix-cpufreq-userspace.patch
|
||||||
|
Patch0006: oec-hardware-1.1.5-6-fix-bug-gpu.patch
|
||||||
|
|
||||||
Buildroot: %{_tmppath}/%{name}-%{version}-root
|
Buildroot: %{_tmppath}/%{name}-%{version}-root
|
||||||
BuildRequires: gcc
|
BuildRequires: gcc
|
||||||
@ -46,6 +47,7 @@ openEuler Hardware Compatibility Test Server
|
|||||||
%patch3 -p1
|
%patch3 -p1
|
||||||
%patch4 -p1
|
%patch4 -p1
|
||||||
%patch5 -p1
|
%patch5 -p1
|
||||||
|
%patch6 -p1
|
||||||
|
|
||||||
%build
|
%build
|
||||||
%ifarch x86_64 aarch64
|
%ifarch x86_64 aarch64
|
||||||
@ -93,6 +95,10 @@ sed -i 's#grep openeulerversion /etc/openEuler-latest#grep %{vendor_lowercase}ve
|
|||||||
rm -rf /var/lock/oech.lock
|
rm -rf /var/lock/oech.lock
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Aug 30 2024 gaochuanji <gaochuanji@inspur.com> - 1.1.5-6
|
||||||
|
- Resolve the issue of the gpu_burn test failing when multiple GPUs are present
|
||||||
|
- Resolve the issue of only the first GPU test succeeding and the others failing when multiple GPUs are present
|
||||||
|
|
||||||
* Tue Aug 13 2024 gaochuanji <gaochuanji@inspur.com> - 1.1.5-5
|
* Tue Aug 13 2024 gaochuanji <gaochuanji@inspur.com> - 1.1.5-5
|
||||||
- Resolve the issue of failed userspace testing for cpufreq on certain models
|
- Resolve the issue of failed userspace testing for cpufreq on certain models
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user