diff options
author | Yiyang Wu <xgreenlandforwyy@gmail.com> | 2022-08-23 18:26:01 +0800 |
---|---|---|
committer | Benda Xu <heroxbd@gentoo.org> | 2022-09-22 09:08:13 +0800 |
commit | 7fab2f99183ab9524f07182af5efecea9e817d8e (patch) | |
tree | 4f2a8b21603bb52183b4e10523f92cb1c48e6ef2 /sci-libs | |
parent | sci-libs/rocSOLVER: add 5.1.3, using rocm.eclass (diff) | |
download | gentoo-7fab2f99183ab9524f07182af5efecea9e817d8e.tar.gz gentoo-7fab2f99183ab9524f07182af5efecea9e817d8e.tar.bz2 gentoo-7fab2f99183ab9524f07182af5efecea9e817d8e.zip |
sci-libs/miopen: bump to 5.1.3, switch to vanilla clang-14
Compared to 5.0.2, 5.1.3 also has:
1. corrected dependencies
2. compilation errors fixed for gcc-12 (ROCm-5.1.3 on clang-14 is
incompatible with gcc-12, but the patch will be useful when using
clang-15)
3. deprecation of clang-ocl
4. cmake_src_prepare moved to the front, because src_prepare needs
BUILD_DIR
5. a fix for the invalid metadata issue found when using vanilla clang
6. tests enabled on specific arch
Closes: https://github.com/gentoo/gentoo/pull/27219
Bugs: https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731
Signed-off-by: Yiyang Wu <xgreenlandforwyy@gmail.com>
Signed-off-by: Benda Xu <heroxbd@gentoo.org>
Diffstat (limited to 'sci-libs')
-rw-r--r-- | sci-libs/miopen/Manifest | 1 | ||||
-rw-r--r-- | sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch | 183 | ||||
-rw-r--r-- | sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch | 55 | ||||
-rw-r--r-- | sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch | 397 | ||||
-rw-r--r-- | sci-libs/miopen/files/miopen-5.1.3-include-array.patch | 12 | ||||
-rw-r--r-- | sci-libs/miopen/files/miopen-5.1.3-no-strip.patch | 17 | ||||
-rw-r--r-- | sci-libs/miopen/miopen-5.1.3.ebuild | 109 | ||||
-rw-r--r-- | sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild | 1 |
8 files changed, 774 insertions, 1 deletions
diff --git a/sci-libs/miopen/Manifest b/sci-libs/miopen/Manifest index 2a8dbb55fead..c987c1548233 100644 --- a/sci-libs/miopen/Manifest +++ b/sci-libs/miopen/Manifest @@ -1,2 +1,3 @@ DIST MIOpen-4.3.0.tar.gz 59405686 BLAKE2B fe91bd91a74023866883d6d0a2a8071a8fe40c4cff2fb4ef58fc6e343a05ac2a731f73e657f4d183ade4e5b7c1fbbe41f3f032918f6e50cb713073aee8d97dc5 SHA512 a8615b7738acfbc7f68d9417b0746c62630d2b48fb8485fafba4db65a4b277a8230f601d249d7e54f89ba25c14176429ca76ad8365a437b09d836b031b0c4fbb DIST MIOpen-5.0.2.tar.gz 76294827 BLAKE2B 7b2a1f0e675793aee4a0fa2a270caac8332cda36c8f04cee483cc2882ed987b6e676e9c24a1acf4976a16a10f922b1a6263470b419aa88a29cfcb6d6d4b4cc29 SHA512 a581b45220797904db3e4dd3840f2ef96085f00baf8187c5ab574325a66da4f599dee6496457bb1cc32825b57a13fb0ef35a2ef1bd2a5f449c7e7b9fa64b27d1 +DIST MIOpen-5.1.3.tar.gz 88118329 BLAKE2B d24722ffc5f5dab6d6a1de2ce34193ad2f25c9a2562e38c52e010a29870f01d9ea1c56970ba0601a088c8286e97958ee95d0da27fc8082126dd2ebe5ccb36b70 SHA512 a14e28cfcb12e5061e0e7b999ef3e67fa0a0e897e31bc50e7288b8a23eb1791312e33d3b697021c2b654ccc065ae1b046c1cfd77ba8e04b0f3e87e9cc0626dcd diff --git a/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch b/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch new file mode 100644 index 000000000000..3dca20f1fb12 --- /dev/null +++ b/sci-libs/miopen/files/miopen-5.1.3-avoid-metadata-error-for-vanilla-clang.patch @@ -0,0 +1,183 @@ +Together with find-sed command in 5.1.3 ebuild, this fixes +https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731 +index 71b2cabf1..60e7fab6e 100644 +--- a/src/kernels/Conv_Winograd_v13_3_12_epilogue.inc ++++ b/src/kernels/Conv_Winograd_v13_3_12_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, 
.offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc b/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc +index 36d47c862..f8f677ec6 100644 +--- a/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc ++++ b/src/kernels/Conv_Winograd_v16_5_0_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/Conv_Winograd_v21_1_3_metadata.inc b/src/kernels/Conv_Winograd_v21_1_3_metadata.inc +index deff81e84..ed47abea7 100644 +--- a/src/kernels/Conv_Winograd_v21_1_3_metadata.inc ++++ b/src/kernels/Conv_Winograd_v21_1_3_metadata.inc +@@ -51,7 +51,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv1x1u.s b/src/kernels/conv1x1u.s +index 5dc213546..c890d45a4 100644 +--- a/src/kernels/conv1x1u.s ++++ b/src/kernels/conv1x1u.s +@@ -1076,7 +1076,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: 
+- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv1x1u_bias_activ.s b/src/kernels/conv1x1u_bias_activ.s +index 1675e819a..6bbdd9936 100644 +--- a/src/kernels/conv1x1u_bias_activ.s ++++ b/src/kernels/conv1x1u_bias_activ.s +@@ -1230,7 +1230,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv1x1u_stride2.s b/src/kernels/conv1x1u_stride2.s +index c5ea1e90c..6241edcf7 100644 +--- a/src/kernels/conv1x1u_stride2.s ++++ b/src/kernels/conv1x1u_stride2.s +@@ -1162,7 +1162,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv1x1wrw.s b/src/kernels/conv1x1wrw.s +index b13b6ffa4..eb63f17c6 100644 +--- a/src/kernels/conv1x1wrw.s ++++ b/src/kernels/conv1x1wrw.s +@@ -1243,7 +1243,7 @@ 
amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv3x3wrw.s b/src/kernels/conv3x3wrw.s +index a3f73aeae..b6fb1632c 100755 +--- a/src/kernels/conv3x3wrw.s ++++ b/src/kernels/conv3x3wrw.s +@@ -1033,7 +1033,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc +index 358772e63..b27ad5284 100644 +--- a/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc ++++ b/src/kernels/conv_3x3_wheel_alpha_v3_0b_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc 
b/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc +index d3296969a..f873ce153 100644 +--- a/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc ++++ b/src/kernels/conv_3x3_wheel_alpha_v7_0_3b_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc +index a253cc0f9..1582d002c 100644 +--- a/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc ++++ b/src/kernels/conv_3x3_wheel_alpha_v9_0_15_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc b/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc +index e40ac1f60..78495e024 100644 +--- a/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc ++++ b/src/kernels/conv_3x3_wheel_alpha_v9_2_7_epilogue.inc +@@ -76,7 +76,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: 
by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/xform_bidirect_winograd_code.inc b/src/kernels/xform_bidirect_winograd_code.inc +index 724845f49..d03314ef5 100644 +--- a/src/kernels/xform_bidirect_winograd_code.inc ++++ b/src/kernels/xform_bidirect_winograd_code.inc +@@ -1566,7 +1566,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } +diff --git a/src/kernels/xform_metadata.inc b/src/kernels/xform_metadata.inc +index 960a9a2d8..83b736bee 100644 +--- a/src/kernels/xform_metadata.inc ++++ b/src/kernels/xform_metadata.inc +@@ -74,7 +74,7 @@ amdhsa.kernels: + .max_flat_workgroup_size: \wg_x + .wavefront_size: 64 + .args: +- - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: N } ++ - { .size: 4, .offset: 0, .value_kind: by_value, .value_type: i32, .name: X } + - { .size: 4, .offset: 4, .value_kind: by_value, .value_type: i32, .name: C } + - { .size: 4, .offset: 8, .value_kind: by_value, .value_type: i32, .name: H } + - { .size: 4, .offset: 12, .value_kind: by_value, .value_type: i32, .name: W } diff --git a/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch b/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch new file mode 100644 index 000000000000..93c66fd3ef05 --- /dev/null +++ b/sci-libs/miopen/files/miopen-5.1.3-deprecate-clang-ocl.patch @@ 
-0,0 +1,55 @@ +This is a cherry picked PR on 5.1.3, which replace clang-ocl with clang +From 98f001dfe61208af04ecf7690023efd772ee7d43 Mon Sep 17 00:00:00 2001 +From: Jehandad Khan <jahandad@gmail.com> +Date: Tue, 19 Jul 2022 17:24:05 -0500 +Subject: [PATCH] Remove clang-ocl and replace with clang + +--- + CMakeLists.txt | 7 +------ + README.md | 1 - + src/hipoc/hipoc_program.cpp | 7 ++++++- + 3 files changed, 7 insertions(+), 8 deletions(-) + +Index: MIOpen-rocm-5.1.3/CMakeLists.txt +=================================================================== +--- MIOpen-rocm-5.1.3.orig/CMakeLists.txt ++++ MIOpen-rocm-5.1.3/CMakeLists.txt +@@ -241,7 +241,7 @@ if( MIOPEN_BACKEND STREQUAL "HIP" OR MIO + # miopentensile default off + set(MIOPEN_USE_MIOPENTENSILE OFF CACHE BOOL "") + +- find_program(HIP_OC_COMPILER clang-ocl ++ find_program(HIP_OC_COMPILER clang + PATH_SUFFIXES bin + PATHS + /opt/rocm +Index: MIOpen-rocm-5.1.3/README.md +=================================================================== +--- MIOpen-rocm-5.1.3.orig/README.md ++++ MIOpen-rocm-5.1.3/README.md +@@ -14,7 +14,6 @@ MIOpen supports two programming models - + * OpenCL - OpenCL libraries and header files + * HIP - + * HIP and HCC libraries and header files +- * [clang-ocl](https://github.com/RadeonOpenCompute/clang-ocl) -- **required** + * [MIOpenGEMM](https://github.com/ROCmSoftwarePlatform/MIOpenGEMM) to enable various functionalities including transposed and dilated convolutions. This is optional on the HIP backend. Users can enable this library using the cmake configuration flag `-DMIOPEN_USE_MIOPENGEMM=On`. 
+ * ROCm cmake modules can be installed from [here](https://github.com/RadeonOpenCompute/rocm-cmake) + * [Half](http://half.sourceforge.net/) - IEEE 754-based half-precision floating point library +Index: MIOpen-rocm-5.1.3/src/hipoc/hipoc_program.cpp +=================================================================== +--- MIOpen-rocm-5.1.3.orig/src/hipoc/hipoc_program.cpp ++++ MIOpen-rocm-5.1.3/src/hipoc/hipoc_program.cpp +@@ -255,7 +255,12 @@ void HIPOCProgramImpl::BuildCodeObjectIn + if(miopen::IsEnabled(MIOPEN_DEBUG_OPENCL_WAVE64_NOWGP{})) + params += " -mwavefrontsize64 -mcumode"; + WriteFile(src, dir->path / filename); +- dir->Execute(HIP_OC_COMPILER, params + " " + filename + " -o " + hsaco_file.string()); ++ params += " -target amdgcn-amd-amdhsa -x cl -D__AMD__=1 -O3"; ++ params += " -cl-kernel-arg-info -cl-denorms-are-zero"; ++ params += " -cl-std=CL1.2 -mllvm -amdgpu-early-inline-all"; ++ params += " -mllvm -amdgpu-internalize-symbols "; ++ params += " " + filename + " -o " + hsaco_file.string(); ++ dir->Execute(HIP_OC_COMPILER, params); + } + if(!boost::filesystem::exists(hsaco_file)) + MIOPEN_THROW("Cant find file: " + hsaco_file.string()); diff --git a/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch b/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch new file mode 100644 index 000000000000..42041f2414ed --- /dev/null +++ b/sci-libs/miopen/files/miopen-5.1.3-gfx1031.patch @@ -0,0 +1,397 @@ +Enable gfx1031 support +====================== +--- MIOpen-rocm-5.1.3/fin/src/include/conv_fin.hpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/fin/src/include/conv_fin.hpp 2022-09-01 18:36:18.630980283 +0800 +@@ -111,6 +111,10 @@ class ConvFin : public Fin + { + assert(num_cu == 72 || num_cu == 36); + } ++ else if(arch == "gfx1031") ++ { ++ assert(num_cu == 40 || num_cu == 20); ++ } + else if(arch == "gfx90a") + { + assert(num_cu == 110); +Only in MIOpen-rocm-5.1.3: patches +diff --color -upr 
MIOpen-rocm-5.1.3/src/composable_kernel/composable_kernel/include/utility/config.hpp gfx1031/src/composable_kernel/composable_kernel/include/utility/config.hpp +--- MIOpen-rocm-5.1.3/src/composable_kernel/composable_kernel/include/utility/config.hpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/composable_kernel/composable_kernel/include/utility/config.hpp 2022-09-01 18:36:18.634980274 +0800 +@@ -13,7 +13,7 @@ + // GPU target + // should enable one and only one GPU target + #if !(defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ +- defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1030)) ++ defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) || defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031)) + #error Need to define (only) one GPU target + #endif + +@@ -29,7 +29,7 @@ + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) || defined(CK_AMD_GPU_GFX906) || \ + defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90A) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000 +-#elif defined(CK_AMD_GPU_GFX1030) ++#elif (defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031)) + #define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000 + #endif + +@@ -37,7 +37,7 @@ + #if defined(CK_AMD_GPU_GFX803) || defined(CK_AMD_GPU_GFX900) + #define CK_USE_AMD_V_MAC_F32 + #elif defined(CK_AMD_GPU_GFX906) || defined(CK_AMD_GPU_GFX908) || defined(CK_AMD_GPU_GFX90a) || \ +- defined(CK_AMD_GPU_GFX1030) ++ (defined(CK_AMD_GPU_GFX1030) || defined(CK_AMD_GPU_GFX1031)) + #define CK_USE_AMD_V_FMAC_F32 + #define CK_USE_AMD_V_DOT2_F32_F16 + #define CK_USE_AMD_V_DOT4_I32_I8 +diff --color -upr MIOpen-rocm-5.1.3/src/include/miopen/solver/ck_utility_common.hpp gfx1031/src/include/miopen/solver/ck_utility_common.hpp +--- MIOpen-rocm-5.1.3/src/include/miopen/solver/ck_utility_common.hpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/include/miopen/solver/ck_utility_common.hpp 2022-09-01 18:36:18.638980266 
+0800 +@@ -54,6 +54,7 @@ static inline bool is_ck_supported_hardw + StartsWith(handle.GetDeviceName(), "gfx908") || + StartsWith(handle.GetDeviceName(), "gfx90a") || + StartsWith(handle.GetDeviceName(), "gfx1030"); ++ StartsWith(handle.GetDeviceName(), "gfx1031"); + } + + static inline bool is_support_amd_buffer_atomic_fadd(const std::string& device_name) +@@ -83,6 +84,8 @@ static inline auto get_ck_common_compile + compiler_flag << " -DCK_AMD_GPU_GFX90A"; + else if(StartsWith(device_name, "gfx1030")) + compiler_flag << " -DCK_AMD_GPU_GFX1030"; ++ else if(StartsWith(device_name, "gfx1031")) ++ compiler_flag << " -DCK_AMD_GPU_GFX1031"; + + // buffer atomic-fadd + compiler_flag << " -DCK_USE_AMD_BUFFER_ATOMIC_FADD=" +diff --color -upr MIOpen-rocm-5.1.3/src/include/miopen/solver/implicitgemm_util.hpp gfx1031/src/include/miopen/solver/implicitgemm_util.hpp +--- MIOpen-rocm-5.1.3/src/include/miopen/solver/implicitgemm_util.hpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/include/miopen/solver/implicitgemm_util.hpp 2022-09-01 18:36:18.638980266 +0800 +@@ -469,7 +469,7 @@ static inline bool is_use_amd_buffer_loa + { + #if WORKAROUND_MIOPEN_ISSUE_557 + const auto device_name = ctx.GetStream().GetDeviceName(); +- return !StartsWith(device_name, "gfx1030"); ++ return !StartsWith(device_name, "gfx1030") && !StartsWith(device_name, "gfx1031"); + #else + return true; + #endif +@@ -478,7 +478,7 @@ static inline bool is_use_amd_buffer_loa + static inline bool is_use_v_fmac_f32(const ConvolutionContext& ctx) + { + const auto device_name = ctx.GetStream().GetDeviceName(); +- return StartsWith(device_name, "gfx1030"); ++ return StartsWith(device_name, "gfx1030") || StartsWith(device_name, "gfx1031"); + } + + static inline bool support_amd_buffer_atomic_fadd(const std::string& device_name) +@@ -599,7 +599,8 @@ static inline bool IsComposableKernelSup + StartsWith(c.GetStream().GetDeviceName(), "gfx906") || + StartsWith(c.GetStream().GetDeviceName(), "gfx908") || + 
StartsWith(c.GetStream().GetDeviceName(), "gfx90a") || +- StartsWith(c.GetStream().GetDeviceName(), "gfx1030"); ++ StartsWith(c.GetStream().GetDeviceName(), "gfx1030")|| ++ StartsWith(c.GetStream().GetDeviceName(), "gfx1031"); + } + + // greatest common divisor, aka highest common factor +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/batchnorm_functions.h gfx1031/src/kernels/batchnorm_functions.h +--- MIOpen-rocm-5.1.3/src/kernels/batchnorm_functions.h 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/batchnorm_functions.h 2022-09-01 18:36:18.858979772 +0800 +@@ -159,6 +159,10 @@ + #define MIO_BN_GFX1030 0 + #endif + ++#ifndef MIO_BN_GFX1031 ++#define MIO_BN_GFX1031 0 ++#endif ++ + #define UNUSED __attribute__((__unused__)) + + #if(MIO_BN_VARIANT != 4) +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdPerAct.cl gfx1031/src/kernels/MIOpenBatchNormActivBwdPerAct.cl +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdPerAct.cl 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/MIOpenBatchNormActivBwdPerAct.cl 2022-09-01 18:36:18.858979772 +0800 +@@ -34,7 +34,7 @@ + #endif + + #define MIOPEN_USE_AMDGCN 0 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1 + #undef MIOPEN_USE_AMDGCN + #define MIOPEN_USE_AMDGCN 1 + #endif +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdSpatial.cl gfx1031/src/kernels/MIOpenBatchNormActivBwdSpatial.cl +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivBwdSpatial.cl 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/MIOpenBatchNormActivBwdSpatial.cl 2022-09-01 18:36:18.858979772 +0800 +@@ -32,7 +32,7 @@ + #endif + + #define MIOPEN_USE_AMDGCN 0 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1 + #undef MIOPEN_USE_AMDGCN + #define MIOPEN_USE_AMDGCN 1 + #endif +diff --color -upr 
MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl gfx1031/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/MIOpenBatchNormActivFwdTrainSpatial.cl 2022-09-01 18:36:18.858979772 +0800 +@@ -33,7 +33,7 @@ + #endif + + #define MIOPEN_USE_AMDGCN 0 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1 + #undef MIOPEN_USE_AMDGCN + #define MIOPEN_USE_AMDGCN 1 + #endif +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormBwdSpatial.cl gfx1031/src/kernels/MIOpenBatchNormBwdSpatial.cl +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormBwdSpatial.cl 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/MIOpenBatchNormBwdSpatial.cl 2022-09-01 18:36:18.858979772 +0800 +@@ -33,7 +33,7 @@ + #endif + + #define MIOPEN_USE_AMDGCN 0 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1 + #undef MIOPEN_USE_AMDGCN + #define MIOPEN_USE_AMDGCN 1 + #endif +diff --color -upr MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl gfx1031/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl +--- MIOpen-rocm-5.1.3/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/kernels/MIOpenBatchNormFwdTrainSpatial.cl 2022-09-01 18:36:18.858979772 +0800 +@@ -33,7 +33,7 @@ + #endif + + #define MIOPEN_USE_AMDGCN 0 +-#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 ++#if defined(__AMDGCN__) && MIO_BN_GFX1030 != 1 && MIO_BN_GFX1031 != 1 + #undef MIOPEN_USE_AMDGCN + #define MIOPEN_USE_AMDGCN 1 + #endif +diff --color -upr MIOpen-rocm-5.1.3/src/md_graph.cpp gfx1031/src/md_graph.cpp +--- MIOpen-rocm-5.1.3/src/md_graph.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/md_graph.cpp 2022-09-01 18:36:18.630980283 +0800 +@@ -738,8 +738,8 @@ void 
FusionMDGraph::InitConv(FusionMDGra + + add_v21_wino("gfx9", {"gfx900", "gfx906", "gfx908", "gfx90a"}, 1); + add_v21_wino("gfx9", {"gfx900", "gfx906", "gfx908", "gfx90a"}, 2); +- add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030"}, 1); +- add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030"}, 2); ++ add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030", "gfx1031"}, 1); ++ add_v21_wino("gfx10", {"gfx1011", "gfx1012", "gfx1030", "gfx1031"}, 2); + } + } + +diff --color -upr MIOpen-rocm-5.1.3/src/ocl/fusionopbiasbnactivocl.cpp gfx1031/src/ocl/fusionopbiasbnactivocl.cpp +--- MIOpen-rocm-5.1.3/src/ocl/fusionopbiasbnactivocl.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/ocl/fusionopbiasbnactivocl.cpp 2022-09-01 18:36:18.634980274 +0800 +@@ -392,7 +392,8 @@ miopenStatus_t BatchNormBwdTrainFusionOp + " -DMIO_BN_USESAVED=" + std::to_string(static_cast<int>(true)) + + " -DMIO_BN_VARIANT=" + std::to_string(variant) + + " -DMIO_BN_CBA_WRITE_INTERMEDIATE=" + std::to_string(0) + +- " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0"); ++ " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0") + ++ " -DMIO_BN_GFX1031=" + ((handle.GetDeviceName() == "gfx1031") ? "1" : "0"); + + compile_config += add; + MIOPEN_LOG_I2(add); +@@ -607,7 +608,8 @@ miopenStatus_t BatchNormFwdTrainFusionOp + " -DMIO_SAVE_MEAN_VARIANCE=" + (saveBatchStats ? "1" : "0") + + " -DMIO_RUNNING_RESULT=" + ((savePopStats) ? "1" : "0") + + " -DMIO_BN_VARIANT=" + std::to_string(variant) + +- " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0"); ++ " -DMIO_BN_GFX1030=" + ((handle.GetDeviceName() == "gfx1030") ? "1" : "0") + ++ " -DMIO_BN_GFX1031=" + ((handle.GetDeviceName() == "gfx1031") ? 
"1" : "0"); + + compile_config += add; + MIOPEN_LOG_I2(add); +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_per_activation.cpp gfx1031/src/solver/batchnorm/backward_per_activation.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_per_activation.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/backward_per_activation.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -113,6 +113,7 @@ BnBwdTrainingPerActivation::GetSolution( + {"MIO_BN_GRP1", ylocalsize}, + {"MIO_BN_GRP2", zlocalsize}, + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")}, + }; + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_multiple.cpp gfx1031/src/solver/batchnorm/backward_spatial_multiple.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_multiple.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/backward_spatial_multiple.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -210,6 +210,7 @@ ConvSolution BnBwdTrainingSpatialMultipl + {"MIO_BN_GRP1", ylocalsize}, + {"MIO_BN_GRP2", zlocalsize}, + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")}, + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())}, + }; + +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_single.cpp gfx1031/src/solver/batchnorm/backward_spatial_single.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/backward_spatial_single.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/backward_spatial_single.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -247,6 +247,7 @@ BnBwdTrainingSpatialSingle::GetSolution( + + build_params << KernelBuildParameters{ + {"MIO_BN_GFX1030", (handle.GetDeviceName() == "gfx1030") ? 
"1" : "0"}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")}, + }; + + kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{}); +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_inference.cpp gfx1031/src/solver/batchnorm/forward_inference.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_inference.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/forward_inference.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -103,6 +103,7 @@ ConvSolution BnFwdInference::GetSolution + {"MIO_BN_GRP1", ylocalsize},
+ {"MIO_BN_GRP2", zlocalsize},
+ {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")},
++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")},
+ };
+
+ kernel.comp_options = build_params.GenerateFor(kbp::OpenCL{});
+diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_per_activation.cpp gfx1031/src/solver/batchnorm/forward_per_activation.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_per_activation.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/forward_per_activation.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -105,6 +105,7 @@ BnFwdTrainingPerActivation::GetSolution( + {"MIO_BN_GRP1", ylocalsize}, + {"MIO_BN_GRP2", zlocalsize}, + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")}, + }; + + auto kernel = KernelInfo{}; +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_multiple.cpp gfx1031/src/solver/batchnorm/forward_spatial_multiple.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_multiple.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/forward_spatial_multiple.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -177,6 +177,7 @@ ConvSolution BnFwdTrainingSpatialMultipl + {"MIO_BN_GRP1", ylocalsize}, + {"MIO_BN_GRP2", zlocalsize}, + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? "1" : "0")}, + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())}, + }; + +diff --color -upr MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_single.cpp gfx1031/src/solver/batchnorm/forward_spatial_single.cpp +--- MIOpen-rocm-5.1.3/src/solver/batchnorm/forward_spatial_single.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/solver/batchnorm/forward_spatial_single.cpp 2022-09-01 18:36:18.638980266 +0800 +@@ -211,6 +211,7 @@ BnFwdTrainingSpatialSingle::GetSolution( + {"MIO_BN_GRP1", ylocalsize}, + {"MIO_BN_GRP2", zlocalsize}, + {"MIO_BN_GFX1030", ((handle.GetDeviceName() == "gfx1030") ? "1" : "0")}, ++ {"MIO_BN_GFX1031", ((handle.GetDeviceName() == "gfx1031") ? 
"1" : "0")}, + {"MIO_LAYOUT_NHWC", static_cast<int>(problem.IsLayoutNHWC())}, + }; + +diff --color -upr MIOpen-rocm-5.1.3/src/target_properties.cpp gfx1031/src/target_properties.cpp +--- MIOpen-rocm-5.1.3/src/target_properties.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/src/target_properties.cpp 2022-09-01 18:36:18.630980283 +0800 +@@ -54,6 +54,7 @@ static std::string GetDeviceNameFromMap( + {"Vega10", "gfx900"}, + {"gfx901", "gfx900"}, + {"10.3.0 Sienna_Cichlid 18", "gfx1030"}, ++ {"10.3.1 Navi_flounder 18", "gfx1031"}, + }; + + const char* const p_asciz = miopen::GetStringEnv(MIOPEN_DEBUG_ENFORCE_DEVICE{}); +diff --color -upr MIOpen-rocm-5.1.3/test/CMakeLists.txt gfx1031/test/CMakeLists.txt +--- MIOpen-rocm-5.1.3/test/CMakeLists.txt 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/test/CMakeLists.txt 2022-09-01 18:36:19.022979405 +0800 +@@ -38,6 +38,7 @@ option( MIOPEN_TEST_GFX90A "Test on gfx9 + option( MIOPEN_TEST_GFX900 "Test on Vega10 (gfx900)" OFF ) + option( MIOPEN_TEST_GFX906 "Test on Vega20 (gfx906)" OFF ) + option( MIOPEN_TEST_GFX1030 "Test on Navi21 (gfx1030)" OFF ) ++option( MIOPEN_TEST_GFX1031 "Test on Navi21 (gfx1031)" OFF ) + option( MIOPEN_TEST_GPU_XNACK_ENABLED "Test as if XNACK mode is enabled" OFF ) + option( MIOPEN_TEST_CONV Off) + option( MIOPEN_TEST_DEEPBENCH Off) +@@ -74,7 +75,7 @@ endif() + # Also we do not detect GPU when target GPU for testing is specified explicitly. 
+ set(MIOPEN_TEST_GPU_DETECTION_FAILED FALSE) + set(MIOPEN_NO_GPU FALSE) +-if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TEST_GFX906 OR MIOPEN_TEST_GFX908 OR MIOPEN_TEST_GFX90A OR MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_HIP_NOGPU)) ++if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TEST_GFX906 OR MIOPEN_TEST_GFX908 OR MIOPEN_TEST_GFX90A OR MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 OR MIOPEN_TEST_HIP_NOGPU)) + find_program(ROCMINFO + NAMES rocminfo + PATHS +@@ -96,6 +97,8 @@ if(NOT (MIOPEN_TEST_GFX900 OR MIOPEN_TES + elseif (NOT ROCMINFO_EXIT_STATUS EQUAL 0) + message(WARNING "ROCMINFO FAILED, GPU TYPE UNKNOWN. Manually set respective MIOPEN_TEST_GFX* CMake variable to specify target GPU for testing.") + set(MIOPEN_TEST_GPU_DETECTION_FAILED TRUE) ++ elseif(ROCMINFO_OUTPUT MATCHES "gfx1031") ++ set(MIOPEN_TEST_GFX1031 ON) + elseif(ROCMINFO_OUTPUT MATCHES "gfx1030") + set(MIOPEN_TEST_GFX1030 ON) + elseif(ROCMINFO_OUTPUT MATCHES "gfx900") +@@ -125,6 +128,7 @@ message(STATUS "MIOPEN_TEST_GFX906 ${MIO + message(STATUS "MIOPEN_TEST_GFX908 ${MIOPEN_TEST_GFX908}") + message(STATUS "MIOPEN_TEST_GFX90A ${MIOPEN_TEST_GFX90A}") + message(STATUS "MIOPEN_TEST_GFX1030 ${MIOPEN_TEST_GFX1030}") ++message(STATUS "MIOPEN_TEST_GFX1031 ${MIOPEN_TEST_GFX1031}") + message(STATUS "MIOPEN_TEST_GPU_XNACK_ENABLED ${MIOPEN_TEST_GPU_XNACK_ENABLED}") + message(STATUS "MIOPEN_TEST_GPU_DETECTION_FAILED ${MIOPEN_TEST_GPU_DETECTION_FAILED}") + +@@ -167,10 +171,10 @@ endmacro() + set_var_to_condition(WORKAROUND_ISSUE_1187_DEFAULT MIOPEN_TEST_GFX90A AND MIOPEN_TEST_FLOAT) + option( WORKAROUND_ISSUE_1187 "" ${WORKAROUND_ISSUE_1187_DEFAULT}) + +-set_var_to_condition(WORKAROUND_ISSUE_1148_DEFAULT MIOPEN_TEST_GFX1030 AND MIOPEN_TEST_FLOAT) ++set_var_to_condition(WORKAROUND_ISSUE_1148_DEFAULT MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 AND MIOPEN_TEST_FLOAT) + option( WORKAROUND_ISSUE_1148 "" ${WORKAROUND_ISSUE_1148_DEFAULT}) + +-set_var_to_condition(WORKAROUND_ISSUE_1334_DEFAULT MIOPEN_TEST_GFX1030 AND MIOPEN_TEST_FLOAT) 
++set_var_to_condition(WORKAROUND_ISSUE_1334_DEFAULT MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031 AND MIOPEN_TEST_FLOAT) + option( WORKAROUND_ISSUE_1334 "" ${WORKAROUND_ISSUE_1334_DEFAULT}) + + if(NOT MIOPEN_TEST_MIOTENSILE) +@@ -216,7 +220,7 @@ if (MIOPEN_NO_GPU) + test_pooling3d test_perfdb) + endif() + +-if(MIOPEN_TEST_GFX1030) ++if(MIOPEN_TEST_GFX1030 OR MIOPEN_TEST_GFX1031) + if(WORKAROUND_ISSUE_1053 AND MIOPEN_TEST_ALL) + list(APPEND SKIP_TESTS test_lrn_test) + endif() +@@ -439,7 +443,7 @@ endfunction() + # If nothing is specified, the default value is taken. + # Default: FLOAT_ENABLED HALF_DISABLED BF16_DISABLED INT8_DISABLED + # +-# GPU types: GFX900, GFX906, GFX908, GFX90A, GFX1030 ++# GPU types: GFX900, GFX906, GFX908, GFX90A, GFX1030, GFX1031 + # The option can be enabled or disabled by using '_ENABLED' and '_DISABLED' suffix. + # If nothing is specified, the default value is taken. + # Default: GFX900_ENABLED, GFX906_ENABLED, GFX908_ENABLED, GFX90A_ENABLED, GFX1030_DISABLED +@@ -571,7 +575,7 @@ function(add_custom_test NAME) + set_tests_properties(${NAME} PROPERTIES RUN_SERIAL On) + endif() + +- if( (is_gfx900_check OR is_gfx906_check OR is_gfx908_check OR is_gfx1030_check OR is_gfx90a_check) ++ if( (is_gfx900_check OR is_gfx906_check OR is_gfx908_check OR is_gfx1030_check OR is_gfx1031_check OR is_gfx90a_check) + AND is_full_check + AND is_xnack_on_check + AND (is_miotensile_check AND is_mlir_check) +diff --color -upr MIOpen-rocm-5.1.3/test/handle_test.cpp gfx1031/test/handle_test.cpp +--- MIOpen-rocm-5.1.3/test/handle_test.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/test/handle_test.cpp 2022-09-01 18:36:19.018979413 +0800 +@@ -234,7 +234,7 @@ void test_warnings(kernel_type_t kern_ty + void test_arch_name() + { + auto&& h = get_handle(); +- auto known_arch = {"gfx908", "gfx90a", "gfx906", "gfx900", "gfx803", "gfx1030"}; ++ auto known_arch = {"gfx908", "gfx90a", "gfx906", "gfx900", "gfx803", "gfx1030", "gfx1031"}; + auto this_arch = 
h.GetDeviceName(); + EXPECT(std::any_of( + known_arch.begin(), known_arch.end(), [&](std::string arch) { return arch == this_arch; })); +diff --color -upr MIOpen-rocm-5.1.3/test/mdgraph.cpp gfx1031/test/mdgraph.cpp +--- MIOpen-rocm-5.1.3/test/mdgraph.cpp 2022-05-08 14:08:05.000000000 +0800 ++++ gfx1031/test/mdgraph.cpp 2022-09-01 18:36:19.022979405 +0800 +@@ -222,7 +222,7 @@ struct mdgraph_driver : test_driver + auto target = h.GetTargetProperties(); + + auto wino_supported_arch = { +- "gfx1030", "gfx1012", "gfx1011", "gfx90a", "gfx908", "gfx906", "gfx900", "gfx803"}; ++ "gfx1030", "gfx1031","gfx1012", "gfx1011", "gfx90a", "gfx908", "gfx906", "gfx900", "gfx803"}; + + auto is_wino_support = !xnack_enabled && + !miopen::IsDisabled(MIOPEN_DEBUG_GCN_ASM_KERNELS{}) && diff --git a/sci-libs/miopen/files/miopen-5.1.3-include-array.patch b/sci-libs/miopen/files/miopen-5.1.3-include-array.patch new file mode 100644 index 000000000000..fc6a36d22548 --- /dev/null +++ b/sci-libs/miopen/files/miopen-5.1.3-include-array.patch @@ -0,0 +1,12 @@ +This fixes compile error upon gcc-12 libstdc++ +index 1cfb2a72c..0f4feb406 100644 +--- a/test/sequences.cpp ++++ b/test/sequences.cpp +@@ -25,6 +25,7 @@ + *******************************************************************************/ + #include "test.hpp" + #include <miopen/sequences.hpp> ++#include <array> + + namespace miopen { + namespace seq { diff --git a/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch b/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch new file mode 100644 index 000000000000..0d1d429dbb85 --- /dev/null +++ b/sci-libs/miopen/files/miopen-5.1.3-no-strip.patch @@ -0,0 +1,17 @@ +Don't strip for release. Let portage handle stripping. 
+Index: MIOpen-rocm-5.1.3/CMakeLists.txt +=================================================================== +--- MIOpen-rocm-5.1.3.orig/CMakeLists.txt ++++ MIOpen-rocm-5.1.3/CMakeLists.txt +@@ -78,11 +78,6 @@ option( BUILD_DEV "Build for development + option(MIOPEN_ENABLE_FIN "Enable the fin driver for MIOpen" OFF) + + +-# Strip symbols for release +-if(NOT WIN32 AND NOT APPLE) +- set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -s") +- set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") +-endif() + + rocm_setup_version(VERSION 2.16.0) + diff --git a/sci-libs/miopen/miopen-5.1.3.ebuild b/sci-libs/miopen/miopen-5.1.3.ebuild new file mode 100644 index 000000000000..a7d8b5083f60 --- /dev/null +++ b/sci-libs/miopen/miopen-5.1.3.ebuild @@ -0,0 +1,109 @@ +# Copyright 1999-2022 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +ROCM_VERSION=${PV} + +inherit cmake flag-o-matic llvm rocm + +LLVM_MAX_SLOT=14 + +DESCRIPTION="AMD's Machine Intelligence Library" +HOMEPAGE="https://github.com/ROCmSoftwarePlatform/MIOpen" +SRC_URI="https://github.com/ROCmSoftwarePlatform/MIOpen/archive/rocm-${PV}.tar.gz -> MIOpen-${PV}.tar.gz" + +LICENSE="MIT" +KEYWORDS="~amd64" +SLOT="0/$(ver_cut 1-2)" + +IUSE="debug test" +RESTRICT="!test? 
( test )" + +RDEPEND=" + >=dev-util/hip-5.1.3 + >=dev-db/sqlite-3.17 + sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}] + >=dev-libs/boost-1.72 +" + +DEPEND="${RDEPEND}" + +BDEPEND="dev-libs/half:0/1 + dev-util/rocm-cmake +" + +S="${WORKDIR}/MIOpen-rocm-${PV}" + +PATCHES=( + "${FILESDIR}/${PN}-4.2.0-disable-no-inline-boost.patch" + "${FILESDIR}/${PN}-4.2.0-gcc11-numeric_limits.patch" + "${FILESDIR}/${PN}-5.0.2-strip-xnack-in-flags.patch" + "${FILESDIR}/${PN}-4.3.0-fix-interface-include-in-HIP_COMPILER_FLAGS.patch" + "${FILESDIR}/${PN}-4.3.0-enable-test.patch" + "${FILESDIR}/${PN}-5.1.3-gfx1031.patch" + "${FILESDIR}/${PN}-5.1.3-deprecate-clang-ocl.patch" + "${FILESDIR}/${PN}-5.1.3-no-strip.patch" + "${FILESDIR}/${PN}-5.1.3-include-array.patch" + "${FILESDIR}/${PN}-5.1.3-avoid-metadata-error-for-vanilla-clang.patch" +) + +src_prepare() { + cmake_src_prepare + + sed -e "s:/opt/rocm/llvm:$(get_llvm_prefix ${LLVM_MAX_SLOT}) NO_DEFAULT_PATH:" \ + -e "s:/opt/rocm/hip:$(hipconfig -p) NO_DEFAULT_PATH:" \ + -e '/set( MIOPEN_INSTALL_DIR/s:miopen:${CMAKE_INSTALL_PREFIX}:' \ + -e '/MIOPEN_TIDY_ERRORS ALL/d' \ + -i CMakeLists.txt || die + + sed -e "/rocm_install_symlink_subdir(\${MIOPEN_INSTALL_DIR})/d" -i src/CMakeLists.txt || die + sed -e "/add_test/s:--build \${CMAKE_CURRENT_BINARY_DIR}:--build ${BUILD_DIR}:" -i test/CMakeLists.txt || die + + sed -e "s:\${AMD_DEVICE_LIBS_PREFIX}/lib:${EPREFIX}/usr/lib/amdgcn/bitcode:" -i cmake/hip-config.cmake || die + + # This plus avoid-metadata-error-for-vanilla-clang.patch fix bug mentioned + # in https://github.com/ROCmSoftwarePlatform/MIOpen/issues/1731 + find src/kernels -name "*.s" -exec \ + sed -e "s/.name: n /.name: x /g" -e "s/.name: y /.name: z /g" \ + -e "s/.name: y,/.name: z,/g" -i {} \; || die +} + +src_configure() { + if ! 
use debug; then + append-cflags "-DNDEBUG" + append-cxxflags "-DNDEBUG" + CMAKE_BUILD_TYPE="Release" + else + CMAKE_BUILD_TYPE="Debug" + fi + + local mycmakeargs=( + -DCMAKE_SKIP_RPATH=ON + -DAMDGPU_TARGETS="$(get_amdgpu_flags)" + -DCMAKE_INSTALL_PREFIX="${EPREFIX}/usr" + -DMIOPEN_BACKEND=HIP + -DBoost_USE_STATIC_LIBS=OFF + -DMIOPEN_USE_MLIR=OFF + -DBUILD_TESTS=$(usex test ON OFF) + -DMIOPEN_TEST_ALL=$(usex test ON OFF) + ) + + if use test; then + for gpu_target in ${AMDGPU_TARGETS}; do + mycmakeargs+=( -DMIOPEN_TEST_${gpu_target^^}=ON ) + done + fi + + addpredict /dev/kfd + addpredict /dev/dri/ + append-cxxflags "--rocm-path=$(hipconfig -R)" + append-cxxflags "--hip-device-lib-path=${EPREFIX}/usr/lib/amdgcn/bitcode" + CXX="$(get_llvm_prefix ${LLVM_MAX_SLOT})/bin/clang++" cmake_src_configure +} + +src_test() { + check_amdgpu + export LD_LIBRARY_PATH="${BUILD_DIR}"/lib + MAKEOPTS="-j1" cmake_src_test +} diff --git a/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild b/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild index e643014fda76..77092f820467 100644 --- a/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild +++ b/sci-libs/rocSOLVER/rocSOLVER-5.1.3.ebuild @@ -18,7 +18,6 @@ SLOT="0/$(ver_cut 1-2)" IUSE="test benchmark" REQUIRED_USE="${ROCM_REQUIRED_USE}" -# sci-libs/rocBLAS:${SLOT} RDEPEND="dev-util/hip sci-libs/rocBLAS:${SLOT}[${ROCM_USEDEP}] =dev-libs/libfmt-8* |