diff options
author | Evgeniy Pavlov <evgeniy.pavlov@intel.com> | 2021-02-15 14:24:25 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-15 14:24:25 +0300 |
commit | 9dd50af416dfcc4a688a331459d88b066ae2884f (patch) | |
tree | a7844098be84ac1a404239be918b227ae414842f | |
parent | Extend CI tests (#110) (diff) | |
download | llvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.tar.gz llvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.tar.bz2 llvm-project-9dd50af416dfcc4a688a331459d88b066ae2884f.zip |
Remove unnecessary barriers and add constexpr attribute in scan brick (#119)
* Add constexpr attribute to __iters_per_witem variable
Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com>
* Remove unnecessary barriers
Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com>
* Replace auto with decltype
Signed-off-by: Pavlov, Evgeniy <evgeniy.pavlov@intel.com>
-rw-r--r-- | include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 | ||||
-rw-r--r-- | include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h | 4 |
2 files changed, 1 insertions, 5 deletions
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index b7756df15f62..1532d1362778 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -440,7 +440,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& #endif // Practically this is the better value that was found - auto __iters_per_witem = decltype(__wgroup_size)(16); + constexpr decltype(__wgroup_size) __iters_per_witem = 16; auto __size_per_wg = __iters_per_witem * __wgroup_size; auto __n_groups = (__n - 1) / __size_per_wg + 1; // Storage for the results of scan for each workgroup diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index 4895be8cec49..241dec088393 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -543,7 +543,6 @@ struct __scan __local_acc[__local_id] = __partial_sums; __item.barrier(sycl::access::fence_space::local_space); __adder = __local_acc[__wgroup_size - 1]; - __item.barrier(sycl::access::fence_space::local_space); if (__adjusted_global_id + __shift < __n) __gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id); @@ -627,7 +626,6 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va __local_acc[__local_id] = __data_acc(__adjusted_global_id, __acc); else __local_acc[__local_id] = _Tp{0}; // for plus only - __item.barrier(sycl::access::fence_space::local_space); // the result of __unary_op must be convertible to _Tp _Tp __old_value = __unary_op(__local_id, __local_acc); @@ -635,13 +633,11 @@ struct __scan<_Inclusive, _ExecutionPolicy, ::std::plus<typename _InitType::__va __old_value = __bin_op(__adder, __old_value); else if (__adjusted_global_id == 0) 
__use_init(__init, __old_value, __bin_op); - __item.barrier(sycl::access::fence_space::local_space); __local_acc[__local_id] = sycl::ONEAPI::inclusive_scan(__item.get_group(), __old_value, __bin_op); __item.barrier(sycl::access::fence_space::local_space); __adder = __local_acc[__wgroup_size - 1]; - __item.barrier(sycl::access::fence_space::local_space); if (__adjusted_global_id + __shift < __n) __gl_assigner(__acc, __out_acc, __adjusted_global_id + __shift, __local_acc, __local_id); |