173 lines
5.5 KiB
Diff
173 lines
5.5 KiB
Diff
|
|
From 828cc9e3083f399ca550ba9617b20fc282a73896 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||
|
|
Date: Thu, 29 Aug 2024 19:41:23 +0800
|
||
|
|
Subject: [PATCH] Correct shll/shll2 patterns
|
||
|
|
|
||
|
|
---
|
||
|
|
gcc/config/aarch64/aarch64-simd.md | 34 +++++-----
|
||
|
|
gcc/config/aarch64/predicates.md | 14 +++-
|
||
|
|
gcc/testsuite/gcc.target/aarch64/jdcolor_le.c | 68 +++++++++++++++++++
|
||
|
|
3 files changed, 98 insertions(+), 18 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
index 754343abc..523423784 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
@@ -4712,16 +4712,16 @@
|
||
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*aarch64_simd_vec_unpacks_lo_shiftsi"
|
||
|
|
- [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||
|
|
- (ashift:V4SI
|
||
|
|
- (sign_extend:V4SI
|
||
|
|
- (vec_select:V4HI
|
||
|
|
- (match_operand:V8HI 1 "register_operand" "w")
|
||
|
|
- (match_operand:V8HI 2 "vect_par_cnst_lo_half" "")))
|
||
|
|
- (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
|
||
|
|
+(define_insn "*aarch64_simd_vec_unpacks_lo_shift<mode>"
|
||
|
|
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
|
||
|
|
+ (ashift:<VDBLW>
|
||
|
|
+ (sign_extend:<VDBLW>
|
||
|
|
+ (vec_select:<VHALF>
|
||
|
|
+ (match_operand:VQW 1 "register_operand" "w")
|
||
|
|
+ (match_operand:VQW 2 "vect_par_cnst_lo_half" "")))
|
||
|
|
+ (match_operand:<VDBLW> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
|
||
|
|
"TARGET_SIMD"
|
||
|
|
- "shll\t%0.4s, %1.4h, #%3"
|
||
|
|
+ "shll\t%0.<Vwtype>, %1.<Vhalftype>, #%3"
|
||
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
||
|
|
)
|
||
|
|
|
||
|
|
@@ -4743,15 +4743,15 @@
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*aarch64_simd_vec_unpacks_hi_shiftsi"
+(define_insn "*aarch64_simd_vec_unpacks_hi_shift<mode>"
|
||
|
|
- [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||
|
|
- (ashift:V4SI
|
||
|
|
- (sign_extend:V4SI
|
||
|
|
- (vec_select:V4HI
|
||
|
|
- (match_operand:V8HI 1 "register_operand" "w")
|
||
|
|
- (match_operand:V8HI 2 "vect_par_cnst_hi_half" "")))
|
||
|
|
- (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
|
||
|
|
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
|
||
|
|
+ (ashift:<VDBLW>
|
||
|
|
+ (sign_extend:<VDBLW>
|
||
|
|
+ (vec_select:<VHALF>
|
||
|
|
+ (match_operand:VQW 1 "register_operand" "w")
|
||
|
|
+ (match_operand:VQW 2 "vect_par_cnst_hi_half" "")))
|
||
|
|
+ (match_operand:<VDBLW> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
|
||
|
|
"TARGET_SIMD"
|
||
|
|
- "shll2\t%0.4s, %1.8h, #%3"
|
||
|
|
+ "shll2\t%0.<Vwtype>, %1.<Vtype>, #%3"
|
||
|
|
[(set_attr "type" "neon_shift_imm_long")]
|
||
|
|
)
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||
|
|
index d0a55b44a..72c7ece57 100644
|
||
|
|
--- a/gcc/config/aarch64/predicates.md
|
||
|
|
+++ b/gcc/config/aarch64/predicates.md
|
||
|
|
@@ -617,11 +617,23 @@
|
||
|
|
(and (match_code "const_int")
|
||
|
|
(match_test "IN_RANGE (INTVAL (op), 0, 64)")))
|
||
|
|
|
||
|
|
+(define_predicate "aarch64_simd_shift_imm_bitsize_v16qi"
|
||
|
|
+ (match_code "const_vector")
|
||
|
|
+{
|
||
|
|
+ return INTVAL (unwrap_const_vec_duplicate (op)) == 8;
|
||
|
|
+})
|
||
|
|
+
|
||
|
|
+(define_predicate "aarch64_simd_shift_imm_bitsize_v8hi"
|
||
|
|
+ (match_code "const_vector")
|
||
|
|
+{
|
||
|
|
+ return INTVAL (unwrap_const_vec_duplicate (op)) == 16;
|
||
|
|
+})
|
||
|
|
+
|
||
|
|
(define_predicate "aarch64_simd_shift_imm_bitsize_v4si"
|
||
|
|
(match_code "const_vector")
|
||
|
|
{
|
||
|
|
HOST_WIDE_INT val = INTVAL (unwrap_const_vec_duplicate (op));
|
||
|
|
- return val == 8 || val == 16 || val == 32;
|
||
|
|
+ return val == 32;
|
||
|
|
})
|
||
|
|
|
||
|
|
(define_predicate "aarch64_constant_pool_symref"
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..2b66b13c1
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
|
||
|
|
@@ -0,0 +1,68 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O3" } */
|
||
|
|
+/* It's a preprocessed part of libjpeg-turbo. */
|
||
|
|
+
|
||
|
|
+typedef int boolean;
|
||
|
|
+typedef short INT16;
|
||
|
|
+typedef unsigned short UINT16;
|
||
|
|
+typedef long unsigned int size_t;
|
||
|
|
+typedef long JLONG;
|
||
|
|
+typedef unsigned int JDIMENSION;
|
||
|
|
+typedef short J12SAMPLE;
|
||
|
|
+typedef J12SAMPLE *J12SAMPROW;
|
||
|
|
+typedef J12SAMPROW *J12SAMPARRAY;
|
||
|
|
+typedef J12SAMPARRAY *J12SAMPIMAGE;
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+rgb_rgb565_convert_le(JDIMENSION num_cols, J12SAMPIMAGE input_buf,
|
||
|
|
+ JDIMENSION input_row, J12SAMPARRAY output_buf,
|
||
|
|
+ int num_rows)
|
||
|
|
+{
|
||
|
|
+ register J12SAMPROW outptr;
|
||
|
|
+ register J12SAMPROW inptr0, inptr1, inptr2;
|
||
|
|
+ register JDIMENSION col;
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+ while (--num_rows >= 0) {
|
||
|
|
+ JLONG rgb;
|
||
|
|
+ unsigned int r, g, b;
|
||
|
|
+
|
||
|
|
+ inptr0 = input_buf[0][input_row];
|
||
|
|
+ inptr1 = input_buf[1][input_row];
|
||
|
|
+ inptr2 = input_buf[2][input_row];
|
||
|
|
+ input_row++;
|
||
|
|
+ outptr = *output_buf++;
|
||
|
|
+ if ((((size_t)(outptr)) & 3)) {
|
||
|
|
+ r = *inptr0++;
|
||
|
|
+ g = *inptr1++;
|
||
|
|
+ b = *inptr2++;
|
||
|
|
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
|
||
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
||
|
|
+ outptr += 2;
|
||
|
|
+ num_cols--;
|
||
|
|
+ }
|
||
|
|
+ for (col = 0; col < (num_cols >> 1); col++) {
|
||
|
|
+ r = *inptr0++;
|
||
|
|
+ g = *inptr1++;
|
||
|
|
+ b = *inptr2++;
|
||
|
|
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
|
||
|
|
+
|
||
|
|
+ r = *inptr0++;
|
||
|
|
+ g = *inptr1++;
|
||
|
|
+ b = *inptr2++;
|
||
|
|
+ rgb = ((((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)) << 16) | rgb);
|
||
|
|
+
|
||
|
|
+ ((*(int *)(outptr)) = rgb);
|
||
|
|
+ outptr += 4;
|
||
|
|
+ }
|
||
|
|
+ if (num_cols & 1) {
|
||
|
|
+ r = *inptr0;
|
||
|
|
+ g = *inptr1;
|
||
|
|
+ b = *inptr2;
|
||
|
|
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
|
||
|
|
+ *(INT16 *)outptr = (INT16)rgb;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+/* We should not generate shll[2] for this test case. */
|
||
|
|
+/* { dg-final { scan-assembler-not "shll" } } */
|
||
|
|
--
|
||
|
|
2.19.1
|
||
|
|
|