gcc/0193-Correct-shll-shll2-patterns.patch

173 lines
5.5 KiB
Diff
Raw Normal View History

From 828cc9e3083f399ca550ba9617b20fc282a73896 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Thu, 29 Aug 2024 19:41:23 +0800
Subject: [PATCH] Correct shll/shll2 patterns
---
gcc/config/aarch64/aarch64-simd.md | 34 +++++-----
gcc/config/aarch64/predicates.md | 14 +++-
gcc/testsuite/gcc.target/aarch64/jdcolor_le.c | 68 +++++++++++++++++++
3 files changed, 98 insertions(+), 18 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 754343abc..523423784 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4712,16 +4712,16 @@
[(set_attr "type" "neon_shift_imm_long")]
)
-(define_insn "*aarch64_simd_vec_unpacks_lo_shiftsi"
- [(set (match_operand:V4SI 0 "register_operand" "=w")
- (ashift:V4SI
- (sign_extend:V4SI
- (vec_select:V4HI
- (match_operand:V8HI 1 "register_operand" "w")
- (match_operand:V8HI 2 "vect_par_cnst_lo_half" "")))
- (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
+(define_insn "*aarch64_simd_vec_unpacks_lo_shift<mode>"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (ashift:<VDBLW>
+ (sign_extend:<VDBLW>
+ (vec_select:<VHALF>
+ (match_operand:VQW 1 "register_operand" "w")
+ (match_operand:VQW 2 "vect_par_cnst_lo_half" "")))
+ (match_operand:<VDBLW> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
"TARGET_SIMD"
- "shll\t%0.4s, %1.4h, #%3"
+ "shll\t%0.<Vwtype>, %1.<Vhalftype>, #%3"
[(set_attr "type" "neon_shift_imm_long")]
)
@@ -4743,15 +4743,15 @@
)
-(define_insn "*aarch64_simd_vec_unpacks_hi_shiftsi"
- [(set (match_operand:V4SI 0 "register_operand" "=w")
- (ashift:V4SI
- (sign_extend:V4SI
- (vec_select:V4HI
- (match_operand:V8HI 1 "register_operand" "w")
- (match_operand:V8HI 2 "vect_par_cnst_hi_half" "")))
- (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
+(define_insn "*aarch64_simd_vec_unpacks_hi_shift<mode>"
+ [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
+ (ashift:<VDBLW>
+ (sign_extend:<VDBLW>
+ (vec_select:<VHALF>
+ (match_operand:VQW 1 "register_operand" "w")
+ (match_operand:VQW 2 "vect_par_cnst_hi_half" "")))
+ (match_operand:<VDBLW> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
"TARGET_SIMD"
- "shll2\t%0.4s, %1.8h, #%3"
+ "shll2\t%0.<Vwtype>, %1.<Vtype>, #%3"
[(set_attr "type" "neon_shift_imm_long")]
)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d0a55b44a..72c7ece57 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -617,11 +617,23 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 64)")))
+(define_predicate "aarch64_simd_shift_imm_bitsize_v16qi"
+ (match_code "const_vector")
+{
+ return INTVAL (unwrap_const_vec_duplicate (op)) == 8;
+})
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_v8hi"
+ (match_code "const_vector")
+{
+ return INTVAL (unwrap_const_vec_duplicate (op)) == 16;
+})
+
(define_predicate "aarch64_simd_shift_imm_bitsize_v4si"
(match_code "const_vector")
{
HOST_WIDE_INT val = INTVAL (unwrap_const_vec_duplicate (op));
- return val == 8 || val == 16 || val == 32;
+ return val == 32; /* Only the SI lane width (32) is accepted. */
})
(define_predicate "aarch64_constant_pool_symref"
diff --git a/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
new file mode 100644
index 000000000..2b66b13c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* It's a preprocessed part of libjpeg-turbo. */
+
+typedef int boolean;
+typedef short INT16;
+typedef unsigned short UINT16;
+typedef long unsigned int size_t;
+typedef long JLONG;
+typedef unsigned int JDIMENSION;
+typedef short J12SAMPLE;
+typedef J12SAMPLE *J12SAMPROW;
+typedef J12SAMPROW *J12SAMPARRAY;
+typedef J12SAMPARRAY *J12SAMPIMAGE;
+
+void
+rgb_rgb565_convert_le(JDIMENSION num_cols, J12SAMPIMAGE input_buf,
+ JDIMENSION input_row, J12SAMPARRAY output_buf,
+ int num_rows)
+{
+ register J12SAMPROW outptr;
+ register J12SAMPROW inptr0, inptr1, inptr2;
+ register JDIMENSION col;
+
+
+ while (--num_rows >= 0) { /* one iteration per output row */
+ JLONG rgb;
+ unsigned int r, g, b;
+
+ inptr0 = input_buf[0][input_row];
+ inptr1 = input_buf[1][input_row];
+ inptr2 = input_buf[2][input_row];
+ input_row++;
+ outptr = *output_buf++;
+ if ((((size_t)(outptr)) & 3)) { /* outptr is not 4-byte aligned: emit a single pixel first */
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)); /* combine r, g, b into one packed value */
+ *(INT16 *)outptr = (INT16)rgb;
+ outptr += 2;
+ num_cols--;
+ }
+ for (col = 0; col < (num_cols >> 1); col++) { /* two pixels per iteration */
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
+
+ r = *inptr0++;
+ g = *inptr1++;
+ b = *inptr2++;
+ rgb = ((((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)) << 16) | rgb); /* second pixel goes above the first, shifted up by 16 bits */
+
+ ((*(int *)(outptr)) = rgb);
+ outptr += 4;
+ }
+ if (num_cols & 1) { /* odd column count: one trailing pixel */
+ r = *inptr0;
+ g = *inptr1;
+ b = *inptr2;
+ rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
+ *(INT16 *)outptr = (INT16)rgb;
+ }
+ }
+}
+/* We should not generate shll[2] for this test case. */
+/* { dg-final { scan-assembler-not "shll" } } */
--
2.19.1