From 828cc9e3083f399ca550ba9617b20fc282a73896 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia
Date: Thu, 29 Aug 2024 19:41:23 +0800
Subject: [PATCH] Correct shll/shll2 patterns

The *aarch64_simd_vec_unpacks_{lo,hi}_shift patterns were hard-coded to a
V8HI source with a V4SI result, and their shift-amount predicate accepted
any of 8, 16 or 32.

Iterate both patterns over VQW and select the shift-amount predicate per
source mode.  The new per-mode predicates accept exactly one value each:
8 (v16qi), 16 (v8hi) and 32 (v4si).

Add a compile-only test, reduced from libjpeg-turbo, that checks no
shll/shll2 instruction is emitted for it.
---
NOTE(review): this patch was rebuilt from a whitespace-collapsed copy in
which every angle-bracketed token had been stripped.  The mode-attribute
references (<VWIDE>, <VHALF>, <Vwtype>, <Vhalftype>, <Vtype>, <mode>),
the line structure and the indentation are reconstructed -- confirm them
against gcc/config/aarch64/iterators.md and with "git apply --check".
The v4si predicate now returns "val == 32" so that the retained local
"val" is not left unused; the post-image blob id on the predicates.md
"index" line predates that one-line change.

 gcc/config/aarch64/aarch64-simd.md            | 34 +++++-----
 gcc/config/aarch64/predicates.md              | 14 +++-
 gcc/testsuite/gcc.target/aarch64/jdcolor_le.c | 68 +++++++++++++++++++
 3 files changed, 98 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/jdcolor_le.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 754343abc..523423784 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4712,16 +4712,16 @@
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
-(define_insn "*aarch64_simd_vec_unpacks_lo_shiftsi"
-  [(set (match_operand:V4SI 0 "register_operand" "=w")
-        (ashift:V4SI
-          (sign_extend:V4SI
-            (vec_select:V4HI
-              (match_operand:V8HI 1 "register_operand" "w")
-              (match_operand:V8HI 2 "vect_par_cnst_lo_half" "")))
-          (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
+(define_insn "*aarch64_simd_vec_unpacks_lo_shift<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ashift:<VWIDE>
+          (sign_extend:<VWIDE>
+            (vec_select:<VHALF>
+              (match_operand:VQW 1 "register_operand" "w")
+              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")))
+          (match_operand:<VWIDE> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
   "TARGET_SIMD"
-  "shll\t%0.4s, %1.4h, #%3"
+  "shll\t%0.<Vwtype>, %1.<Vhalftype>, #%3"
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
@@ -4743,15 +4743,15 @@
 )
 
 (define_insn "*aarch64_simd_vec_unpacks_hi_shiftsi"
-  [(set (match_operand:V4SI 0 "register_operand" "=w")
-        (ashift:V4SI
-          (sign_extend:V4SI
-            (vec_select:V4HI
-              (match_operand:V8HI 1 "register_operand" "w")
-              (match_operand:V8HI 2 "vect_par_cnst_hi_half" "")))
-          (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (ashift:<VWIDE>
+          (sign_extend:<VWIDE>
+            (vec_select:<VHALF>
+              (match_operand:VQW 1 "register_operand" "w")
+              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")))
+          (match_operand:<VWIDE> 3 "aarch64_simd_shift_imm_bitsize_<mode>" "i")))]
   "TARGET_SIMD"
-  "shll2\t%0.4s, %1.8h, #%3"
+  "shll2\t%0.<Vwtype>, %1.<Vtype>, #%3"
   [(set_attr "type" "neon_shift_imm_long")]
 )
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index d0a55b44a..72c7ece57 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -617,11 +617,23 @@
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 64)")))
 
+(define_predicate "aarch64_simd_shift_imm_bitsize_v16qi"
+  (match_code "const_vector")
+{
+  return INTVAL (unwrap_const_vec_duplicate (op)) == 8;
+})
+
+(define_predicate "aarch64_simd_shift_imm_bitsize_v8hi"
+  (match_code "const_vector")
+{
+  return INTVAL (unwrap_const_vec_duplicate (op)) == 16;
+})
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_v4si"
   (match_code "const_vector")
 {
   HOST_WIDE_INT val = INTVAL (unwrap_const_vec_duplicate (op));
-  return val == 8 || val == 16 || val == 32;
+  return val == 32;
 })
 
 (define_predicate "aarch64_constant_pool_symref"
diff --git a/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
new file mode 100644
index 000000000..2b66b13c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/jdcolor_le.c
@@ -0,0 +1,68 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* It's a preprocessed part of libjpeg-turbo. */
+
+typedef int boolean;
+typedef short INT16;
+typedef unsigned short UINT16;
+typedef long unsigned int size_t;
+typedef long JLONG;
+typedef unsigned int JDIMENSION;
+typedef short J12SAMPLE;
+typedef J12SAMPLE *J12SAMPROW;
+typedef J12SAMPROW *J12SAMPARRAY;
+typedef J12SAMPARRAY *J12SAMPIMAGE;
+
+void
+rgb_rgb565_convert_le(JDIMENSION num_cols, J12SAMPIMAGE input_buf,
+                      JDIMENSION input_row, J12SAMPARRAY output_buf,
+                      int num_rows)
+{
+  register J12SAMPROW outptr;
+  register J12SAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int r, g, b;
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if ((((size_t)(outptr)) & 3)) {
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
+
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = ((((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3)) << 16) | rgb);
+
+      ((*(int *)(outptr)) = rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      r = *inptr0;
+      g = *inptr1;
+      b = *inptr2;
+      rgb = ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3));
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+}
+/* We should not generate shll[2] for this test case. */
+/* { dg-final { scan-assembler-not "shll" } } */
-- 
2.19.1