From 5e42745cd51de92da7238b49f212617dcdfe4f32 Mon Sep 17 00:00:00 2001 From: Luca Weiss Date: Tue, 10 Oct 2023 12:26:37 +0200 Subject: [PATCH] cross/gcc-*: upgrade to 13.1.1_git20230722-r3 (MR 4465) [ci:skip-build]: Already built successfully in CI --- ...clobber-issues-with-rot32di2-on-i386.patch | 186 ++++++++++++++++++ cross/gcc-aarch64/APKBUILD | 4 +- ...clobber-issues-with-rot32di2-on-i386.patch | 186 ++++++++++++++++++ cross/gcc-armhf/APKBUILD | 4 +- ...clobber-issues-with-rot32di2-on-i386.patch | 186 ++++++++++++++++++ cross/gcc-armv7/APKBUILD | 4 +- 6 files changed, 567 insertions(+), 3 deletions(-) create mode 100644 cross/gcc-aarch64/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch create mode 100644 cross/gcc-armhf/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch create mode 100644 cross/gcc-armv7/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch diff --git a/cross/gcc-aarch64/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch b/cross/gcc-aarch64/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch new file mode 100644 index 000000000..e3c09cc84 --- /dev/null +++ b/cross/gcc-aarch64/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch @@ -0,0 +1,186 @@ +https://gcc.gnu.org/PR110792 +https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=790c1f60a5662b16eb19eb4b81922995863c7571 +https://github.com/randombit/botan/issues/3637 + +From 85628c5653ff40963158a24c60eeec6a3b5a8e56 Mon Sep 17 00:00:00 2001 +From: Roger Sayle +Date: Thu, 3 Aug 2023 07:12:04 +0100 +Subject: [PATCH] PR target/110792: Early clobber issues with + rot32di2_doubleword on i386. + +This patch is a conservative fix for PR target/110792, a wrong-code +regression affecting doubleword rotations by BITS_PER_WORD, which +effectively swaps the highpart and lowpart words, when the source to be +rotated resides in memory. The issue is that if the register used to +hold the lowpart of the destination is mentioned in the address of +the memory operand, the current define_insn_and_split unintentionally +clobbers it before reading the highpart. + +Hence, for the testcase, the incorrectly generated code looks like: + + salq $4, %rdi // calculate address + movq WHIRL_S+8(%rdi), %rdi // accidentally clobber addr + movq WHIRL_S(%rdi), %rbp // load (wrong) lowpart + +Traditionally, the textbook way to fix this would be to add an +explicit early clobber to the instruction's constraints. + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,&r") + (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") + (const_int 32)))] + +but unfortunately this currently generates significantly worse code, +due to a strange choice of reloads (effectively memcpy), which ends up +looking like: + + salq $4, %rdi // calculate address + movdqa WHIRL_S(%rdi), %xmm0 // load the double word in SSE reg. + movaps %xmm0, -16(%rsp) // store the SSE reg back to the stack + movq -8(%rsp), %rdi // load highpart + movq -16(%rsp), %rbp // load lowpart + +Note that reload's "&" doesn't distinguish between the memory being +early clobbered, vs the registers used in an addressing mode being +early clobbered. + +The fix proposed in this patch is to remove the third alternative, that +allowed offsetable memory as an operand, forcing reload to place the +operand into a register before the rotation. This results in: + + salq $4, %rdi + movq WHIRL_S(%rdi), %rax + movq WHIRL_S+8(%rdi), %rdi + movq %rax, %rbp + +I believe there's a more advanced solution, by swapping the order of +the loads (if first destination register is mentioned in the address), +or inserting a lea insn (if both destination registers are mentioned +in the address), but this fix is a minimal "safe" solution, that +should hopefully be suitable for backporting. + +2023-08-03 Roger Sayle + +gcc/ChangeLog + PR target/110792 + * config/i386/i386.md (ti3): For rotations by 64 bits + place operand in a register before gen_64ti2_doubleword. + (di3): Likewise, for rotations by 32 bits, place + operand in a register before gen_32di2_doubleword. + (32di2_doubleword): Constrain operand to be in register. + (64ti2_doubleword): Likewise. + +gcc/testsuite/ChangeLog + PR target/110792 + * g++.target/i386/pr110792.C: New 32-bit C++ test case. + * gcc.target/i386/pr110792.c: New 64-bit C test case. + +(cherry picked from commit 790c1f60a5662b16eb19eb4b81922995863c7571) +--- + gcc/config/i386/i386.md | 18 ++++++++++++------ + gcc/testsuite/g++.target/i386/pr110792.C | 16 ++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr110792.c | 18 ++++++++++++++++++ + 3 files changed, 46 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/i386/pr110792.C + create mode 100644 gcc/testsuite/gcc.target/i386/pr110792.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index f3a3305..a71e837 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -14359,7 +14359,10 @@ + emit_insn (gen_ix86_ti3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64) +- emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (TImode, operands[1]); ++ emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ } + else + { + rtx amount = force_reg (QImode, operands[2]); +@@ -14394,7 +14397,10 @@ + emit_insn (gen_ix86_di3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32) +- emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ } + else + FAIL; + +@@ -14562,8 +14568,8 @@ + }) + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r") + (const_int 32)))] + "!TARGET_64BIT" + "#" +@@ -14580,8 +14586,8 @@ + }) + + (define_insn_and_split "64ti2_doubleword" +- [(set (match_operand:TI 0 "register_operand" "=r,r,r") +- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:TI 0 "register_operand" "=r,r") ++ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r") + (const_int 64)))] + "TARGET_64BIT" + "#" +diff --git a/gcc/testsuite/g++.target/i386/pr110792.C b/gcc/testsuite/g++.target/i386/pr110792.C +new file mode 100644 +index 0000000..ce21a7a +--- /dev/null ++++ b/gcc/testsuite/g++.target/i386/pr110792.C +@@ -0,0 +1,16 @@ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2" } */ ++ ++template ++inline T rotr(T input) ++{ ++ return static_cast((input >> ROT) | (input << (8 * sizeof(T) - ROT))); ++} ++ ++unsigned long long WHIRL_S[256] = {0x18186018C07830D8}; ++unsigned long long whirl(unsigned char x0) ++{ ++ const unsigned long long s4 = WHIRL_S[x0&0xFF]; ++ return rotr<32>(s4); ++} ++/* { dg-final { scan-assembler-not "movl\tWHIRL_S\\+4\\(,%eax,8\\), %eax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr110792.c b/gcc/testsuite/gcc.target/i386/pr110792.c +new file mode 100644 +index 0000000..b65125c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110792.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile { target int128 } } */ ++/* { dg-options "-O2" } */ ++ ++static inline unsigned __int128 rotr(unsigned __int128 input) ++{ ++ return ((input >> 64) | (input << (64))); ++} ++ ++unsigned __int128 WHIRL_S[256] = {((__int128)0x18186018C07830D8) << 64 |0x18186018C07830D8}; ++unsigned __int128 whirl(unsigned char x0) ++{ ++ register int t __asm("rdi") = x0&0xFF; ++ const unsigned __int128 s4 = WHIRL_S[t]; ++ register unsigned __int128 tt __asm("rdi") = rotr(s4); ++ asm("":::"memory"); ++ return tt; ++} ++/* { dg-final { scan-assembler-not "movq\tWHIRL_S\\+8\\(%rdi\\), %rdi" } } */ +-- +2.41.0 + diff --git a/cross/gcc-aarch64/APKBUILD b/cross/gcc-aarch64/APKBUILD index 07174d5cd..6a52a3e57 100644 --- a/cross/gcc-aarch64/APKBUILD +++ b/cross/gcc-aarch64/APKBUILD @@ -34,7 +34,7 @@ _pkgsnap="${pkgver##*_git}" [ "$CHOST" != "$CTARGET" ] && _target="-$CTARGET_ARCH" || _target="" pkgname=gcc-aarch64 -pkgrel=2 +pkgrel=3 pkgdesc="Stage2 cross-compiler for aarch64" url="https://gcc.gnu.org" arch="x86_64" @@ -269,6 +269,7 @@ source="https://dev.alpinelinux.org/archive/gcc/${_pkgbase%%.*}-$_pkgsnap/gcc-${ 0033-libphobos-do-not-use-LFS64-symbols.patch 0034-libgo-fix-lfs64-use.patch 0035-Fix-ICE-observed-in-PR110280.patch + 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch " # we build out-of-tree @@ -819,4 +820,5 @@ b325035cb7122d79c6b42ca6d3fc9e02319ed2f7cddb0639dff25d2798d2ce63812cd623462cdf95 c474f34e6f9a4239d486a65141a133dbe8ce91427d502a57a9fd6eb403478a2b5715ba74f24c1cc0761e16eec77ba2c1ca921fb7d7bc1e040fc3703fc9559e75 0033-libphobos-do-not-use-LFS64-symbols.patch c4482ffc36e7894b2140800159f4cbc9a3e9011e43a69b69f4fa92d5a11e2ee645c7e21df4423dd1e0636e8890849a5719647bfbdf84f951d638f8f488cb718c 0034-libgo-fix-lfs64-use.patch 048d767f4477c92ee6835850d13063ede21c0be751d0945c94445d04054e134cdc617eeb0b1ac8c892a604d8644580fcfebeccaf537d6b7380558ac6378e445a 0035-Fix-ICE-observed-in-PR110280.patch +cc1e10ac6e72db816f09325e301103109cc212a6f3de3ce0b9b038d149233c467319d203941695dbf3d7b9e2dcbbcd17609cdb056e831fcc323cd592423882d8 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch " diff --git a/cross/gcc-armhf/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch b/cross/gcc-armhf/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch new file mode 100644 index 000000000..e3c09cc84 --- /dev/null +++ b/cross/gcc-armhf/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch @@ -0,0 +1,186 @@ +https://gcc.gnu.org/PR110792 +https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=790c1f60a5662b16eb19eb4b81922995863c7571 +https://github.com/randombit/botan/issues/3637 + +From 85628c5653ff40963158a24c60eeec6a3b5a8e56 Mon Sep 17 00:00:00 2001 +From: Roger Sayle +Date: Thu, 3 Aug 2023 07:12:04 +0100 +Subject: [PATCH] PR target/110792: Early clobber issues with + rot32di2_doubleword on i386. + +This patch is a conservative fix for PR target/110792, a wrong-code +regression affecting doubleword rotations by BITS_PER_WORD, which +effectively swaps the highpart and lowpart words, when the source to be +rotated resides in memory. The issue is that if the register used to +hold the lowpart of the destination is mentioned in the address of +the memory operand, the current define_insn_and_split unintentionally +clobbers it before reading the highpart. + +Hence, for the testcase, the incorrectly generated code looks like: + + salq $4, %rdi // calculate address + movq WHIRL_S+8(%rdi), %rdi // accidentally clobber addr + movq WHIRL_S(%rdi), %rbp // load (wrong) lowpart + +Traditionally, the textbook way to fix this would be to add an +explicit early clobber to the instruction's constraints. + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,&r") + (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") + (const_int 32)))] + +but unfortunately this currently generates significantly worse code, +due to a strange choice of reloads (effectively memcpy), which ends up +looking like: + + salq $4, %rdi // calculate address + movdqa WHIRL_S(%rdi), %xmm0 // load the double word in SSE reg. + movaps %xmm0, -16(%rsp) // store the SSE reg back to the stack + movq -8(%rsp), %rdi // load highpart + movq -16(%rsp), %rbp // load lowpart + +Note that reload's "&" doesn't distinguish between the memory being +early clobbered, vs the registers used in an addressing mode being +early clobbered. + +The fix proposed in this patch is to remove the third alternative, that +allowed offsetable memory as an operand, forcing reload to place the +operand into a register before the rotation. This results in: + + salq $4, %rdi + movq WHIRL_S(%rdi), %rax + movq WHIRL_S+8(%rdi), %rdi + movq %rax, %rbp + +I believe there's a more advanced solution, by swapping the order of +the loads (if first destination register is mentioned in the address), +or inserting a lea insn (if both destination registers are mentioned +in the address), but this fix is a minimal "safe" solution, that +should hopefully be suitable for backporting. + +2023-08-03 Roger Sayle + +gcc/ChangeLog + PR target/110792 + * config/i386/i386.md (ti3): For rotations by 64 bits + place operand in a register before gen_64ti2_doubleword. + (di3): Likewise, for rotations by 32 bits, place + operand in a register before gen_32di2_doubleword. + (32di2_doubleword): Constrain operand to be in register. + (64ti2_doubleword): Likewise. + +gcc/testsuite/ChangeLog + PR target/110792 + * g++.target/i386/pr110792.C: New 32-bit C++ test case. + * gcc.target/i386/pr110792.c: New 64-bit C test case. + +(cherry picked from commit 790c1f60a5662b16eb19eb4b81922995863c7571) +--- + gcc/config/i386/i386.md | 18 ++++++++++++------ + gcc/testsuite/g++.target/i386/pr110792.C | 16 ++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr110792.c | 18 ++++++++++++++++++ + 3 files changed, 46 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/i386/pr110792.C + create mode 100644 gcc/testsuite/gcc.target/i386/pr110792.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index f3a3305..a71e837 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -14359,7 +14359,10 @@ + emit_insn (gen_ix86_ti3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64) +- emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (TImode, operands[1]); ++ emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ } + else + { + rtx amount = force_reg (QImode, operands[2]); +@@ -14394,7 +14397,10 @@ + emit_insn (gen_ix86_di3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32) +- emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ } + else + FAIL; + +@@ -14562,8 +14568,8 @@ + }) + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r") + (const_int 32)))] + "!TARGET_64BIT" + "#" +@@ -14580,8 +14586,8 @@ + }) + + (define_insn_and_split "64ti2_doubleword" +- [(set (match_operand:TI 0 "register_operand" "=r,r,r") +- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:TI 0 "register_operand" "=r,r") ++ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r") + (const_int 64)))] + "TARGET_64BIT" + "#" +diff --git a/gcc/testsuite/g++.target/i386/pr110792.C b/gcc/testsuite/g++.target/i386/pr110792.C +new file mode 100644 +index 0000000..ce21a7a +--- /dev/null ++++ b/gcc/testsuite/g++.target/i386/pr110792.C +@@ -0,0 +1,16 @@ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2" } */ ++ ++template ++inline T rotr(T input) ++{ ++ return static_cast((input >> ROT) | (input << (8 * sizeof(T) - ROT))); ++} ++ ++unsigned long long WHIRL_S[256] = {0x18186018C07830D8}; ++unsigned long long whirl(unsigned char x0) ++{ ++ const unsigned long long s4 = WHIRL_S[x0&0xFF]; ++ return rotr<32>(s4); ++} ++/* { dg-final { scan-assembler-not "movl\tWHIRL_S\\+4\\(,%eax,8\\), %eax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr110792.c b/gcc/testsuite/gcc.target/i386/pr110792.c +new file mode 100644 +index 0000000..b65125c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110792.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile { target int128 } } */ ++/* { dg-options "-O2" } */ ++ ++static inline unsigned __int128 rotr(unsigned __int128 input) ++{ ++ return ((input >> 64) | (input << (64))); ++} ++ ++unsigned __int128 WHIRL_S[256] = {((__int128)0x18186018C07830D8) << 64 |0x18186018C07830D8}; ++unsigned __int128 whirl(unsigned char x0) ++{ ++ register int t __asm("rdi") = x0&0xFF; ++ const unsigned __int128 s4 = WHIRL_S[t]; ++ register unsigned __int128 tt __asm("rdi") = rotr(s4); ++ asm("":::"memory"); ++ return tt; ++} ++/* { dg-final { scan-assembler-not "movq\tWHIRL_S\\+8\\(%rdi\\), %rdi" } } */ +-- +2.41.0 + diff --git a/cross/gcc-armhf/APKBUILD b/cross/gcc-armhf/APKBUILD index 4aac4b852..be637d1b0 100644 --- a/cross/gcc-armhf/APKBUILD +++ b/cross/gcc-armhf/APKBUILD @@ -34,7 +34,7 @@ _pkgsnap="${pkgver##*_git}" [ "$CHOST" != "$CTARGET" ] && _target="-$CTARGET_ARCH" || _target="" pkgname=gcc-armhf -pkgrel=2 +pkgrel=3 pkgdesc="Stage2 cross-compiler for armhf" url="https://gcc.gnu.org" arch="x86_64" @@ -269,6 +269,7 @@ source="https://dev.alpinelinux.org/archive/gcc/${_pkgbase%%.*}-$_pkgsnap/gcc-${ 0033-libphobos-do-not-use-LFS64-symbols.patch 0034-libgo-fix-lfs64-use.patch 0035-Fix-ICE-observed-in-PR110280.patch + 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch " # we build out-of-tree @@ -819,4 +820,5 @@ b325035cb7122d79c6b42ca6d3fc9e02319ed2f7cddb0639dff25d2798d2ce63812cd623462cdf95 c474f34e6f9a4239d486a65141a133dbe8ce91427d502a57a9fd6eb403478a2b5715ba74f24c1cc0761e16eec77ba2c1ca921fb7d7bc1e040fc3703fc9559e75 0033-libphobos-do-not-use-LFS64-symbols.patch c4482ffc36e7894b2140800159f4cbc9a3e9011e43a69b69f4fa92d5a11e2ee645c7e21df4423dd1e0636e8890849a5719647bfbdf84f951d638f8f488cb718c 0034-libgo-fix-lfs64-use.patch 048d767f4477c92ee6835850d13063ede21c0be751d0945c94445d04054e134cdc617eeb0b1ac8c892a604d8644580fcfebeccaf537d6b7380558ac6378e445a 0035-Fix-ICE-observed-in-PR110280.patch +cc1e10ac6e72db816f09325e301103109cc212a6f3de3ce0b9b038d149233c467319d203941695dbf3d7b9e2dcbbcd17609cdb056e831fcc323cd592423882d8 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch " diff --git a/cross/gcc-armv7/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch b/cross/gcc-armv7/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch new file mode 100644 index 000000000..e3c09cc84 --- /dev/null +++ b/cross/gcc-armv7/0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch @@ -0,0 +1,186 @@ +https://gcc.gnu.org/PR110792 +https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=790c1f60a5662b16eb19eb4b81922995863c7571 +https://github.com/randombit/botan/issues/3637 + +From 85628c5653ff40963158a24c60eeec6a3b5a8e56 Mon Sep 17 00:00:00 2001 +From: Roger Sayle +Date: Thu, 3 Aug 2023 07:12:04 +0100 +Subject: [PATCH] PR target/110792: Early clobber issues with + rot32di2_doubleword on i386. + +This patch is a conservative fix for PR target/110792, a wrong-code +regression affecting doubleword rotations by BITS_PER_WORD, which +effectively swaps the highpart and lowpart words, when the source to be +rotated resides in memory. The issue is that if the register used to +hold the lowpart of the destination is mentioned in the address of +the memory operand, the current define_insn_and_split unintentionally +clobbers it before reading the highpart. + +Hence, for the testcase, the incorrectly generated code looks like: + + salq $4, %rdi // calculate address + movq WHIRL_S+8(%rdi), %rdi // accidentally clobber addr + movq WHIRL_S(%rdi), %rbp // load (wrong) lowpart + +Traditionally, the textbook way to fix this would be to add an +explicit early clobber to the instruction's constraints. + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ [(set (match_operand:DI 0 "register_operand" "=r,r,&r") + (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") + (const_int 32)))] + +but unfortunately this currently generates significantly worse code, +due to a strange choice of reloads (effectively memcpy), which ends up +looking like: + + salq $4, %rdi // calculate address + movdqa WHIRL_S(%rdi), %xmm0 // load the double word in SSE reg. + movaps %xmm0, -16(%rsp) // store the SSE reg back to the stack + movq -8(%rsp), %rdi // load highpart + movq -16(%rsp), %rbp // load lowpart + +Note that reload's "&" doesn't distinguish between the memory being +early clobbered, vs the registers used in an addressing mode being +early clobbered. + +The fix proposed in this patch is to remove the third alternative, that +allowed offsetable memory as an operand, forcing reload to place the +operand into a register before the rotation. This results in: + + salq $4, %rdi + movq WHIRL_S(%rdi), %rax + movq WHIRL_S+8(%rdi), %rdi + movq %rax, %rbp + +I believe there's a more advanced solution, by swapping the order of +the loads (if first destination register is mentioned in the address), +or inserting a lea insn (if both destination registers are mentioned +in the address), but this fix is a minimal "safe" solution, that +should hopefully be suitable for backporting. + +2023-08-03 Roger Sayle + +gcc/ChangeLog + PR target/110792 + * config/i386/i386.md (ti3): For rotations by 64 bits + place operand in a register before gen_64ti2_doubleword. + (di3): Likewise, for rotations by 32 bits, place + operand in a register before gen_32di2_doubleword. + (32di2_doubleword): Constrain operand to be in register. + (64ti2_doubleword): Likewise. + +gcc/testsuite/ChangeLog + PR target/110792 + * g++.target/i386/pr110792.C: New 32-bit C++ test case. + * gcc.target/i386/pr110792.c: New 64-bit C test case. + +(cherry picked from commit 790c1f60a5662b16eb19eb4b81922995863c7571) +--- + gcc/config/i386/i386.md | 18 ++++++++++++------ + gcc/testsuite/g++.target/i386/pr110792.C | 16 ++++++++++++++++ + gcc/testsuite/gcc.target/i386/pr110792.c | 18 ++++++++++++++++++ + 3 files changed, 46 insertions(+), 6 deletions(-) + create mode 100644 gcc/testsuite/g++.target/i386/pr110792.C + create mode 100644 gcc/testsuite/gcc.target/i386/pr110792.c + +diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md +index f3a3305..a71e837 100644 +--- a/gcc/config/i386/i386.md ++++ b/gcc/config/i386/i386.md +@@ -14359,7 +14359,10 @@ + emit_insn (gen_ix86_ti3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 64) +- emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (TImode, operands[1]); ++ emit_insn (gen_64ti2_doubleword (operands[0], operands[1])); ++ } + else + { + rtx amount = force_reg (QImode, operands[2]); +@@ -14394,7 +14397,10 @@ + emit_insn (gen_ix86_di3_doubleword + (operands[0], operands[1], operands[2])); + else if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 32) +- emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ { ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_32di2_doubleword (operands[0], operands[1])); ++ } + else + FAIL; + +@@ -14562,8 +14568,8 @@ + }) + + (define_insn_and_split "32di2_doubleword" +- [(set (match_operand:DI 0 "register_operand" "=r,r,r") +- (any_rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (any_rotate:DI (match_operand:DI 1 "register_operand" "0,r") + (const_int 32)))] + "!TARGET_64BIT" + "#" +@@ -14580,8 +14586,8 @@ + }) + + (define_insn_and_split "64ti2_doubleword" +- [(set (match_operand:TI 0 "register_operand" "=r,r,r") +- (any_rotate:TI (match_operand:TI 1 "nonimmediate_operand" "0,r,o") ++ [(set (match_operand:TI 0 "register_operand" "=r,r") ++ (any_rotate:TI (match_operand:TI 1 "register_operand" "0,r") + (const_int 64)))] + "TARGET_64BIT" + "#" +diff --git a/gcc/testsuite/g++.target/i386/pr110792.C b/gcc/testsuite/g++.target/i386/pr110792.C +new file mode 100644 +index 0000000..ce21a7a +--- /dev/null ++++ b/gcc/testsuite/g++.target/i386/pr110792.C +@@ -0,0 +1,16 @@ ++/* { dg-do compile { target ia32 } } */ ++/* { dg-options "-O2" } */ ++ ++template ++inline T rotr(T input) ++{ ++ return static_cast((input >> ROT) | (input << (8 * sizeof(T) - ROT))); ++} ++ ++unsigned long long WHIRL_S[256] = {0x18186018C07830D8}; ++unsigned long long whirl(unsigned char x0) ++{ ++ const unsigned long long s4 = WHIRL_S[x0&0xFF]; ++ return rotr<32>(s4); ++} ++/* { dg-final { scan-assembler-not "movl\tWHIRL_S\\+4\\(,%eax,8\\), %eax" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr110792.c b/gcc/testsuite/gcc.target/i386/pr110792.c +new file mode 100644 +index 0000000..b65125c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr110792.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile { target int128 } } */ ++/* { dg-options "-O2" } */ ++ ++static inline unsigned __int128 rotr(unsigned __int128 input) ++{ ++ return ((input >> 64) | (input << (64))); ++} ++ ++unsigned __int128 WHIRL_S[256] = {((__int128)0x18186018C07830D8) << 64 |0x18186018C07830D8}; ++unsigned __int128 whirl(unsigned char x0) ++{ ++ register int t __asm("rdi") = x0&0xFF; ++ const unsigned __int128 s4 = WHIRL_S[t]; ++ register unsigned __int128 tt __asm("rdi") = rotr(s4); ++ asm("":::"memory"); ++ return tt; ++} ++/* { dg-final { scan-assembler-not "movq\tWHIRL_S\\+8\\(%rdi\\), %rdi" } } */ +-- +2.41.0 + diff --git a/cross/gcc-armv7/APKBUILD b/cross/gcc-armv7/APKBUILD index 3c04872f0..63211ad8e 100644 --- a/cross/gcc-armv7/APKBUILD +++ b/cross/gcc-armv7/APKBUILD @@ -34,7 +34,7 @@ _pkgsnap="${pkgver##*_git}" [ "$CHOST" != "$CTARGET" ] && _target="-$CTARGET_ARCH" || _target="" pkgname=gcc-armv7 -pkgrel=2 +pkgrel=3 pkgdesc="Stage2 cross-compiler for armv7" url="https://gcc.gnu.org" arch="x86_64" @@ -269,6 +269,7 @@ source="https://dev.alpinelinux.org/archive/gcc/${_pkgbase%%.*}-$_pkgsnap/gcc-${ 0033-libphobos-do-not-use-LFS64-symbols.patch 0034-libgo-fix-lfs64-use.patch 0035-Fix-ICE-observed-in-PR110280.patch + 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch " # we build out-of-tree @@ -819,4 +820,5 @@ b325035cb7122d79c6b42ca6d3fc9e02319ed2f7cddb0639dff25d2798d2ce63812cd623462cdf95 c474f34e6f9a4239d486a65141a133dbe8ce91427d502a57a9fd6eb403478a2b5715ba74f24c1cc0761e16eec77ba2c1ca921fb7d7bc1e040fc3703fc9559e75 0033-libphobos-do-not-use-LFS64-symbols.patch c4482ffc36e7894b2140800159f4cbc9a3e9011e43a69b69f4fa92d5a11e2ee645c7e21df4423dd1e0636e8890849a5719647bfbdf84f951d638f8f488cb718c 0034-libgo-fix-lfs64-use.patch 048d767f4477c92ee6835850d13063ede21c0be751d0945c94445d04054e134cdc617eeb0b1ac8c892a604d8644580fcfebeccaf537d6b7380558ac6378e445a 0035-Fix-ICE-observed-in-PR110280.patch +cc1e10ac6e72db816f09325e301103109cc212a6f3de3ce0b9b038d149233c467319d203941695dbf3d7b9e2dcbbcd17609cdb056e831fcc323cd592423882d8 0036-PR110792-Early-clobber-issues-with-rot32di2-on-i386.patch "