[PATCH v2] x86/APX: optimize MOVBE
Jan Beulich
jbeulich@suse.com
Fri Jan 19 10:51:44 GMT 2024
When source and destination are identical, MOVBE can be covered by the
NDD-to-legacy conversion logic as well, even though in this case the original
insn doesn't use an NDD encoding. The size savings are even better here,
because the replacement (BSWAP) has no ModR/M byte.
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7858,10 +7858,11 @@ match_template (char mnem_suffix)
if (optimize
&& !i.no_optimize
&& i.vec_encoding != vex_encoding_evex
- && t + 1 < current_templates.end
- && !t[1].opcode_modifier.evex
- && t[1].opcode_space <= SPACE_0F38
- && t->opcode_modifier.vexvvvv == VexVVVV_DST
+ && ((t + 1 < current_templates.end
+ && !t[1].opcode_modifier.evex
+ && t[1].opcode_space <= SPACE_0F38
+ && t->opcode_modifier.vexvvvv == VexVVVV_DST)
+ || t->mnem_off == MN_movbe)
&& (i.types[i.operands - 1].bitfield.dword
|| i.types[i.operands - 1].bitfield.qword))
{
@@ -7898,6 +7899,12 @@ match_template (char mnem_suffix)
--i.operands;
--i.reg_operands;
+ if (t->mnem_off == MN_movbe)
+ {
+ gas_assert (t[1].mnem_off == MN_bswap);
+ ++current_templates.end;
+ }
+
specific_error = progress (internal_error);
continue;
}
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -118,6 +118,9 @@ Disassembly of section .text:
\s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 cmovge -0x6f6f6f70\(%eax\),%edx
\s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 cmovle -0x6f6f6f70\(%eax\),%edx
\s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 cmovg -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0 movbe %ax,%ax
+\s*[a-f0-9]+:\s*49 0f c8 bswap %r8
+\s*[a-f0-9]+:\s*d5 98 c8 bswap %r16
\s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax
\s*[a-f0-9]+:\s*66 0f 38 f6 c3 adcx %ebx,%eax
\s*[a-f0-9]+:\s*62 f4 fd 18 66 c3 adcx %rbx,%rax,%rax
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -111,6 +111,9 @@ cmovl 0x90909090(%eax),%edx,%edx
cmovge 0x90909090(%eax),%edx,%edx
cmovle 0x90909090(%eax),%edx,%edx
cmovg 0x90909090(%eax),%edx,%edx
+movbe %ax,%ax
+movbe %r8,%r8
+movbe %r16,%r16
adcx %ebx,%eax,%eax
adcx %eax,%ebx,%eax
adcx %rbx,%rax,%rax
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSi
// Move after swapping the bytes
movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// This needs to live here for easy EVEX -> REX2 conversion, which wants to
+// restart with the next sequential template.
+bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
// Move with sign extend.
movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -980,9 +983,8 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
{<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {}
-// 486 extensions.
+// 486 extensions (BSWAP moved elsewhere).
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
invd, 0xf08, i486, NoSuf, {}
More information about the Binutils
mailing list