[PATCH v2] x86/APX: optimize MOVBE

Jan Beulich jbeulich@suse.com
Fri Jan 19 10:51:44 GMT 2024


When its source and destination are the same register, MOVBE can be
covered by the NDD-to-legacy conversion logic as well, even though in
this case the original insn doesn't use an NDD encoding. The size savings
are even better here, because the replacement (BSWAP) has no ModR/M byte.

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -7858,10 +7858,11 @@ match_template (char mnem_suffix)
       if (optimize
 	  && !i.no_optimize
 	  && i.vec_encoding != vex_encoding_evex
-	  && t + 1 < current_templates.end
-	  && !t[1].opcode_modifier.evex
-	  && t[1].opcode_space <= SPACE_0F38
-	  && t->opcode_modifier.vexvvvv == VexVVVV_DST
+	  && ((t + 1 < current_templates.end
+	       && !t[1].opcode_modifier.evex
+	       && t[1].opcode_space <= SPACE_0F38
+	       && t->opcode_modifier.vexvvvv == VexVVVV_DST)
+	      || t->mnem_off == MN_movbe)
 	  && (i.types[i.operands - 1].bitfield.dword
 	      || i.types[i.operands - 1].bitfield.qword))
 	{
@@ -7898,6 +7899,12 @@ match_template (char mnem_suffix)
 		  --i.operands;
 		  --i.reg_operands;
 
+		  if (t->mnem_off == MN_movbe)
+		    {
+		      gas_assert (t[1].mnem_off == MN_bswap);
+		      ++current_templates.end;
+		    }
+
 		  specific_error = progress (internal_error);
 		  continue;
 		}
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -118,6 +118,9 @@ Disassembly of section .text:
 \s*[a-f0-9]+:\s*67 0f 4d 90 90 90 90 90 	cmovge -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4e 90 90 90 90 90 	cmovle -0x6f6f6f70\(%eax\),%edx
 \s*[a-f0-9]+:\s*67 0f 4f 90 90 90 90 90 	cmovg  -0x6f6f6f70\(%eax\),%edx
+\s*[a-f0-9]+:\s*62 f4 7d 08 60 c0    	movbe  %ax,%ax
+\s*[a-f0-9]+:\s*49 0f c8             	bswap  %r8
+\s*[a-f0-9]+:\s*d5 98 c8             	bswap  %r16
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*66 0f 38 f6 c3       	adcx   %ebx,%eax
 \s*[a-f0-9]+:\s*62 f4 fd 18 66 c3    	adcx   %rbx,%rax,%rax
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.s
@@ -111,6 +111,9 @@ cmovl  0x90909090(%eax),%edx,%edx
 cmovge 0x90909090(%eax),%edx,%edx
 cmovle 0x90909090(%eax),%edx,%edx
 cmovg  0x90909090(%eax),%edx,%edx
+movbe  %ax,%ax
+movbe  %r8,%r8
+movbe  %r16,%r16
 adcx   %ebx,%eax,%eax
 adcx   %eax,%ebx,%eax
 adcx   %rbx,%rax,%rax
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -210,6 +210,9 @@ mov, 0xf24, i386&No64, D|RegMem|IgnoreSi
 // Move after swapping the bytes
 movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 movbe, 0x60, Movbe&APX_F, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+// This needs to live here for easy EVEX -> REX2 conversion, which wants to
+// restart with the next sequential template.
+bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
 
 // Move with sign extend.
 movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -980,9 +983,8 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
 
 {<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {}
 
-// 486 extensions.
+// 486 extensions (BSWAP moved elsewhere).
 
-bswap, 0xfc8, i486, No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64 }
 xadd, 0xfc0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 cmpxchg, 0xfb0, i486, W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 invd, 0xf08, i486, NoSuf, {}


More information about the Binutils mailing list