This is the mail archive of the
binutils@sourceware.org
mailing list for the binutils project.
PATCH: Optimize out the REX.W bit
- From: "H.J. Lu" <hjl at lucon dot org>
- To: binutils at sources dot redhat dot com
- Cc: michael dot meissner at amd dot com
- Date: Fri, 4 Jan 2008 09:58:31 -0800
- Subject: PATCH: Optimize out the REX.W bit
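The REX.W bit is ignored for extractps, movmskpd, movmskps, pextrb,
pextrw, pinsrb, pinsrw and pmovmskb. I am checking in this patch
to optimize it out: gas no longer emits the REX.W bit when one of
these instructions is given a 64-bit general register operand.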
H.J.
---
gas/testsuite/
2008-01-04 H.J. Lu <hongjiu.lu@intel.com>
* gas/i386/i386.exp: Run rexw.
* gas/i386/rexw.d: New.
* gas/i386/rexw.s: Likewise.
* gas/i386/x86-64-sse4_1-intel.d: Updated.
* gas/i386/x86-64-sse4_1.d: Likewise.
opcodes/
2008-01-04 H.J. Lu <hongjiu.lu@intel.com>
* i386-opc.tbl: Add NoRex64 to extractps, movmskpd, movmskps,
pextrb, pextrw, pinsrb, pinsrw and pmovmskb.
* i386-tbl.h: Regenerated.
--- binutils/gas/testsuite/gas/i386/i386.exp.rexw 2008-01-03 17:05:56.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/i386.exp 2008-01-04 09:27:18.000000000 -0800
@@ -216,6 +216,7 @@ if [expr ([istarget "i*86-*-*"] || [ista
run_dump_test "x86-64-sib-intel"
run_dump_test "x86-64-opcode-inval"
run_dump_test "x86-64-opcode-inval-intel"
+ run_dump_test "rexw"
if { ![istarget "*-*-aix*"]
&& ![istarget "*-*-beos*"]
--- binutils/gas/testsuite/gas/i386/rexw.d.rexw 2008-01-04 09:26:16.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/rexw.d 2008-01-04 09:46:53.000000000 -0800
@@ -0,0 +1,31 @@
+#objdump: -dw
+#name: x86-64 REX.W optimization
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 50 ca movmskpd %xmm2,%ecx
+[ ]*[a-f0-9]+: 0f 50 ca movmskps %xmm2,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 0f c5 c8 00 pextrw \$0x0,%mm0,%ecx
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 66 0f c4 c1 00 pinsrw \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 0f c4 c1 00 pinsrw \$0x0,%ecx,%mm0
+[ ]*[a-f0-9]+: 0f d7 c5 pmovmskb %mm5,%eax
+[ ]*[a-f0-9]+: 66 0f d7 c5 pmovmskb %xmm5,%eax
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 50 ca movmskpd %xmm2,%ecx
+[ ]*[a-f0-9]+: 0f 50 ca movmskps %xmm2,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 0f c5 c8 00 pextrw \$0x0,%mm0,%ecx
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 0f c4 c1 00 pinsrw \$0x0,%ecx,%mm0
+[ ]*[a-f0-9]+: 66 0f c4 c1 00 pinsrw \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 0f d7 cd pmovmskb %mm5,%ecx
+[ ]*[a-f0-9]+: 66 0f d7 cd pmovmskb %xmm5,%ecx
+#pass
--- binutils/gas/testsuite/gas/i386/rexw.s.rexw 2008-01-04 09:26:16.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/rexw.s 2008-01-04 09:47:11.000000000 -0800
@@ -0,0 +1,28 @@
+# Instructions where REX.W is ignored.
+
+ .text
+foo:
+ extractps $0x0,%xmm0,%rcx
+ movmskpd %xmm2,%rcx
+ movmskps %xmm2,%rcx
+ pextrb $0x0,%xmm0,%rcx
+ pextrw $0x0,%mm0,%rcx
+ pextrw $0x0,%xmm0,%rcx
+ pinsrb $0x0,%rcx,%xmm0
+ pinsrw $0x0,%rcx,%xmm0
+ pinsrw $0x0,%rcx,%mm0
+ pmovmskb %mm5,%rax
+ pmovmskb %xmm5,%rax
+
+ .intel_syntax noprefix
+ extractps rcx,xmm0,0x0
+ movmskpd rcx,xmm2
+ movmskps rcx,xmm2
+ pextrb rcx,xmm0,0x0
+ pextrw rcx,mm0,0x0
+ pextrw rcx,xmm0,0x0
+ pinsrb xmm0,rcx,0x0
+ pinsrw mm0,rcx,0x0
+ pinsrw xmm0,rcx,0x0
+ pmovmskb rcx,mm5
+ pmovmskb rcx,xmm5
--- binutils/gas/testsuite/gas/i386/x86-64-sse4_1-intel.d.rexw 2007-09-12 11:46:41.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-sse4_1-intel.d 2008-01-04 09:26:16.000000000 -0800
@@ -11,19 +11,19 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 66 0f 3a 0d c1 00 blendpd xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 3a 0c 01 00 blendps xmm0,XMMWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 0c c1 00 blendps xmm0,xmm1,0x0
-[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd xmm0,xmm1(,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd xmm0,xmm1(,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps xmm0,xmm1(,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps xmm0,xmm1(,xmm0)?
+[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd xmm0,xmm1,xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd xmm0,xmm1,xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps xmm0,xmm1,xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps xmm0,xmm1,xmm0
[ ]*[a-f0-9]+: 66 0f 3a 41 01 00 dppd xmm0,XMMWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 41 c1 00 dppd xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 3a 40 01 00 dpps xmm0,XMMWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 40 c1 00 dpps xmm0,xmm1,0x0
-[ ]*[a-f0-9]+: 66 48 0f 3a 17 c1 00 extractps rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 17 01 00 extractps DWORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 21 c1 00 insertps xmm0,xmm1,0x0
@@ -33,29 +33,29 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 66 0f 3a 42 c1 00 mpsadbw xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 38 2b 01 packusdw xmm0,XMMWORD PTR \[rcx\]
[ ]*[a-f0-9]+: 66 0f 38 2b c1 packusdw xmm0,xmm1
-[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb xmm0,xmm1(,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb xmm0,xmm1(,xmm0)?
+[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb xmm0,xmm1,xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb xmm0,XMMWORD PTR \[rcx\],xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb xmm0,xmm1,xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0e 01 00 pblendw xmm0,XMMWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 0e c1 00 pblendw xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 38 29 c1 pcmpeqq xmm0,xmm1
[ ]*[a-f0-9]+: 66 0f 38 29 01 pcmpeqq xmm0,XMMWORD PTR \[rcx\]
-[ ]*[a-f0-9]+: 66 48 0f 3a 14 c1 00 pextrb rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 14 01 00 pextrb BYTE PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 16 c1 00 pextrd ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 16 01 00 pextrd DWORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 16 c1 00 pextrq rcx,xmm0,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 16 01 00 pextrq QWORD PTR \[rcx\],xmm0,0x0
-[ ]*[a-f0-9]+: 66 48 0f c5 c8 00 pextrw rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 15 01 00 pextrw WORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 38 41 c1 phminposuw xmm0,xmm1
[ ]*[a-f0-9]+: 66 0f 38 41 01 phminposuw xmm0,XMMWORD PTR \[rcx\]
[ ]*[a-f0-9]+: 66 0f 3a 20 01 00 pinsrb xmm0,BYTE PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb xmm0,ecx,0x0
-[ ]*[a-f0-9]+: 66 48 0f 3a 20 c1 00 pinsrb xmm0,rcx,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb xmm0,ecx,0x0
[ ]*[a-f0-9]+: 66 0f 3a 22 01 00 pinsrd xmm0,DWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 22 c1 00 pinsrd xmm0,ecx,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 22 01 00 pinsrq xmm0,QWORD PTR \[rcx\],0x0
@@ -126,7 +126,7 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 66 0f 3a 41 c1 00 dppd xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 3a 40 01 00 dpps xmm0,XMMWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 40 c1 00 dpps xmm0,xmm1,0x0
-[ ]*[a-f0-9]+: 66 48 0f 3a 17 c1 00 extractps rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 17 01 00 extractps DWORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 21 c1 00 insertps xmm0,xmm1,0x0
@@ -142,21 +142,21 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 66 0f 3a 0e c1 00 pblendw xmm0,xmm1,0x0
[ ]*[a-f0-9]+: 66 0f 38 29 c1 pcmpeqq xmm0,xmm1
[ ]*[a-f0-9]+: 66 0f 38 29 01 pcmpeqq xmm0,XMMWORD PTR \[rcx\]
-[ ]*[a-f0-9]+: 66 48 0f 3a 14 c1 00 pextrb rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 14 01 00 pextrb BYTE PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 16 c1 00 pextrd ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 16 01 00 pextrd DWORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 16 c1 00 pextrq rcx,xmm0,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 16 01 00 pextrq QWORD PTR \[rcx\],xmm0,0x0
-[ ]*[a-f0-9]+: 66 48 0f c5 c8 00 pextrw rcx,xmm0,0x0
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw ecx,xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 3a 15 01 00 pextrw WORD PTR \[rcx\],xmm0,0x0
[ ]*[a-f0-9]+: 66 0f 38 41 c1 phminposuw xmm0,xmm1
[ ]*[a-f0-9]+: 66 0f 38 41 01 phminposuw xmm0,XMMWORD PTR \[rcx\]
[ ]*[a-f0-9]+: 66 0f 3a 20 01 00 pinsrb xmm0,BYTE PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb xmm0,ecx,0x0
-[ ]*[a-f0-9]+: 66 48 0f 3a 20 c1 00 pinsrb xmm0,rcx,0x0
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb xmm0,ecx,0x0
[ ]*[a-f0-9]+: 66 0f 3a 22 01 00 pinsrd xmm0,DWORD PTR \[rcx\],0x0
[ ]*[a-f0-9]+: 66 0f 3a 22 c1 00 pinsrd xmm0,ecx,0x0
[ ]*[a-f0-9]+: 66 48 0f 3a 22 01 00 pinsrq xmm0,QWORD PTR \[rcx\],0x0
--- binutils/gas/testsuite/gas/i386/x86-64-sse4_1.d.rexw 2007-09-12 11:46:41.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-sse4_1.d 2008-01-04 09:26:16.000000000 -0800
@@ -6,126 +6,23 @@
Disassembly of section .text:
0+000 <foo>:
-[ ]*[0-9a-f]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 15 01 blendvpd (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 15 c1 blendvpd (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 14 01 blendvps (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 14 c1 blendvps (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 48 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%rcx
-[ ]*[0-9a-f]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
-[ ]*[0-9a-f]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%rcx\)
-[ ]*[0-9a-f]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 2a 01 movntdqa \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 2b 01 packusdw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 10 01 pblendvb (%xmm0,)?\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 10 c1 pblendvb (%xmm0,)?%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 29 01 pcmpeqq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 48 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%rcx
-[ ]*[0-9a-f]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
-[ ]*[0-9a-f]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%rcx\)
-[ ]*[0-9a-f]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx
-[ ]*[0-9a-f]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%rcx\)
-[ ]*[0-9a-f]+: 66 48 0f 3a 16 c1 00 pextrq \$0x0,%xmm0,%rcx
-[ ]*[0-9a-f]+: 66 48 0f 3a 16 01 00 pextrq \$0x0,%xmm0,\(%rcx\)
-[ ]*[0-9a-f]+: 66 48 0f c5 c8 00 pextrw \$0x0,%xmm0,%rcx
-[ ]*[0-9a-f]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
-[ ]*[0-9a-f]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%rcx\)
-[ ]*[0-9a-f]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 41 01 phminposuw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
-[ ]*[0-9a-f]+: 66 48 0f 3a 20 c1 00 pinsrb \$0x0,%rcx,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0
-[ ]*[0-9a-f]+: 66 48 0f 3a 22 01 00 pinsrq \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 48 0f 3a 22 c1 00 pinsrq \$0x0,%rcx,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3c 01 pmaxsb \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3d 01 pmaxsd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3f 01 pmaxud \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3e 01 pmaxuw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 38 01 pminsb \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 39 01 pminsd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3b 01 pminud \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 3a 01 pminuw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 20 01 pmovsxbw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 21 01 pmovsxbd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 22 01 pmovsxbq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 23 01 pmovsxwd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 24 01 pmovsxwq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 25 01 pmovsxdq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 30 01 pmovzxbw \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 31 01 pmovzxbd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 32 01 pmovzxbq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 33 01 pmovzxwd \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 34 01 pmovzxwq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 35 01 pmovzxdq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 28 01 pmuldq \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 40 01 pmulld \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 38 17 01 ptest \(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%rcx\),%xmm0
-[ ]*[0-9a-f]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd %xmm0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps %xmm0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0
-[ ]*[a-f0-9]+: 66 48 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%rcx
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%rcx\)
[ ]*[a-f0-9]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0
@@ -137,25 +34,128 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb %xmm0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 29 01 pcmpeqq \(%rcx\),%xmm0
-[ ]*[a-f0-9]+: 66 48 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%rcx
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%rcx\)
[ ]*[a-f0-9]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%rcx\)
[ ]*[a-f0-9]+: 66 48 0f 3a 16 c1 00 pextrq \$0x0,%xmm0,%rcx
[ ]*[a-f0-9]+: 66 48 0f 3a 16 01 00 pextrq \$0x0,%xmm0,\(%rcx\)
-[ ]*[a-f0-9]+: 66 48 0f c5 c8 00 pextrw \$0x0,%xmm0,%rcx
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
[ ]*[a-f0-9]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%rcx\)
[ ]*[a-f0-9]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0
[ ]*[a-f0-9]+: 66 0f 38 41 01 phminposuw \(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
-[ ]*[a-f0-9]+: 66 48 0f 3a 20 c1 00 pinsrb \$0x0,%rcx,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 66 48 0f 3a 22 01 00 pinsrq \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 48 0f 3a 22 c1 00 pinsrq \$0x0,%rcx,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3c c1 pmaxsb %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3c 01 pmaxsb \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3d c1 pmaxsd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3d 01 pmaxsd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3f c1 pmaxud %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3f 01 pmaxud \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3e c1 pmaxuw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3e 01 pmaxuw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 38 c1 pminsb %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 38 01 pminsb \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 39 c1 pminsd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 39 01 pminsd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3b c1 pminud %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3b 01 pminud \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3a c1 pminuw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 3a 01 pminuw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 20 c1 pmovsxbw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 20 01 pmovsxbw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 21 c1 pmovsxbd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 21 01 pmovsxbd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 22 c1 pmovsxbq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 22 01 pmovsxbq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 23 c1 pmovsxwd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 23 01 pmovsxwd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 24 c1 pmovsxwq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 24 01 pmovsxwq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 25 c1 pmovsxdq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 25 01 pmovsxdq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 30 c1 pmovzxbw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 30 01 pmovzxbw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 31 c1 pmovzxbd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 31 01 pmovzxbd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 32 c1 pmovzxbq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 32 01 pmovzxbq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 33 c1 pmovzxwd %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 33 01 pmovzxwd \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 34 c1 pmovzxwq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 34 01 pmovzxwq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 35 c1 pmovzxdq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 35 01 pmovzxdq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 28 c1 pmuldq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 28 01 pmuldq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 40 c1 pmulld %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 40 01 pmulld \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 17 c1 ptest %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 17 01 ptest \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 09 01 00 roundpd \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 09 c1 00 roundpd \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 08 01 00 roundps \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 08 c1 00 roundps \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0b 01 00 roundsd \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0b c1 00 roundsd \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0a 01 00 roundss \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0a c1 00 roundss \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0d 01 00 blendpd \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0d c1 00 blendpd \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0c 01 00 blendps \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0c c1 00 blendps \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 01 blendvpd %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 15 c1 blendvpd %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 01 blendvps %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 14 c1 blendvps %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 41 01 00 dppd \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 41 c1 00 dppd \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 40 01 00 dpps \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 40 c1 00 dpps \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 17 c1 00 extractps \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 17 01 00 extractps \$0x0,%xmm0,\(%rcx\)
+[ ]*[a-f0-9]+: 66 0f 3a 21 c1 00 insertps \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 21 01 00 insertps \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 2a 01 movntdqa \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 42 01 00 mpsadbw \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 42 c1 00 mpsadbw \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 2b 01 packusdw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 2b c1 packusdw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 01 pblendvb %xmm0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 10 c1 pblendvb %xmm0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0e 01 00 pblendw \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 0e c1 00 pblendw \$0x0,%xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 29 c1 pcmpeqq %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 29 01 pcmpeqq \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 14 c1 00 pextrb \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 14 01 00 pextrb \$0x0,%xmm0,\(%rcx\)
+[ ]*[a-f0-9]+: 66 0f 3a 16 c1 00 pextrd \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 16 01 00 pextrd \$0x0,%xmm0,\(%rcx\)
+[ ]*[a-f0-9]+: 66 48 0f 3a 16 c1 00 pextrq \$0x0,%xmm0,%rcx
+[ ]*[a-f0-9]+: 66 48 0f 3a 16 01 00 pextrq \$0x0,%xmm0,\(%rcx\)
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f c5 c8 00 pextrw \$0x0,%xmm0,%ecx
+[ ]*[a-f0-9]+: 66 0f 3a 15 01 00 pextrw \$0x0,%xmm0,\(%rcx\)
+[ ]*[a-f0-9]+: 66 0f 38 41 c1 phminposuw %xmm1,%xmm0
+[ ]*[a-f0-9]+: 66 0f 38 41 01 phminposuw \(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 20 01 00 pinsrb \$0x0,\(%rcx\),%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
+[ ]*[a-f0-9]+: 66 0f 3a 20 c1 00 pinsrb \$0x0,%ecx,%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 22 01 00 pinsrd \$0x0,\(%rcx\),%xmm0
[ ]*[a-f0-9]+: 66 0f 3a 22 c1 00 pinsrd \$0x0,%ecx,%xmm0
[ ]*[a-f0-9]+: 66 48 0f 3a 22 01 00 pinsrq \$0x0,\(%rcx\),%xmm0
--- binutils/opcodes/i386-opc.tbl.rexw 2008-01-03 17:05:55.000000000 -0800
+++ binutils/opcodes/i386-opc.tbl 2008-01-04 09:26:16.000000000 -0800
@@ -1086,7 +1086,7 @@ movhps, 2, 0xf17, None, 2, CpuSSE, Modrm
movlhps, 2, 0xf16, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, RegXMM }
movlps, 2, 0xf12, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
movlps, 2, 0xf13, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
-movmskps, 2, 0xf50, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+movmskps, 2, 0xf50, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
movntps, 2, 0xf2b, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
movntq, 2, 0xfe7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
movntdq, 2, 0x660fe7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
@@ -1101,14 +1101,14 @@ pavgb, 2, 0xfe0, None, 2, CpuMMX2, Modrm
pavgb, 2, 0x660fe0, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pavgw, 2, 0xfe3, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
pavgw, 2, 0x660fe3, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pextrw, 3, 0xfc5, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, RegMMX, Reg32|Reg64 }
-pextrw, 3, 0x660fc5, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64 }
+pextrw, 3, 0xfc5, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, RegMMX, Reg32|Reg64 }
+pextrw, 3, 0x660fc5, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
// Streaming SIMD extensions 4.1 Instructions.
-pextrw, 3, 0x660f3a15, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+pextrw, 3, 0x660f3a15, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
-pinsrw, 3, 0xfc4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegMMX }
-pinsrw, 3, 0x660fc4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
+pinsrw, 3, 0xfc4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegMMX }
+pinsrw, 3, 0x660fc4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
pmaxsw, 2, 0xfee, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
pmaxsw, 2, 0x660fee, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pmaxub, 2, 0xfde, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
@@ -1117,8 +1117,8 @@ pminsw, 2, 0xfea, None, 2, CpuMMX2, Modr
pminsw, 2, 0x660fea, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pminub, 2, 0xfda, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
pminub, 2, 0x660fda, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pmovmskb, 2, 0xfd7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegMMX, Reg32|Reg64 }
-pmovmskb, 2, 0x660fd7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+pmovmskb, 2, 0xfd7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegMMX, Reg32|Reg64 }
+pmovmskb, 2, 0x660fd7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
pmulhuw, 2, 0xfe4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
pmulhuw, 2, 0x660fe4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
prefetchnta, 1, 0xf18, 0x0, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S }
@@ -1186,7 +1186,7 @@ movhpd, 2, 0x660f16, None, 2, CpuSSE2, M
movhpd, 2, 0x660f17, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
movlpd, 2, 0x660f12, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
movlpd, 2, 0x660f13, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
-movmskpd, 2, 0x660f50, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+movmskpd, 2, 0x660f50, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
movntpd, 2, 0x660f2b, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
// Intel mode string move.
movsd, 0, 0xa5, None, 1, 0, Size32|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|IsString, { 0 }
@@ -1335,7 +1335,7 @@ blendvps, 3, 0x660f3814, None, 3, CpuSSE
blendvps, 2, 0x660f3814, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
dppd, 3, 0x660f3a41, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
dpps, 3, 0x660f3a40, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-extractps, 3, 0x660f3a17, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+extractps, 3, 0x660f3a17, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
insertps, 3, 0x660f3a21, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
movntdqa, 2, 0x660f382a, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
mpsadbw, 3, 0x660f3a42, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
@@ -1344,11 +1344,11 @@ pblendvb, 3, 0x660f3810, None, 3, CpuSSE
pblendvb, 2, 0x660f3810, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pblendw, 3, 0x660f3a0e, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
pcmpeqq, 2, 0x660f3829, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pextrb, 3, 0x660f3a14, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+pextrb, 3, 0x660f3a14, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
pextrd, 3, 0x660f3a16, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
pextrq, 3, 0x660f3a16, None, 3, CpuSSE4_1|Cpu64, Modrm|Size64|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
phminposuw, 2, 0x660f3841, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pinsrb, 3, 0x660f3a20, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
+pinsrb, 3, 0x660f3a20, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
pinsrd, 3, 0x660f3a22, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Reg32|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
pinsrq, 3, 0x660f3a22, None, 3, CpuSSE4_1|Cpu64, Modrm|Size64|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
pmaxsb, 2, 0x660f383c, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }