This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: Optimize out the REX.W bit


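The REX.W bit is ignored for extractps, movmskpd, movmskps, pextrb,
pextrw, pinsrb, pinsrw and pmovmskb.  I am checking in this patch
to optimize it out: when one of these instructions is given a 64-bit
general register operand, the assembler no longer emits the REX.W
prefix and generates the same encoding as for the 32-bit register.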


H.J.
---
gas/testsuite/

2008-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	* gas/i386/i386.exp: Run rexw.

	* gas/i386/rexw.d: New.
	* gas/i386/rexw.s: Likewise.

	* gas/i386/x86-64-sse4_1-intel.d: Updated.
	* gas/i386/x86-64-sse4_1.d: Likewise.

opcodes/

2008-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	* i386-opc.tbl: Add NoRex64 to extractps, movmskpd, movmskps,
	pextrb, pextrw, pinsrb, pinsrw and pmovmskb.
	* i386-tbl.h: Regenerated.

--- binutils/gas/testsuite/gas/i386/i386.exp.rexw	2008-01-03 17:05:56.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/i386.exp	2008-01-04 09:27:18.000000000 -0800
@@ -216,6 +216,7 @@ if [expr ([istarget "i*86-*-*"] || [ista
     run_dump_test "x86-64-sib-intel"
     run_dump_test "x86-64-opcode-inval"
     run_dump_test "x86-64-opcode-inval-intel"
+    run_dump_test "rexw"
 
     if { ![istarget "*-*-aix*"]
       && ![istarget "*-*-beos*"]
--- binutils/gas/testsuite/gas/i386/rexw.d.rexw	2008-01-04 09:26:16.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/rexw.d	2008-01-04 09:46:53.000000000 -0800
@@ -0,0 +1,31 @@
+#objdump: -dw
+#name: x86-64 REX.W optimization
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+ <foo>:
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 50 ca          	movmskpd %xmm2,%ecx
+[ 	]*[a-f0-9]+:	0f 50 ca             	movmskps %xmm2,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	0f c5 c8 00          	pextrw \$0x0,%mm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	66 0f c4 c1 00       	pinsrw \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	0f c4 c1 00          	pinsrw \$0x0,%ecx,%mm0
+[ 	]*[a-f0-9]+:	0f d7 c5             	pmovmskb %mm5,%eax
+[ 	]*[a-f0-9]+:	66 0f d7 c5          	pmovmskb %xmm5,%eax
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 50 ca          	movmskpd %xmm2,%ecx
+[ 	]*[a-f0-9]+:	0f 50 ca             	movmskps %xmm2,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	0f c5 c8 00          	pextrw \$0x0,%mm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	0f c4 c1 00          	pinsrw \$0x0,%ecx,%mm0
+[ 	]*[a-f0-9]+:	66 0f c4 c1 00       	pinsrw \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	0f d7 cd             	pmovmskb %mm5,%ecx
+[ 	]*[a-f0-9]+:	66 0f d7 cd          	pmovmskb %xmm5,%ecx
+#pass
--- binutils/gas/testsuite/gas/i386/rexw.s.rexw	2008-01-04 09:26:16.000000000 -0800
+++ binutils/gas/testsuite/gas/i386/rexw.s	2008-01-04 09:47:11.000000000 -0800
@@ -0,0 +1,28 @@
+# Instructions where REX.W is ignored.
+
+	.text
+foo:
+	extractps $0x0,%xmm0,%rcx
+	movmskpd %xmm2,%rcx
+	movmskps %xmm2,%rcx
+	pextrb $0x0,%xmm0,%rcx
+	pextrw $0x0,%mm0,%rcx
+	pextrw $0x0,%xmm0,%rcx
+	pinsrb $0x0,%rcx,%xmm0
+	pinsrw $0x0,%rcx,%xmm0
+	pinsrw $0x0,%rcx,%mm0
+	pmovmskb %mm5,%rax
+	pmovmskb %xmm5,%rax
+
+	.intel_syntax noprefix
+	extractps rcx,xmm0,0x0
+	movmskpd rcx,xmm2
+	movmskps rcx,xmm2
+	pextrb rcx,xmm0,0x0
+	pextrw rcx,mm0,0x0
+	pextrw rcx,xmm0,0x0
+	pinsrb xmm0,rcx,0x0
+	pinsrw mm0,rcx,0x0
+	pinsrw xmm0,rcx,0x0
+	pmovmskb rcx,mm5
+	pmovmskb rcx,xmm5
--- binutils/gas/testsuite/gas/i386/x86-64-sse4_1-intel.d.rexw	2007-09-12 11:46:41.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-sse4_1-intel.d	2008-01-04 09:26:16.000000000 -0800
@@ -11,19 +11,19 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 0f 3a 0d c1 00    	blendpd xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 0c 01 00    	blendps xmm0,XMMWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 0c c1 00    	blendps xmm0,xmm1,0x0
-[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd xmm0,xmm1(,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd xmm0,xmm1(,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps xmm0,xmm1(,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps xmm0,xmm1(,xmm0)?
+[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd xmm0,xmm1,xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd xmm0,xmm1,xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps xmm0,xmm1,xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps xmm0,xmm1,xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 41 01 00    	dppd   xmm0,XMMWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 41 c1 00    	dppd   xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 01 00    	dpps   xmm0,XMMWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 c1 00    	dpps   xmm0,xmm1,0x0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 17 c1 00 	extractps rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 17 01 00    	extractps DWORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 21 c1 00    	insertps xmm0,xmm1,0x0
@@ -33,29 +33,29 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 0f 3a 42 c1 00    	mpsadbw xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 38 2b 01       	packusdw xmm0,XMMWORD PTR \[rcx\]
 [ 	]*[a-f0-9]+:	66 0f 38 2b c1       	packusdw xmm0,xmm1
-[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb xmm0,xmm1(,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb xmm0,XMMWORD PTR \[rcx\](,xmm0)?
-[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb xmm0,xmm1(,xmm0)?
+[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb xmm0,xmm1,xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb xmm0,XMMWORD PTR \[rcx\],xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb xmm0,xmm1,xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0e 01 00    	pblendw xmm0,XMMWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 0e c1 00    	pblendw xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 38 29 c1       	pcmpeqq xmm0,xmm1
 [ 	]*[a-f0-9]+:	66 0f 38 29 01       	pcmpeqq xmm0,XMMWORD PTR \[rcx\]
-[ 	]*[a-f0-9]+:	66 48 0f 3a 14 c1 00 	pextrb rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 14 01 00    	pextrb BYTE PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 16 c1 00    	pextrd ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 16 01 00    	pextrd DWORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 c1 00 	pextrq rcx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 01 00 	pextrq QWORD PTR \[rcx\],xmm0,0x0
-[ 	]*[a-f0-9]+:	66 48 0f c5 c8 00    	pextrw rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 15 01 00    	pextrw WORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 38 41 c1       	phminposuw xmm0,xmm1
 [ 	]*[a-f0-9]+:	66 0f 38 41 01       	phminposuw xmm0,XMMWORD PTR \[rcx\]
 [ 	]*[a-f0-9]+:	66 0f 3a 20 01 00    	pinsrb xmm0,BYTE PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb xmm0,ecx,0x0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 20 c1 00 	pinsrb xmm0,rcx,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb xmm0,ecx,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 01 00    	pinsrd xmm0,DWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 c1 00    	pinsrd xmm0,ecx,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 22 01 00 	pinsrq xmm0,QWORD PTR \[rcx\],0x0
@@ -126,7 +126,7 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 0f 3a 41 c1 00    	dppd   xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 01 00    	dpps   xmm0,XMMWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 c1 00    	dpps   xmm0,xmm1,0x0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 17 c1 00 	extractps rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 17 01 00    	extractps DWORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 21 c1 00    	insertps xmm0,xmm1,0x0
@@ -142,21 +142,21 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 0f 3a 0e c1 00    	pblendw xmm0,xmm1,0x0
 [ 	]*[a-f0-9]+:	66 0f 38 29 c1       	pcmpeqq xmm0,xmm1
 [ 	]*[a-f0-9]+:	66 0f 38 29 01       	pcmpeqq xmm0,XMMWORD PTR \[rcx\]
-[ 	]*[a-f0-9]+:	66 48 0f 3a 14 c1 00 	pextrb rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 14 01 00    	pextrb BYTE PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 16 c1 00    	pextrd ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 16 01 00    	pextrd DWORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 c1 00 	pextrq rcx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 01 00 	pextrq QWORD PTR \[rcx\],xmm0,0x0
-[ 	]*[a-f0-9]+:	66 48 0f c5 c8 00    	pextrw rcx,xmm0,0x0
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw ecx,xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 15 01 00    	pextrw WORD PTR \[rcx\],xmm0,0x0
 [ 	]*[a-f0-9]+:	66 0f 38 41 c1       	phminposuw xmm0,xmm1
 [ 	]*[a-f0-9]+:	66 0f 38 41 01       	phminposuw xmm0,XMMWORD PTR \[rcx\]
 [ 	]*[a-f0-9]+:	66 0f 3a 20 01 00    	pinsrb xmm0,BYTE PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb xmm0,ecx,0x0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 20 c1 00 	pinsrb xmm0,rcx,0x0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb xmm0,ecx,0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 01 00    	pinsrd xmm0,DWORD PTR \[rcx\],0x0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 c1 00    	pinsrd xmm0,ecx,0x0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 22 01 00 	pinsrq xmm0,QWORD PTR \[rcx\],0x0
--- binutils/gas/testsuite/gas/i386/x86-64-sse4_1.d.rexw	2007-09-12 11:46:41.000000000 -0700
+++ binutils/gas/testsuite/gas/i386/x86-64-sse4_1.d	2008-01-04 09:26:16.000000000 -0800
@@ -6,126 +6,23 @@
 Disassembly of section .text:
 
 0+000 <foo>:
-[ 	]*[0-9a-f]+:	66 0f 3a 0d 01 00    	blendpd \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0d c1 00    	blendpd \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0c 01 00    	blendps \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0c c1 00    	blendps \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 15 01       	blendvpd (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 15 c1       	blendvpd (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 15 01       	blendvpd (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 15 c1       	blendvpd (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 14 01       	blendvps (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 14 c1       	blendvps (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 14 01       	blendvps (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 14 c1       	blendvps (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 41 01 00    	dppd   \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 41 c1 00    	dppd   \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 40 01 00    	dpps   \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 40 c1 00    	dpps   \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 48 0f 3a 17 c1 00 	extractps \$0x0,%xmm0,%rcx
-[ 	]*[0-9a-f]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
-[ 	]*[0-9a-f]+:	66 0f 3a 17 01 00    	extractps \$0x0,%xmm0,\(%rcx\)
-[ 	]*[0-9a-f]+:	66 0f 3a 21 c1 00    	insertps \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 21 01 00    	insertps \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 2a 01       	movntdqa \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 42 01 00    	mpsadbw \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 42 c1 00    	mpsadbw \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 2b 01       	packusdw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 2b c1       	packusdw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 10 01       	pblendvb (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 10 c1       	pblendvb (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 10 01       	pblendvb (%xmm0,)?\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 10 c1       	pblendvb (%xmm0,)?%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0e 01 00    	pblendw \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0e c1 00    	pblendw \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 29 c1       	pcmpeqq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 29 01       	pcmpeqq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 48 0f 3a 14 c1 00 	pextrb \$0x0,%xmm0,%rcx
-[ 	]*[0-9a-f]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
-[ 	]*[0-9a-f]+:	66 0f 3a 14 01 00    	pextrb \$0x0,%xmm0,\(%rcx\)
-[ 	]*[0-9a-f]+:	66 0f 3a 16 c1 00    	pextrd \$0x0,%xmm0,%ecx
-[ 	]*[0-9a-f]+:	66 0f 3a 16 01 00    	pextrd \$0x0,%xmm0,\(%rcx\)
-[ 	]*[0-9a-f]+:	66 48 0f 3a 16 c1 00 	pextrq \$0x0,%xmm0,%rcx
-[ 	]*[0-9a-f]+:	66 48 0f 3a 16 01 00 	pextrq \$0x0,%xmm0,\(%rcx\)
-[ 	]*[0-9a-f]+:	66 48 0f c5 c8 00    	pextrw \$0x0,%xmm0,%rcx
-[ 	]*[0-9a-f]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
-[ 	]*[0-9a-f]+:	66 0f 3a 15 01 00    	pextrw \$0x0,%xmm0,\(%rcx\)
-[ 	]*[0-9a-f]+:	66 0f 38 41 c1       	phminposuw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 41 01       	phminposuw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 20 01 00    	pinsrb \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
-[ 	]*[0-9a-f]+:	66 48 0f 3a 20 c1 00 	pinsrb \$0x0,%rcx,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 22 01 00    	pinsrd \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 22 c1 00    	pinsrd \$0x0,%ecx,%xmm0
-[ 	]*[0-9a-f]+:	66 48 0f 3a 22 01 00 	pinsrq \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 48 0f 3a 22 c1 00 	pinsrq \$0x0,%rcx,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3c c1       	pmaxsb %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3c 01       	pmaxsb \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3d c1       	pmaxsd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3d 01       	pmaxsd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3f c1       	pmaxud %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3f 01       	pmaxud \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3e c1       	pmaxuw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3e 01       	pmaxuw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 38 c1       	pminsb %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 38 01       	pminsb \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 39 c1       	pminsd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 39 01       	pminsd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3b c1       	pminud %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3b 01       	pminud \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3a c1       	pminuw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 3a 01       	pminuw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 20 c1       	pmovsxbw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 20 01       	pmovsxbw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 21 c1       	pmovsxbd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 21 01       	pmovsxbd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 22 c1       	pmovsxbq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 22 01       	pmovsxbq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 23 c1       	pmovsxwd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 23 01       	pmovsxwd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 24 c1       	pmovsxwq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 24 01       	pmovsxwq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 25 c1       	pmovsxdq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 25 01       	pmovsxdq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 30 c1       	pmovzxbw %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 30 01       	pmovzxbw \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 31 c1       	pmovzxbd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 31 01       	pmovzxbd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 32 c1       	pmovzxbq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 32 01       	pmovzxbq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 33 c1       	pmovzxwd %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 33 01       	pmovzxwd \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 34 c1       	pmovzxwq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 34 01       	pmovzxwq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 35 c1       	pmovzxdq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 35 01       	pmovzxdq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 28 c1       	pmuldq %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 28 01       	pmuldq \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 40 c1       	pmulld %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 40 01       	pmulld \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 17 c1       	ptest  %xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 38 17 01       	ptest  \(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 09 01 00    	roundpd \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 09 c1 00    	roundpd \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 08 01 00    	roundps \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 08 c1 00    	roundps \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0b 01 00    	roundsd \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0b c1 00    	roundsd \$0x0,%xmm1,%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0a 01 00    	roundss \$0x0,\(%rcx\),%xmm0
-[ 	]*[0-9a-f]+:	66 0f 3a 0a c1 00    	roundss \$0x0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0d 01 00    	blendpd \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0d c1 00    	blendpd \$0x0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0c 01 00    	blendps \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0c c1 00    	blendps \$0x0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd %xmm0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps %xmm0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps %xmm0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps %xmm0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 41 01 00    	dppd   \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 41 c1 00    	dppd   \$0x0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 01 00    	dpps   \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 40 c1 00    	dpps   \$0x0,%xmm1,%xmm0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 17 c1 00 	extractps \$0x0,%xmm0,%rcx
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 17 01 00    	extractps \$0x0,%xmm0,\(%rcx\)
 [ 	]*[a-f0-9]+:	66 0f 3a 21 c1 00    	insertps \$0x0,%xmm1,%xmm0
@@ -137,25 +34,128 @@ Disassembly of section .text:
 [ 	]*[a-f0-9]+:	66 0f 38 2b c1       	packusdw %xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb %xmm0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb %xmm0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0e 01 00    	pblendw \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 0e c1 00    	pblendw \$0x0,%xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 29 c1       	pcmpeqq %xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 29 01       	pcmpeqq \(%rcx\),%xmm0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 14 c1 00 	pextrb \$0x0,%xmm0,%rcx
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 14 01 00    	pextrb \$0x0,%xmm0,\(%rcx\)
 [ 	]*[a-f0-9]+:	66 0f 3a 16 c1 00    	pextrd \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 16 01 00    	pextrd \$0x0,%xmm0,\(%rcx\)
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 c1 00 	pextrq \$0x0,%xmm0,%rcx
 [ 	]*[a-f0-9]+:	66 48 0f 3a 16 01 00 	pextrq \$0x0,%xmm0,\(%rcx\)
-[ 	]*[a-f0-9]+:	66 48 0f c5 c8 00    	pextrw \$0x0,%xmm0,%rcx
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
 [ 	]*[a-f0-9]+:	66 0f 3a 15 01 00    	pextrw \$0x0,%xmm0,\(%rcx\)
 [ 	]*[a-f0-9]+:	66 0f 38 41 c1       	phminposuw %xmm1,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 38 41 01       	phminposuw \(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 20 01 00    	pinsrb \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
-[ 	]*[a-f0-9]+:	66 48 0f 3a 20 c1 00 	pinsrb \$0x0,%rcx,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 22 01 00    	pinsrd \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 22 c1 00    	pinsrd \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	66 48 0f 3a 22 01 00 	pinsrq \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 48 0f 3a 22 c1 00 	pinsrq \$0x0,%rcx,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3c c1       	pmaxsb %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3c 01       	pmaxsb \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3d c1       	pmaxsd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3d 01       	pmaxsd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3f c1       	pmaxud %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3f 01       	pmaxud \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3e c1       	pmaxuw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3e 01       	pmaxuw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 38 c1       	pminsb %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 38 01       	pminsb \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 39 c1       	pminsd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 39 01       	pminsd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3b c1       	pminud %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3b 01       	pminud \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3a c1       	pminuw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 3a 01       	pminuw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 20 c1       	pmovsxbw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 20 01       	pmovsxbw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 21 c1       	pmovsxbd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 21 01       	pmovsxbd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 22 c1       	pmovsxbq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 22 01       	pmovsxbq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 23 c1       	pmovsxwd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 23 01       	pmovsxwd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 24 c1       	pmovsxwq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 24 01       	pmovsxwq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 25 c1       	pmovsxdq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 25 01       	pmovsxdq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 30 c1       	pmovzxbw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 30 01       	pmovzxbw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 31 c1       	pmovzxbd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 31 01       	pmovzxbd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 32 c1       	pmovzxbq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 32 01       	pmovzxbq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 33 c1       	pmovzxwd %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 33 01       	pmovzxwd \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 34 c1       	pmovzxwq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 34 01       	pmovzxwq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 35 c1       	pmovzxdq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 35 01       	pmovzxdq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 28 c1       	pmuldq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 28 01       	pmuldq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 40 c1       	pmulld %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 40 01       	pmulld \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 17 c1       	ptest  %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 17 01       	ptest  \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 09 01 00    	roundpd \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 09 c1 00    	roundpd \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 08 01 00    	roundps \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 08 c1 00    	roundps \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0b 01 00    	roundsd \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0b c1 00    	roundsd \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0a 01 00    	roundss \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0a c1 00    	roundss \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0d 01 00    	blendpd \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0d c1 00    	blendpd \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0c 01 00    	blendps \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0c c1 00    	blendps \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 01       	blendvpd %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 15 c1       	blendvpd %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 01       	blendvps %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 14 c1       	blendvps %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 41 01 00    	dppd   \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 41 c1 00    	dppd   \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 40 01 00    	dpps   \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 40 c1 00    	dpps   \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 17 c1 00    	extractps \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 17 01 00    	extractps \$0x0,%xmm0,\(%rcx\)
+[ 	]*[a-f0-9]+:	66 0f 3a 21 c1 00    	insertps \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 21 01 00    	insertps \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 2a 01       	movntdqa \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 42 01 00    	mpsadbw \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 42 c1 00    	mpsadbw \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 2b 01       	packusdw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 2b c1       	packusdw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 01       	pblendvb %xmm0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 10 c1       	pblendvb %xmm0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0e 01 00    	pblendw \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 0e c1 00    	pblendw \$0x0,%xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 29 c1       	pcmpeqq %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 29 01       	pcmpeqq \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 14 c1 00    	pextrb \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 14 01 00    	pextrb \$0x0,%xmm0,\(%rcx\)
+[ 	]*[a-f0-9]+:	66 0f 3a 16 c1 00    	pextrd \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 16 01 00    	pextrd \$0x0,%xmm0,\(%rcx\)
+[ 	]*[a-f0-9]+:	66 48 0f 3a 16 c1 00 	pextrq \$0x0,%xmm0,%rcx
+[ 	]*[a-f0-9]+:	66 48 0f 3a 16 01 00 	pextrq \$0x0,%xmm0,\(%rcx\)
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f c5 c8 00       	pextrw \$0x0,%xmm0,%ecx
+[ 	]*[a-f0-9]+:	66 0f 3a 15 01 00    	pextrw \$0x0,%xmm0,\(%rcx\)
+[ 	]*[a-f0-9]+:	66 0f 38 41 c1       	phminposuw %xmm1,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 38 41 01       	phminposuw \(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 01 00    	pinsrb \$0x0,\(%rcx\),%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
+[ 	]*[a-f0-9]+:	66 0f 3a 20 c1 00    	pinsrb \$0x0,%ecx,%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 01 00    	pinsrd \$0x0,\(%rcx\),%xmm0
 [ 	]*[a-f0-9]+:	66 0f 3a 22 c1 00    	pinsrd \$0x0,%ecx,%xmm0
 [ 	]*[a-f0-9]+:	66 48 0f 3a 22 01 00 	pinsrq \$0x0,\(%rcx\),%xmm0
--- binutils/opcodes/i386-opc.tbl.rexw	2008-01-03 17:05:55.000000000 -0800
+++ binutils/opcodes/i386-opc.tbl	2008-01-04 09:26:16.000000000 -0800
@@ -1086,7 +1086,7 @@ movhps, 2, 0xf17, None, 2, CpuSSE, Modrm
 movlhps, 2, 0xf16, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, RegXMM }
 movlps, 2, 0xf12, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 movlps, 2, 0xf13, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
-movmskps, 2, 0xf50, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+movmskps, 2, 0xf50, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
 movntps, 2, 0xf2b, None, 2, CpuSSE, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 movntq, 2, 0xfe7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegMMX, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 movntdq, 2, 0x660fe7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
@@ -1101,14 +1101,14 @@ pavgb, 2, 0xfe0, None, 2, CpuMMX2, Modrm
 pavgb, 2, 0x660fe0, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 pavgw, 2, 0xfe3, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
 pavgw, 2, 0x660fe3, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pextrw, 3, 0xfc5, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, RegMMX, Reg32|Reg64 }
-pextrw, 3, 0x660fc5, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64 }
+pextrw, 3, 0xfc5, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, RegMMX, Reg32|Reg64 }
+pextrw, 3, 0x660fc5, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
 
 // Streaming SIMD extensions 4.1 Instructions.
-pextrw, 3, 0x660f3a15, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+pextrw, 3, 0x660f3a15, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 
-pinsrw, 3, 0xfc4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegMMX }
-pinsrw, 3, 0x660fc4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
+pinsrw, 3, 0xfc4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegMMX }
+pinsrw, 3, 0x660fc4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 pmaxsw, 2, 0xfee, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
 pmaxsw, 2, 0x660fee, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 pmaxub, 2, 0xfde, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
@@ -1117,8 +1117,8 @@ pminsw, 2, 0xfea, None, 2, CpuMMX2, Modr
 pminsw, 2, 0x660fea, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 pminub, 2, 0xfda, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
 pminub, 2, 0x660fda, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pmovmskb, 2, 0xfd7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegMMX, Reg32|Reg64 }
-pmovmskb, 2, 0x660fd7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+pmovmskb, 2, 0xfd7, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegMMX, Reg32|Reg64 }
+pmovmskb, 2, 0x660fd7, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
 pmulhuw, 2, 0xfe4, None, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegMMX, RegMMX }
 pmulhuw, 2, 0x660fe4, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 prefetchnta, 1, 0xf18, 0x0, 2, CpuMMX2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S }
@@ -1186,7 +1186,7 @@ movhpd, 2, 0x660f16, None, 2, CpuSSE2, M
 movhpd, 2, 0x660f17, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 movlpd, 2, 0x660f12, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 movlpd, 2, 0x660f13, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
-movmskpd, 2, 0x660f50, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf, { RegXMM, Reg32|Reg64 }
+movmskpd, 2, 0x660f50, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|NoRex64, { RegXMM, Reg32|Reg64 }
 movntpd, 2, 0x660f2b, None, 2, CpuSSE2, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM, BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 // Intel mode string move.
 movsd, 0, 0xa5, None, 1, 0, Size32|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|IsString, { 0 }
@@ -1335,7 +1335,7 @@ blendvps, 3, 0x660f3814, None, 3, CpuSSE
 blendvps, 2, 0x660f3814, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 dppd, 3, 0x660f3a41, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 dpps, 3, 0x660f3a40, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-extractps, 3, 0x660f3a17, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+extractps, 3, 0x660f3a17, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 insertps, 3, 0x660f3a21, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 movntdqa, 2, 0x660f382a, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 mpsadbw, 3, 0x660f3a42, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
@@ -1344,11 +1344,11 @@ pblendvb, 3, 0x660f3810, None, 3, CpuSSE
 pblendvb, 2, 0x660f3810, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 pblendw, 3, 0x660f3a0e, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
 pcmpeqq, 2, 0x660f3829, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pextrb, 3, 0x660f3a14, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
+pextrb, 3, 0x660f3a14, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel|NoRex64, { Imm8, RegXMM, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 pextrd, 3, 0x660f3a16, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg32|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 pextrq, 3, 0x660f3a16, None, 3, CpuSSE4_1|Cpu64, Modrm|Size64|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, RegXMM, Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S }
 phminposuw, 2, 0x660f3841, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }
-pinsrb, 3, 0x660f3a20, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
+pinsrb, 3, 0x660f3a20, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|ByteOkIntel|NoRex64, { Imm8, Reg32|Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 pinsrd, 3, 0x660f3a22, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Reg32|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 pinsrq, 3, 0x660f3a22, None, 3, CpuSSE4_1|Cpu64, Modrm|Size64|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Imm8, Reg64|BaseIndex|Disp8|Disp16|Disp32|Disp32S, RegXMM }
 pmaxsb, 2, 0x660f383c, None, 3, CpuSSE4_1, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { BaseIndex|Disp8|Disp16|Disp32|Disp32S|RegXMM, RegXMM }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]