This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH 2/3] sparc: assembly version of memmove for ultra1+
Hi Patrick.
Nitpick below.
Sam
> +
> +.Ls2alg:
> + lduh [%o1], %o3 /* know src is 2 byte aligned */
> + inc 2, %o1
> + srl %o3, 8, %o4
> + stb %o4, [%o0] /* have to do bytes, */
> + stb %o3, [%o0 + 1] /* don't know dst alignment */
> + inc 2, %o0
> + dec 2, %o2
> +
> +.Laldst:
> + andcc %o0, 3, %o5 /* align the destination address */
.Lald: bz,pn %icc, .Lw4cp
Putting the label on its own line would make patches more readable.
But the source looks OK.
> + cmp %o5, 2
> + bz,pn %icc, .Lw2cp
> + cmp %o5, 3
> +.Lw3cp:
> + lduw [%o1], %o4
> + inc 4, %o1
> + srl %o4, 24, %o5
> + stb %o5, [%o0]
> + bne,pt %icc, .Lw1cp
> + inc %o0
> + dec 1, %o2
> + andn %o2, 3, %o3 /* i3 is aligned word count */
> + dec 4, %o3 /* avoid reading beyond tail of src */
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +1: sll %o4, 8, %g1 /* save residual bytes */
> + lduw [%o1+%o0], %o4
> + deccc 4, %o3
> + srl %o4, 24, %o5 /* merge with residual */
> + or %o5, %g1, %g1
> + st %g1, [%o0]
> + bnz,pt %XCC, 1b
> + inc 4, %o0
> + sub %o1, 3, %o1 /* used one byte of last word read */
> + and %o2, 3, %o2
> + b 7f
> + inc 4, %o2
> +
> +.Lw1cp:
> + srl %o4, 8, %o5
> + sth %o5, [%o0]
> + inc 2, %o0
> + dec 3, %o2
> + andn %o2, 3, %o3
> + dec 4, %o3 /* avoid reading beyond tail of src */
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +2: sll %o4, 24, %g1 /* save residual bytes */
> + lduw [%o1+%o0], %o4
> + deccc 4, %o3
> + srl %o4, 8, %o5 /* merge with residual */
> + or %o5, %g1, %g1
> + st %g1, [%o0]
> + bnz,pt %XCC, 2b
> + inc 4, %o0
> + sub %o1, 1, %o1 /* used three bytes of last word read */
> + and %o2, 3, %o2
> + b 7f
> + inc 4, %o2
The instruction above is in a delay slot - indent it with one extra space.
> +
> +.Lw2cp:
> + lduw [%o1], %o4
> + inc 4, %o1
> + srl %o4, 16, %o5
> + sth %o5, [%o0]
> + inc 2, %o0
> + dec 2, %o2
> + andn %o2, 3, %o3 /* i3 is aligned word count */
> + dec 4, %o3 /* avoid reading beyond tail of src */
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +3: sll %o4, 16, %g1 /* save residual bytes */
> + lduw [%o1+%o0], %o4
> + deccc 4, %o3
> + srl %o4, 16, %o5 /* merge with residual */
> + or %o5, %g1, %g1
> + st %g1, [%o0]
> + bnz,pt %XCC, 3b
> + inc 4, %o0
> + sub %o1, 2, %o1 /* used two bytes of last word read */
> + and %o2, 3, %o2
> + b 7f
> + inc 4, %o2
> +
> +.Lw4cp:
> + andn %o2, 3, %o3 /* i3 is aligned word count */
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +1: lduw [%o1+%o0], %o4 /* read from address */
> + deccc 4, %o3 /* decrement count */
> + st %o4, [%o0] /* write at destination address */
> + bg,pt %XCC, 1b
> + inc 4, %o0 /* increment to address */
> + b 7f
> + and %o2, 3, %o2 /* number of leftover bytes, if any */
> +
> +/*
> + * differenced byte copy, works with any alignment
> + */
> +.Ldbytecp:
> + b 7f
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +4: stb %o4, [%o0] /* write to address */
> + inc %o0 /* inc to address */
> +7: deccc %o2 /* decrement count */
> + bge,a %XCC, 4b /* loop till done */
> + ldub [%o1+%o0], %o4 /* read from address */
> + retl
> + mov %g2, %o0 /* return pointer to destination */
> +
> +/*
> + * an overlapped copy that must be done "backwards"
> + */
> +.Lovbc:
> + add %o1, %o2, %o1 /* get to end of source space */
> + add %o0, %o2, %o0 /* get to end of destination space */
> + sub %o1, %o0, %o1 /* i1 gets the difference */
> +
> +5: dec %o0 /* decrement to address */
> + ldub [%o1+%o0], %o3 /* read a byte */
> + deccc %o2 /* decrement count */
> + bg,pt %XCC, 5b /* loop until done */
> + stb %o3, [%o0] /* write byte */
> + retl
> + mov %g2, %o0 /* return pointer to destination */
> +END(memmove)
> +
> +libc_hidden_builtin_def (memmove)