This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] powerpc: Use aligned stores in memset


On Thu, Aug 17, 2017 at 10:11 PM, Rajalakshmi Srinivasaraghavan
<raji@linux.vnet.ibm.com> wrote:
> The powerpc hardware does not allow unaligned accesses on non-cacheable
> memory.  This patch avoids misaligned stores for sizes less than 8 in
> memset to avoid such cases.  Tested on powerpc64 and powerpc64le.

Why are you using memset on non-cacheable memory?  In fact, how are you
getting non-cacheable memory: mmap of /dev/mem, or something different?

Thanks,
Andrew

>
> 2017-08-17  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
>
>         * sysdeps/powerpc/powerpc64/power8/memset.S: Store byte by byte
>         for unaligned inputs if size is less than 8.
> ---
>  sysdeps/powerpc/powerpc64/power8/memset.S | 68 ++++++++++++++++++++++++++++++-
>  1 file changed, 66 insertions(+), 2 deletions(-)
>
> diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
> index 7ad3bb1b00..504bab0841 100644
> --- a/sysdeps/powerpc/powerpc64/power8/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power8/memset.S
> @@ -377,7 +377,8 @@ L(write_LT_32):
>         subf    r5,r0,r5
>
>  2:     bf      30,1f
> -       sth     r4,0(r10)
> +       stb     r4,0(r10)
> +       stb     r4,1(r10)
>         addi    r10,r10,2
>
>  1:     bf      31,L(end_4bytes_alignment)
> @@ -437,11 +438,74 @@ L(tail5):
>         /* Handles copies of 0~8 bytes.  */
>         .align  4
>  L(write_LE_8):
> -       bne     cr6,L(tail4)
> +       /* Use stb instead of sth; stb is safe for
> +          both aligned and unaligned inputs.  */
> +       bne     cr6,L(LE7_tail4)
> +       /* If input is word aligned, use stw; else use sth/stb.  */
> +       andi.   r0,r10,3
> +       bne     L(8_unalign)
>
>         stw     r4,0(r10)
>         stw     r4,4(r10)
>         blr
> +
> +       /* Input is not word aligned and size is 8.  */
> +       .align  4
> +L(8_unalign):
> +       andi.   r0,r10,1
> +       beq     L(8_hwalign)
> +       stb     r4,0(r10)
> +       sth     r4,1(r10)
> +       sth     r4,3(r10)
> +       sth     r4,5(r10)
> +       stb     r4,7(r10)
> +       blr
> +
> +       /* Halfword aligned input and size is 8.  */
> +       .align  4
> +L(8_hwalign):
> +       sth     r4,0(r10)
> +       sth     r4,2(r10)
> +       sth     r4,4(r10)
> +       sth     r4,6(r10)
> +       blr
> +
> +       .align  4
> +       /* Copies 4~7 bytes.  */
> +L(LE7_tail4):
> +       bf      29,L(LE7_tail2)
> +       stb     r4,0(r10)
> +       stb     r4,1(r10)
> +       stb     r4,2(r10)
> +       stb     r4,3(r10)
> +       bf      30,L(LE7_tail5)
> +       stb     r4,4(r10)
> +       stb     r4,5(r10)
> +       bflr    31
> +       stb     r4,6(r10)
> +       blr
> +
> +       .align  4
> +       /* Copies 2~3 bytes.  */
> +L(LE7_tail2):
> +       bf      30,1f
> +       stb     r4,0(r10)
> +       stb     r4,1(r10)
> +       bflr    31
> +       stb     r4,2(r10)
> +       blr
> +
> +       .align  4
> +L(LE7_tail5):
> +       bflr    31
> +       stb     r4,4(r10)
> +       blr
> +
> +       .align  4
> +1:     bflr    31
> +       stb     r4,0(r10)
> +       blr
> +
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
>
> --
> 2.11.0
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]