unofficial mirror of libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: "H.J. Lu" <hjl.tools@gmail.com>
To: libc-alpha@sourceware.org
Cc: fweimer@redhat.com
Subject: Re: [PATCH v4] x86-64: Allocate state buffer space for RDI, RSI and RBX
Date: Sun, 17 Mar 2024 06:04:42 -0700	[thread overview]
Message-ID: <CAMe9rOrSbFdpXnyBA3y7GfnX+Rr=_APFDgpkvu955_0UOqqX9w@mail.gmail.com> (raw)
In-Reply-To: <CAMe9rOpPe_r9=98Cs5LHs6sEXV--E=-4NEz0e2VK7CmOr3oKoQ@mail.gmail.com>

On Sun, Mar 17, 2024 at 6:02 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sun, Mar 17, 2024 at 5:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning the
> > stack.  After realigning the stack, it saves RCX, RDX, R8, R9, R10 and
> > R11.  Define TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI,
> > RSI and RBX so that xsave to STATE_SAVE_OFFSET(%rsp) does not clobber
> > their saved values on the stack.
> >
> >    +==================+<- stack frame start aligned at 8 or 16 bytes
> >    |                  |<- RDI
> >    |                  |<- RSI
> >    |                  |<- RBX
> >    |                  |<- paddings from stack realignment of 64 bytes
> >    |------------------|<- xsave buffer end aligned at 64 bytes
> >    |                  |<-
> >    |                  |<-
> >    |                  |<-
> >    |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
> >    |                  |<- 8-byte padding
> >    |                  |<- 8-byte padding
> >    |                  |<- R11
> >    |                  |<- R10
> >    |                  |<- R9
> >    |                  |<- R8
> >    |                  |<- RDX
> >    |                  |<- RCX
> >    +==================+<- State buffer start aligned at 64 bytes
> >
> > This fixes BZ #31501.
> > ---
> >  sysdeps/x86/cpu-features.c         | 11 ++--
> >  sysdeps/x86/sysdep.h               | 29 ++++++++++
> >  sysdeps/x86_64/tst-gnu2-tls2mod1.S | 87 ++++++++++++++++++++++++++++++
> >  3 files changed, 123 insertions(+), 4 deletions(-)
> >  create mode 100644 sysdeps/x86_64/tst-gnu2-tls2mod1.S
> >
> > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > index 4ea373dffa..3d7c2819d7 100644
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
> >               /* NB: On AMX capable processors, ebx always includes AMX
> >                  states.  */
> >               unsigned int xsave_state_full_size
> > -               = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
> > +               = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> >
> >               cpu_features->xsave_state_size
> >                 = xsave_state_full_size;
> > @@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
> >                       unsigned int amx_size
> >                         = (xstate_amx_comp_offsets[31]
> >                            + xstate_amx_comp_sizes[31]);
> > -                     amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
> > -                                          64);
> > +                     amx_size
> > +                       = ALIGN_UP ((amx_size
> > +                                    + TLSDESC_CALL_REGISTER_SAVE_AREA),
> > +                                   64);
> >                       /* Set xsave_state_full_size to the compact AMX
> >                          state size for XSAVEC.  NB: xsave_state_full_size
> >                          is only used in _dl_tlsdesc_dynamic_xsave and
> > @@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
> >                       cpu_features->xsave_state_full_size = amx_size;
> >  #endif
> >                       cpu_features->xsave_state_size
> > -                       = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
> > +                       = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> > +                                   64);
> >                       CPU_FEATURE_SET (cpu_features, XSAVEC);
> >                     }
> >                 }
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index db8e576e91..46fcd27345 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -46,6 +46,34 @@
> >     red-zone into account.  */
> >  # define STATE_SAVE_OFFSET (8 * 7 + 8)
> >
> > +/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
> > +   the stack.  After realigning the stack, it saves RCX, RDX, R8, R9,
> > +   R10 and R11.  Allocate space for RDI, RSI and RBX so that xsave
> > +   does not clobber their saved values on the stack.
> > +
> > +   +==================+<- stack frame start aligned at 8 or 16 bytes
> > +   |                  |<- RDI
> > +   |                  |<- RSI
> > +   |                  |<- RBX
> > +   |                  |<- paddings from stack realignment of 64 bytes
> > +   |------------------|<- xsave buffer end aligned at 64 bytes
> > +   |                  |<-
> > +   |                  |<-
> > +   |                  |<-
> > +   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
> > +   |                  |<- 8-byte padding
> > +   |                  |<- 8-byte padding
> > +   |                  |<- R11
> > +   |                  |<- R10
> > +   |                  |<- R9
> > +   |                  |<- R8
> > +   |                  |<- RDX
> > +   |                  |<- RCX
> > +   +==================+<- State buffer start aligned at 64 bytes
> > +
> > +*/
> > +# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
> > +
> >  /* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
> >     registers are mutually exclusive.  */
> >  # define STATE_SAVE_MASK               \
> > @@ -68,6 +96,7 @@
> >  /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
> >     doesn't have red-zone, use 0 here.  */
> >  # define STATE_SAVE_OFFSET 0
> > +# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
> >
> >  /* Save SSE, AVX, AXV512, mask and bound registers.   */
> >  # define STATE_SAVE_MASK               \
> > diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> > new file mode 100644
> > index 0000000000..449ddd5c9d
> > --- /dev/null
> > +++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S
> > @@ -0,0 +1,87 @@
> > +/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
> > +   Copyright (C) 2024 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <http://www.gnu.org/licenses/>.  */
> > +
> > +#include <sysdep.h>
> > +
> > +/* On AVX512 machines, OFFSET == 104 caused _dl_tlsdesc_dynamic_xsavec
> > +   to clobber %rdi, %rsi and %rbx.  On Intel AVX CPUs, the state size
> > +   is 960 bytes and this test didn't fail.  It may be due to the unused
> > +   last 128 bytes.  On AMD AVX CPUs, the state size is 832 bytes and
> > +   this test might fail without the fix.  */
> > +#ifndef OFFSET
> > +# define OFFSET 104
> > +#endif
> > +
> > +       .text
> > +       .p2align 4
> > +       .globl  apply_tls
> > +       .type   apply_tls, @function
> > +apply_tls:
> > +       cfi_startproc
> > +       _CET_ENDBR
> > +       pushq   %rbp
> > +       cfi_def_cfa_offset (16)
> > +       cfi_offset (6, -16)
> > +       movdqu  (%RDI_LP), %xmm0
> > +       lea     tls_var1@TLSDESC(%rip), %RAX_LP
> > +       mov     %RSP_LP, %RBP_LP
> > +       cfi_def_cfa_register (6)
> > +       /* Align stack to 64 bytes.  */
> > +       and     $-64, %RSP_LP
> > +       sub     $OFFSET, %RSP_LP
> > +       pushq   %rbx
> > +       /* Set %ebx and %esi to 0xbadbeef.  */
> > +       movl    $0xbadbeef, %ebx
> > +       movl    $0xbadbeef, %esi
> > +       movq    %rdi, saved_rdi(%rip)
> > +       movq    %rsi, saved_rsi(%rip)
> > +       call    *tls_var1@TLSCALL(%RAX_LP)
> > +       /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx.  */
> > +       cmpq    saved_rdi(%rip), %rdi
> > +       jne     L(hlt)
> > +       cmpq    saved_rsi(%rip), %rsi
> > +       jne     L(hlt)
> > +       cmpl    $0xbadbeef, %ebx
> > +       jne     L(hlt)
> > +       add     %fs:0, %RAX_LP
> > +       movups  %xmm0, 32(%RAX_LP)
> > +       movdqu  16(%RDI_LP), %xmm1
> > +       mov     %RAX_LP, %RBX_LP
> > +       movups  %xmm1, 48(%RAX_LP)
> > +       lea     32(%RBX_LP), %RAX_LP
> > +       pop     %rbx
> > +       leave
> > +       cfi_def_cfa (7, 8)
> > +       ret
> > +L(hlt):
> > +       hlt
> > +       cfi_endproc
> > +       .size   apply_tls, .-apply_tls
> > +       .hidden tls_var1
> > +       .globl  tls_var1
> > +       .section        .tbss,"awT",@nobits
> > +       .align 16
> > +       .type   tls_var1, @object
> > +       .size   tls_var1, 3200
> > +tls_var1:
> > +       .zero   3200
> > +       .local  saved_rdi
> > +       .comm   saved_rdi,8,8
> > +       .local  saved_rsi
> > +       .comm   saved_rsi,8,8
> > +       .section        .note.GNU-stack,"",@progbits
> > --
> > 2.44.0
> >
>
> I need to adjust the assembly code.
>

Never mind.  Not needed.


-- 
H.J.

  reply	other threads:[~2024-03-17 13:05 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-17 12:55 [PATCH v4] x86-64: Allocate state buffer space for RDI, RSI and RBX H.J. Lu
2024-03-17 13:02 ` H.J. Lu
2024-03-17 13:04   ` H.J. Lu [this message]
2024-03-18 10:22 ` Florian Weimer
2024-03-18 12:32   ` H.J. Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://www.gnu.org/software/libc/involved.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAMe9rOrSbFdpXnyBA3y7GfnX+Rr=_APFDgpkvu955_0UOqqX9w@mail.gmail.com' \
    --to=hjl.tools@gmail.com \
    --cc=fweimer@redhat.com \
    --cc=libc-alpha@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).