From: "H.J. Lu via Libc-alpha"
Reply-To: "H.J. Lu"
To: Noah Goldstein
Cc: GNU C Library
Subject: Re: [PATCH v1 4/6] x86_64: Add sse2 optimized __memcmpeq in memcmp-sse2.S
Date: Wed, 27 Oct 2021 05:48:03 -0700
List-Id: Libc-alpha mailing list
In-Reply-To: <20211027024323.1199441-4-goldstein.w.n@gmail.com>
References: <20211027024323.1199441-1-goldstein.w.n@gmail.com> <20211027024323.1199441-4-goldstein.w.n@gmail.com>
Lu" Cc: GNU C Library Errors-To: libc-alpha-bounces+e=80x24.org@sourceware.org Sender: "Libc-alpha" On Tue, Oct 26, 2021 at 7:43 PM Noah Goldstein wrote: > > No bug. This commit does not modify any of the memcmp > implementation. It just adds __memcmpeq ifdefs to skip obvious cases > where computing the proper 1/-1 required by memcmp is not needed. > --- > sysdeps/x86_64/memcmp.S | 55 ++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 51 insertions(+), 4 deletions(-) > > diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S > index b53f2c0866..c245383963 100644 > --- a/sysdeps/x86_64/memcmp.S > +++ b/sysdeps/x86_64/memcmp.S > @@ -49,34 +49,63 @@ L(s2b): > movzwl (%rdi), %eax > movzwl (%rdi, %rsi), %edx > subq $2, %r10 > +#ifdef USE_AS_MEMCMPEQ > + je L(finz1) > +#else > je L(fin2_7) > +#endif > addq $2, %rdi > cmpl %edx, %eax > +#ifdef USE_AS_MEMCMPEQ > + jnz L(neq_early) > +#else > jnz L(fin2_7) > +#endif > L(s4b): > testq $4, %r10 > jz L(s8b) > movl (%rdi), %eax > movl (%rdi, %rsi), %edx > subq $4, %r10 > +#ifdef USE_AS_MEMCMPEQ > + je L(finz1) > +#else > je L(fin2_7) > +#endif > addq $4, %rdi > cmpl %edx, %eax > +#ifdef USE_AS_MEMCMPEQ > + jnz L(neq_early) > +#else > jnz L(fin2_7) > +#endif > L(s8b): > testq $8, %r10 > jz L(s16b) > movq (%rdi), %rax > movq (%rdi, %rsi), %rdx > subq $8, %r10 > +#ifdef USE_AS_MEMCMPEQ > + je L(sub_return8) > +#else > je L(fin2_7) > +#endif > addq $8, %rdi > cmpq %rdx, %rax > +#ifdef USE_AS_MEMCMPEQ > + jnz L(neq_early) > +#else > jnz L(fin2_7) > +#endif > L(s16b): > movdqu (%rdi), %xmm1 > movdqu (%rdi, %rsi), %xmm0 > pcmpeqb %xmm0, %xmm1 > +#ifdef USE_AS_MEMCMPEQ > + pmovmskb %xmm1, %eax > + subl $0xffff, %eax > + ret > +#else > pmovmskb %xmm1, %edx > xorl %eax, %eax > subl $0xffff, %edx > @@ -86,7 +115,7 @@ L(s16b): > movzbl (%rcx), %eax > movzbl (%rsi, %rcx), %edx > jmp L(finz1) > - > +#endif > .p2align 4,, 4 > L(finr1b): > movzbl (%rdi), %eax > @@ -95,7 +124,15 @@ L(finz1): > subl %edx, %eax > L(exit): > ret > - > +#ifdef USE_AS_MEMCMPEQ > + .p2align 4,, 4 > +L(sub_return8): > + subq %rdx, %rax > + movl %eax, %edx > + shrq $32, %rax > + orl %edx, %eax > + ret > +#else > .p2align 4,, 4 > L(fin2_7): > cmpq %rdx, %rax > @@ -111,12 +148,17 @@ L(fin2_7): > movzbl %dl, %edx > subl %edx, %eax > ret > - > +#endif > .p2align 4,, 4 > L(finz): > xorl %eax, %eax > ret > - > +#ifdef USE_AS_MEMCMPEQ > + .p2align 4,, 4 > +L(neq_early): > + movl $1, %eax > + ret > +#endif > /* For blocks bigger than 32 bytes > 1. Advance one of the addr pointer to be 16B aligned. > 2. Treat the case of both addr pointers aligned to 16B > @@ -246,11 +288,16 @@ L(mt16): > > .p2align 4,, 4 > L(neq): > +#ifdef USE_AS_MEMCMPEQ > + movl $1, %eax > + ret > +#else > bsfl %edx, %ecx > movzbl (%rdi, %rcx), %eax > addq %rdi, %rsi > movzbl (%rsi,%rcx), %edx > jmp L(finz1) > +#endif > > .p2align 4,, 4 > L(ATR): > -- > 2.25.1 > LGTM. Reviewed-by: H.J. Lu Thanks. -- H.J.