/* root/usr/src/lib/libc/sparcv9/gen/memcmp.S */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "memcmp.s"

/*
 * memcmp(s1, s2, len)
 *
 * Compare len bytes (n in the C program below), each byte taken as an
 * unsigned char:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.
 *
 *      int
 *      memcmp(const void *s1, const void *s2, size_t n)
 *      {
 *              if (s1 != s2 && n != 0) {
 *                      const unsigned char *ps1 = s1;
 *                      const unsigned char *ps2 = s2;
 *                      do {
 *                              if (*ps1++ != *ps2++)
 *                                      return (ps1[-1] - ps2[-1]);
 *                      } while (--n != 0);
 *              }
 *              return (0);
 *      }
 */

#include <sys/asm_linkage.h>

        ANSI_PRAGMA_WEAK(memcmp,function)

/*
 * int memcmp(const void *s1, const void *s2, size_t len)
 *
 * In:   %o0 = s1, %o1 = s2, %o2 = len
 * Out:  %o0 = 0 when s1 == s2, when len == 0, or when all len bytes match;
 *       otherwise (s1 byte - s2 byte) for the first differing pair, both
 *       bytes zero-extended (loaded with ldub or extracted with srl/and).
 * Uses: %o1-%o5, %g1, %g5 and the integer condition codes.  Leaf routine
 *       (returns with retl); only loads are issued, nothing is stored.
 *
 * Strategy:
 *   len <= 17   plain byte loop (.cmpbyt / .bytcmp).
 *   len >  17   compare 1-3 leading bytes until s1 is word aligned
 *               (.algn1 / .algn2), then compare 32-bit words with a loop
 *               chosen by (s2 & 3):
 *                   0 -> .w4cmp   1 -> .w1cmp   2 -> .w2cmp   3 -> .w3cmp
 *               In the three unaligned loops s2 is read with aligned word
 *               loads and each s2 word is merged from the tail of the
 *               previous load and the head of the current one.  Whatever
 *               is left is finished by the byte loop.
 *
 * Register roles:
 *   %o0   s1 cursor
 *   %o1   s2 cursor while aligning; (s2 - s1) offset in the byte loop and
 *         in the word loops, so [%o0 + %o1] addresses s2
 *   %o2   bytes left; after .iss2 the count handed to the byte loop
 *   %o3   (s1 & 3), then scratch in .algn2; after .iss2 the number of
 *         bytes to compare in the word loop (multiple of 4)
 *   %o4   s1 data; also (s2 & 3) on entry to .iss2
 *   %o5   s2 data / merged s2 word
 *   %g1   raw s2 word (unaligned loops), %g5 shift scratch
 *
 * Delay slots: the instruction after each branch is its delay slot.  On a
 * conditional branch with ",a" the slot runs only when the branch is taken;
 * "b,a" never runs its slot.  Several branches below test flags that were
 * set a few instructions earlier (dec/inc/and/andn/srl/sll/ld* leave the
 * flags alone), so instruction order matters.
 *
 * NOTE(review): the mismatch exits of the word loops are hinted ",pt" in
 * .w3cmp/.w1cmp/.w4cmp and ",pn" in .w2cmp.  These are prediction hints
 * only and were left as found.
 */
        ENTRY(memcmp)
        cmp     %o0, %o1                ! s1 == s2?
        be,pn   %xcc, .cmpeq            ! same pointer, nothing to compare
        cmp     %o2, 17                 ! delay slot (always runs), len vs 17
        bleu,a,pn %xcc, .cmpbyt         ! for small counts go do bytes
        sub     %o1, %o0, %o1           ! delay slot (taken only), s2 - s1

        andcc   %o0, 3, %o3             ! is s1 aligned?
        bz,a,pn %icc, .iss2             ! if so go check s2
        andcc   %o1, 3, %o4             ! is s2 aligned?
        cmp     %o3, 2
        be,pn   %icc, .algn2            ! two bytes short of alignment
        cmp     %o3, 3                  ! delay slot, flags consumed in .algn1

.algn1: ldub    [%o0], %o4              ! cmp one byte
        inc     %o0
        ldub    [%o1], %o5
        inc     %o1
        dec     %o2                     ! leaves the flags from cmp %o3, 3
        be,pn   %icc, .algn3            ! (s1 & 3) was 3, s1 is aligned now
        cmp     %o4, %o5                ! delay slot (always runs)
        be,pt   %icc, .algn2            ! bytes match, two more to align s1
        nop
        b,a     .noteq

.algn2: lduh    [%o0], %o4              ! next two s1 bytes in one load
        inc     2, %o0
        ldub    [%o1], %o5
        inc     1, %o1
        srl     %o4, 8, %o3             ! high byte is the first s1 byte
        cmp     %o3, %o5
        be,a,pt %icc, 1f
        ldub    [%o1], %o5              ! delay slot, get next byte from s2
        b       .noteq
        mov     %o3, %o4                ! delay slot, move *s1 to %o4
1:      inc     %o1
        dec     2, %o2
        and     %o4, 0xff, %o4          ! low byte is the second s1 byte
        cmp     %o4, %o5
.algn3: be,a,pt %icc, .iss2             ! last alignment byte matched
        andcc   %o1, 3, %o4             ! delay slot, is s2 aligned?
        b,a     .noteq

        ! Byte loop.  Entered at .cmpbyt for short compares and at .bytcmp
        ! (with deccc %o2 in the delay slot of the jump) after the word
        ! loops.  Needs %o1 = s2 - s1.  deccc sets the carry (borrow) only
        ! when the count was already 0, which ends the loop.
.cmpbyt:b       .bytcmp
        deccc   %o2                     ! delay slot, count the first byte
1:      ldub    [%o0 + %o1], %o5        ! byte compare loop
        inc     %o0
        cmp     %o4, %o5
        be,a,pt %icc, .bytcmp
        deccc   %o2                     ! delay slot, compare count (len)
        b,a     .noteq
.bytcmp:bgeu,a,pt %xcc, 1b              ! no borrow, another byte to compare
        ldub    [%o0], %o4              ! delay slot (taken only), s1 byte
.cmpeq:
        retl                            ! strings compare equal
        clr     %o0

        ! %o4 (s1 word) and %o5 (s2 word) differ.  Walk the four bytes from
        ! the most significant end; srl clears bits 63:32 before shifting,
        ! so stray upper bits left by the sll instructions do not matter.
        ! The fourth pair needs no test: it must be the one that differs.
.noteq_word:                            ! words differ, find unequal byte
        srl     %o4, 24, %o1            ! first byte
        srl     %o5, 24, %o2
        cmp     %o1, %o2
        bne,pn  %icc, 1f
        sll     %o4, 8, %o4             ! delay slot (always runs)
        sll     %o5, 8, %o5
        srl     %o4, 24, %o1            ! second byte
        srl     %o5, 24, %o2
        cmp     %o1, %o2
        bne,pn  %icc, 1f
        sll     %o4, 8, %o4             ! delay slot (always runs)
        sll     %o5, 8, %o5
        srl     %o4, 24, %o1            ! third byte
        srl     %o5, 24, %o2
        cmp     %o1, %o2
        bne,pn  %icc, 1f
        sll     %o4, 8, %o4             ! delay slot (always runs)
        sll     %o5, 8, %o5
        srl     %o4, 24, %o1            ! fourth byte
        srl     %o5, 24, %o2
1:
        retl
        sub     %o1, %o2, %o0           ! delay slot

.noteq:
        retl                            ! strings differ
        sub     %o4, %o5, %o0           ! delay slot, return(*s1 - *s2)

        ! s1 is word aligned.  %o4 = s2 & 3 and the flags still reflect the
        ! andcc that computed it (andn/and below do not set flags).
.iss2:  andn    %o2, 3, %o3             ! count of aligned bytes
        and     %o2, 3, %o2             ! remaining bytes
        bz,pn   %icc, .w4cmp            ! if s2 word aligned, compare words
        cmp     %o4, 2                  ! delay slot (always runs)
        be,pn   %icc, .w2cmp            ! s2 half aligned
        cmp     %o4, 1                  ! delay slot, flags used in .w3cmp

        ! (s2 & 3) is 1 or 3.  Both cases hand the last four word-loop
        ! bytes to the byte loop and consume one s2 byte here.
.w3cmp:
        dec     4, %o3                  ! avoid reading beyond the last byte
        inc     4, %o2
        ldub    [%o1], %g1              ! read a byte to align for word reads
        inc     1, %o1
        be,pt   %icc, .w1cmp            ! aligned to 1 or 3 bytes
        sll     %g1, 24, %o5            ! delay slot, byte goes to the top

        ! (s2 & 3) == 3: one pending s2 byte plus three from each new word.
        sub     %o1, %o0, %o1           ! %o1 = aligned s2 - s1
2:      lduw    [%o0 + %o1], %g1
        lduw    [%o0], %o4
        inc     4, %o0
        srl     %g1, 8, %g5             ! merge with the other half
        or      %g5, %o5, %o5
        cmp     %o4, %o5
        bne,pt  %icc, .noteq_word
        deccc   4, %o3                  ! delay slot (always runs)
        bnz,pt  %xcc, 2b
        sll     %g1, 24, %o5            ! delay slot, keep the unused byte
        sub     %o1, 1, %o1             ! used 3 bytes of the last word read
        b       .bytcmp
        deccc   %o2

        ! (s2 & 3) == 1: three pending s2 bytes plus one from each new word.
        ! Reached only from the branch in .w3cmp, which has already moved
        ! four bytes from %o3 to %o2, so that adjustment is not repeated.
.w1cmp:
        lduh    [%o1], %g1              ! read 3 bytes to word align
        inc     2, %o1
        sll     %g1, 8, %g5
        or      %o5, %g5, %o5

        sub     %o1, %o0, %o1           ! %o1 = aligned s2 - s1
3:      lduw    [%o0 + %o1], %g1
        lduw    [%o0], %o4
        inc     4, %o0
        srl     %g1, 24, %g5            ! merge with the other half
        or      %g5, %o5, %o5
        cmp     %o4, %o5
        bne,pt  %icc, .noteq_word
        deccc   4, %o3                  ! delay slot (always runs)
        bnz,pt  %xcc, 3b
        sll     %g1, 8, %o5             ! delay slot, keep the unused bytes
        sub     %o1, 3, %o1             ! used 1 byte of the last word read
        b       .bytcmp
        deccc   %o2

        ! (s2 & 3) == 2: two pending s2 bytes plus two from each new word.
.w2cmp:
        dec     4, %o3                  ! avoid reading beyond the last byte
        inc     4, %o2
        lduh    [%o1], %g1              ! read a halfword to align s2
        inc     2, %o1
        sll     %g1, 16, %o5
        sub     %o1, %o0, %o1           ! %o1 = aligned s2 - s1
4:      lduw    [%o0 + %o1], %g1        ! read a word from s2
        lduw    [%o0], %o4              ! read a word from s1
        inc     4, %o0
        srl     %g1, 16, %g5            ! merge with the other half
        or      %g5, %o5, %o5
        cmp     %o4, %o5
        bne,pn  %icc, .noteq_word
        deccc   4, %o3                  ! delay slot (always runs)
        bnz,pt  %xcc, 4b
        sll     %g1, 16, %o5            ! delay slot, keep the unused half
        sub     %o1, 2, %o1             ! only used half of the last read word
        b       .bytcmp
        deccc   %o2

        ! s1 and s2 both word aligned: straight word compare.  The next s2
        ! word is fetched in the annulled delay slot, i.e. only when the
        ! loop continues.
.w4cmp:
        sub     %o1, %o0, %o1           ! %o1 = s2 - s1
        lduw    [%o0 + %o1], %o5
5:      lduw    [%o0], %o4
        inc     4, %o0
        cmp     %o4, %o5
        bne,pt  %icc, .noteq_word
        deccc   4, %o3                  ! delay slot (always runs)
        bnz,a,pt %xcc, 5b
        lduw    [%o0 + %o1], %o5        ! delay slot (taken only)
        b       .bytcmp                 ! compare remaining bytes, if any
        deccc   %o2

        SET_SIZE(memcmp)