root/usr/src/lib/libc/sparcv9/gen/strncmp.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "strncmp.s"

/*
 * strncmp(s1, s2, n)
 *
 * Compare strings (at most n bytes):  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for strncmp
 * which represents the `standard' for the C-library.
 *
 *      int
 *      strncmp(const char *s1, const char *s2, size_t n)
 *      {
 *              n++;
 *              if (s1 == s2)
 *                      return (0);
 *              while (--n != 0 && *s1 == *s2++)
 *                      if(*s1++ == '\0')
 *                              return(0);
 *              return ((n == 0) ? 0 : (*s1 - s2[-1]));
 *      }
 */

#include <sys/asm_linkage.h>

        ENTRY(strncmp)
        !
        ! int strncmp(const char *s1, const char *s2, size_t n)
        !
        ! Register usage after the save:
        !   %i0  s1 while aligning s1 byte-wise; afterwards (s1 - s2), so
        !        that [%i0 + %i1] addresses s1 while only %i1 is advanced
        !   %i1  s2 pointer
        !   %i2  n, number of bytes still allowed to be compared
        !   %i3  s2 & 3 on entry to .iss2; later the newest aligned s2 word
        !   %i4  byte/word taken from s1
        !   %i5  byte/word taken (or assembled) from s2
        !   %l0-%l2  byte masks 0xff000000, 0x00ff0000, 0x0000ff00
        !   %l3-%l5  scratch
        !   %l6, %l7 constants 0x7efefeff / 0x81010100 for zero-byte detection
        !
        ! Returns 0 in %o0 via .doneq, or the difference of the first
        ! mismatching bytes (s1 byte - s2 byte) via .noteq/.noteqb.
        !
        save    %sp, -SA(WINDOWSIZE), %sp
        cmp     %i2, 8
        blu,a,pn %xcc, .cmp_bytes       ! for small counts go do bytes
        sub     %i0, %i1, %i0           ! delay slot (annulled when n >= 8), get diff from s1 - s2
        andcc   %i0, 3, %g0             ! is s1 aligned
1:      bz,pn   %icc, .iss2             ! if so go check s2
        andcc   %i1, 3, %i3             ! delay slot: %i3 = s2 & 3, cc consumed in .iss2

        deccc   %i2                     ! --n >= 0 ?
        bcs,pn  %xcc, .doneq
        nop                             ! delay slot

        ldub    [%i0], %i4              ! else cmp one byte
        ldub    [%i1], %i5
        inc     %i0
        cmp     %i4, %i5
        bne,pn  %icc, .noteqb
        inc     %i1                     ! delay slot
        tst     %i4                     ! terminating zero
        bnz,pt  %icc, 1b
        andcc   %i0, 3, %g0             ! delay slot: re-test s1 alignment for 1b
        b,a     .doneq

        !
        ! s1 is now word aligned.  Build the constants and dispatch on the
        ! alignment of s2 (%i3 = s2 & 3; the condition codes still come
        ! from the andcc that computed it, since set/sethi/srl leave them
        ! untouched).
        !
.iss2:
        set     0x7efefeff, %l6
        set     0x81010100, %l7
        sethi   %hi(0xff000000), %l0    ! masks to test for terminating null
        sethi   %hi(0x00ff0000), %l1
        srl     %l1, 8, %l2             ! generate 0x0000ff00 mask

        bz,pn   %icc, .w4cmp            ! if s2 word aligned, compare words
        cmp     %i3, 2                  ! check if s2 half aligned
        be,pn   %icc, .w2cmp
        cmp     %i3, 1                  ! check if aligned to 1 or 3 bytes

        !
        ! s2 & 3 == 3: one byte brings s2 to a word boundary.  Each s2
        ! comparison word is the carried byte followed by the top three
        ! bytes of the next aligned s2 word.
        !
.w3cmp: ldub    [%i1], %i5
        inc     1, %i1
        be,pt   %icc, .w1cmp            ! s2 & 3 == 1 (cc from the cmp above)
        sll     %i5, 24, %i5            ! delay slot: carried byte to the top
        sub     %i0, %i1, %i0           ! %i0 = s1 - s2, [%i0 + %i1] is s1
2:
        deccc   4, %i2                  ! n >= 4 ?
        bgeu,a,pt %xcc, 3f
        lduw    [%i1], %i3              ! delay slot (only if n >= 4): aligned s2 word
        dec     %i1                     ! reset s2
        inc     %i0                     ! reset s1 diff
        b       .cmp_bytes              ! do a byte at a time if n < 4
        inc     4, %i2                  ! delay slot: undo the decrement
3:
        lduw    [%i0 + %i1], %i4        ! word from s1
        inc     4, %i1
        srl     %i3, 8, %l4             ! merge with the other half
        or      %l4, %i5, %i5
        cmp     %i4, %i5
        be,pn   %icc, 1f
        add     %i4, %l6, %l3           ! delay slot: start zero-byte detection

        b,a     .noteq
1:      xor     %l3, %i4, %l3
        and     %l3, %l7, %l3
        cmp     %l3, %l7
        be,a,pt %icc, 2b                ! no zero byte detected, next word
        sll     %i3, 24, %i5            ! delay slot: carry last s2 byte

        !
        ! For 7-bit characters, we know one of the bytes is zero, but for
        ! 8-bit characters, the zero detection algorithm gives some false
        ! triggers ... check every byte individually.
        !
        andcc   %i4, %l0, %g0           ! check if first byte was zero
        bnz,pt  %icc, 1f
        andcc   %i4, %l1, %g0           ! check if second byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, %l2, %g0           ! check if third byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, 0xff, %g0          ! check if last byte is zero
        b,a     .doneq
1:      bnz,pn  %icc, 2b                ! false trigger, keep going
        sll     %i3, 24, %i5            ! delay slot: carry last s2 byte
        b,a     .doneq

        !
        ! s2 & 3 == 1: the byte read at .w3cmp plus one halfword bring s2
        ! to a word boundary.  Each s2 comparison word is the three
        ! carried bytes followed by the top byte of the next aligned word.
        !
.w1cmp: clr     %l4
        lduh    [%i1], %l4
        inc     2, %i1
        sll     %l4, 8, %l4
        or      %i5, %l4, %i5

        sub     %i0, %i1, %i0           ! %i0 = s1 - s2, [%i0 + %i1] is s1
3:
        deccc   4, %i2                  ! n >= 4 ?
        bgeu,a,pt %xcc, 4f
        lduw    [%i1], %i3              ! delay slot (only if n >= 4): aligned s2 word
        dec     3, %i1                  ! reset s2
        inc     3, %i0                  ! reset s1 diff
        b       .cmp_bytes              ! do a byte at a time if n < 4
        inc     4, %i2                  ! delay slot: undo the decrement
4:
        lduw    [%i0 + %i1], %i4        ! word from s1
        inc     4, %i1
        srl     %i3, 24, %l4            ! merge with the other half
        or      %l4, %i5, %i5
        cmp     %i4, %i5
        be,pt   %icc, 1f
        add     %i4, %l6, %l3           ! delay slot: start zero-byte detection

        b,a     .noteq
1:      xor     %l3, %i4, %l3
        and     %l3, %l7, %l3
        cmp     %l3, %l7
        be,a,pt %icc, 3b                ! no zero byte detected, next word
        sll     %i3, 8, %i5             ! delay slot: carry low three s2 bytes

        andcc   %i4, %l0, %g0           ! check if first byte was zero
        bnz,pt  %icc, 1f
        andcc   %i4, %l1, %g0           ! check if second byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, %l2, %g0           ! check if third byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, 0xff, %g0          ! check if last byte is zero
        b,a     .doneq
1:      bnz,pn  %icc, 3b                ! false trigger, keep going
        sll     %i3, 8, %i5             ! delay slot: carry low three s2 bytes
        b,a     .doneq

        !
        ! s2 & 3 == 2: one halfword brings s2 to a word boundary.  Each s2
        ! comparison word is the carried halfword followed by the top
        ! halfword of the next aligned word.
        !
.w2cmp:
        lduh    [%i1], %i5              ! read a halfword to align s2
        inc     2, %i1
        sll     %i5, 16, %i5

        sub     %i0, %i1, %i0           ! %i0 = s1 - s2, [%i1 + %i0] is s1
4:
        deccc   4, %i2                  ! n >= 4 ?
        bgeu,a,pt %xcc, 5f
        lduw    [%i1], %i3              ! delay slot (only if n >= 4): aligned s2 word
        dec     2, %i1                  ! reset s2
        inc     2, %i0                  ! reset s1 diff
        b       .cmp_bytes              ! do a byte at a time if n < 4
        inc     4, %i2                  ! delay slot
5:
        lduw    [%i1 + %i0], %i4        ! read a word from s1
        inc     4, %i1
        srl     %i3, 16, %l4            ! merge with the other half
        or      %l4, %i5, %i5
        cmp     %i4, %i5
        be,pt   %icc, 1f
        add     %i4, %l6, %l3           ! delay slot: start zero-byte detection

        b,a     .noteq
1:      xor     %l3, %i4, %l3           ! are any bytes 0?
        and     %l3, %l7, %l3
        cmp     %l3, %l7
        be,a,pt %icc, 4b                ! no zero byte detected, next word
        sll     %i3, 16, %i5            ! delay slot: carry low s2 halfword

        andcc   %i4, %l0, %g0           ! check if first byte was zero
        bnz,pt  %icc, 1f
        andcc   %i4, %l1, %g0           ! check if second byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, %l2, %g0           ! check if third byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, 0xff, %g0          ! check if last byte is zero
        b,a     .doneq
1:      bnz,pn  %icc, 4b                ! false trigger, keep going
        sll     %i3, 16, %i5            ! delay slot: carry low s2 halfword
        b,a     .doneq

        !
        ! Both strings word aligned: compare a word at a time.  The s2
        ! word is loaded only at label 5, after the n != 0 check, so no
        ! memory beyond the n bytes being compared is touched once the
        ! count is exhausted.
        !
.w4cmp: sub     %i0, %i1, %i0           ! %i0 = s1 - s2, [%i1 + %i0] is s1
5:      cmp     %i2, 0
        be,a,pn %xcc, .doneq            ! n exhausted, strings equal
        nop
        lduw    [%i1], %i5              ! read a word from s2
        deccc   4, %i2                  ! n >= 4 ?
        bcs,a,pn %xcc, .cmp_bytes       ! do a byte at a time if n < 4
        inc     4, %i2                  ! delay slot (annulled if n >= 4): undo the decrement

        lduw    [%i1 + %i0], %i4        ! read a word from s1
        cmp     %i4, %i5
        inc     4, %i1
        be,pt   %icc, 1f
        add     %i4, %l6, %l3           ! delay slot: start zero-byte detection

        b,a     .noteq
1:      xor     %l3, %i4, %l3
        and     %l3, %l7, %l3
        cmp     %l3, %l7
        be,pt   %icc, 5b                ! no zero byte detected, next word
        nop

        andcc   %i4, %l0, %g0           ! check if first byte was zero
        bnz,pt  %icc, 1f
        andcc   %i4, %l1, %g0           ! check if second byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, %l2, %g0           ! check if third byte was zero
        b,a     .doneq
1:      bnz,pt  %icc, 1f
        andcc   %i4, 0xff, %g0          ! check if last byte is zero
        b,a     .doneq
1:      bnz,pn  %icc, 5b                ! false trigger, keep going
        nop                             ! delay slot: no load here, n may already be 0
.doneq: ret
        restore %g0, %g0, %o0           ! equal return zero

        !
        ! The words in %i4 (s1) and %i5 (s2) differ.  Walk the bytes from
        ! the most significant end: return the first non-zero difference,
        ! or zero if an equal pair of zero bytes comes first.
        !
.noteq: srl     %i4, 24, %l4
        srl     %i5, 24, %l5
        subcc   %l4, %l5, %i0
        bne,pt  %icc, 6f
        andcc   %l4, 0xff, %g0          ! delay slot: was the s1 byte zero?
        bz      .doneq
        sll     %i4, 8, %l4             ! delay slot: isolate second byte
        sll     %i5, 8, %l5
        srl     %l4, 24, %l4
        srl     %l5, 24, %l5
        subcc   %l4, %l5, %i0
        bne,pt  %icc, 6f
        andcc   %l4, 0xff, %g0          ! delay slot: was the s1 byte zero?
        bz,pt   %icc, .doneq
        sll     %i4, 16, %l4            ! delay slot: isolate third byte
        sll     %i5, 16, %l5
        srl     %l4, 24, %l4
        srl     %l5, 24, %l5
        subcc   %l4, %l5, %i0
        bne,pt  %icc, 6f
        andcc   %l4, 0xff, %g0          ! delay slot: was the s1 byte zero?
        bz,pt   %icc, .doneq
        nop
        ! Byte-wise callers enter here with the differing bytes in the
        ! low 8 bits of %i4 / %i5.
.noteqb:
        and     %i4, 0xff, %l4
        and     %i5, 0xff, %l5
        subcc   %l4, %l5, %i0
6:      ret
        restore %i0, %g0, %o0           ! return the byte difference

        ! Do a byte by byte comparison, disregarding alignments
        ! (%i0 holds s1 - s2 here, %i1 is s2, %i2 is n)
.cmp_bytes:
        deccc   %i2                     ! --n >= 0 ?
1:
        bcs,pn  %xcc, .doneq
        nop                             ! delay slot
        ldub    [%i1 + %i0], %i4        ! read a byte from s1
        ldub    [%i1], %i5              ! read a byte from s2

        inc     %i1
        cmp     %i4, %i5
        bne,pt  %icc, .noteqb
        tst     %i4                     ! delay slot: terminating zero
        bnz,pt  %icc, 1b
        deccc   %i2                     ! delay slot: --n >= 0
        b,a     .doneq

        SET_SIZE(strncmp)