root/usr/src/lib/libc/sparc/gen/strncpy.S
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "strncpy.s"

/*
 * strncpy(s1, s2, n)
 *
 * Copy string s2 to s1, truncating or null-padding so that exactly n bytes
 * are always written; return s1.
 *
 * Fast assembler language version of the following C-program for strncpy
 * which represents the `standard' for the C-library.
 *
 *      char *
 *      strncpy(char *s1, const char *s2, size_t n)
 *      {
 *              char *os1 = s1;
 *
 *              n++;
 *              while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
 *                      ;
 *              if (n != 0)
 *                      while (--n != 0)
 *                              *s1++ = '\0';
 *              return (os1);
 *      }
 */

#include <sys/asm_linkage.h>

        ! strncpy works similarly to strcpy, except that n bytes of s2
        ! are copied to s1. If a null character is reached in s2 yet more
        ! bytes remain to be copied, strncpy will copy null bytes into
        ! the destination string.
        !
        ! This implementation works by first aligning the src ptr and
        ! performing small copies until it is aligned.  Then, the string
        ! is copied based upon destination alignment.  (byte, half-word,
        ! word, etc.)

        ENTRY(strncpy)

        ! Register roles.  The %o names apply on entry and in the short
        ! (n < 7) leaf path; after the save below the same values are
        ! seen through the corresponding %i names.
        !
        !   %o0 / %i0   s1 as passed in; returned unchanged
        !   %o3 / %i3   src + n
        !   %o2 / %i2   dst + n (biased by -4 inside the word copy loops)
        !   %o4 / %i4   -n; a negative index that counts up towards zero,
        !               so [%i3 + %i4] / [%i2 + %i4] address the current
        !               source / destination position
        !   %i1         byte or word most recently loaded from src
        !   %l1, %i5    zero-byte detection constants 0x01010101 and
        !               0x80808080 (%l1 is reused as a counter once
        !               padding starts)
        !   %l0         scratch; plain dst pointer during the padding phases

        .align 32
        subcc   %g0, %o2, %o4           ! n = -n
        bz      .doneshort              ! if n == 0, done
        cmp     %o2, 7                  ! n < 7 ? (delay slot; sets cc for blu)
        add     %o1, %o2, %o3           ! src = src + n
        blu     .shortcpy               ! n < 7, use byte-wise copy
        add     %o0, %o2, %o2           ! dst = dst + n
        andcc   %o1, 3, %o5             ! src word aligned ?
        bz      .wordaligned            ! yup
        save    %sp, -0x40, %sp         ! create new register window (delay slot: runs on both paths)
        sub     %i5, 4, %i5             ! bytes until src aligned (negative count)
        nop                             ! align loop on 16-byte boundary
        nop                             ! align loop on 16-byte boundary

        ! copy single bytes until src is word aligned, n runs out, or the
        ! terminating null byte has been copied

.alignsrc:
        ldub    [%i3 + %i4], %i1        ! src[]
        stb     %i1, [%i2 + %i4]        ! dst[] = src[]
        inccc   %i4                     ! src++, dst++, n--
        bz      .done                   ! n == 0, done
        tst     %i1                     ! end of src reached (null byte) ?
        bz,a    .bytepad                ! yes, at least one byte to pad here
        add     %i2, %i4, %l0           ! need single dest pointer for fill
        inccc   %i5                     ! src aligned now?
        bnz     .alignsrc               ! no, copy another byte
        .empty                          ! delay slot is the first instruction of .wordaligned

        ! src is word aligned from here on; pick a copy loop according to
        ! the alignment of dst

.wordaligned:
        add     %i2, %i4, %l0           ! dst
        sethi   %hi(0x01010101), %l1    ! Alan Mycroft's magic1
        sub     %i2, 4, %i2             ! adjust for dest pre-incr in cpy loops
        or      %l1, %lo(0x01010101),%l1!  finish loading magic1
        andcc   %l0, 3, %g1             ! destination word aligned ?
        bnz     .dstnotaligned          ! nope
        sll     %l1, 7, %i5             ! create Alan Mycroft's magic2 (0x80808080)

        ! src and dst both word aligned: copy a word at a time

.storeword:
        lduw    [%i3 + %i4], %i1        ! src word
        addcc   %i4, 4, %i4             ! n += 4, src += 4, dst += 4
        bcs     .lastword               ! if counter wraps, last word
        andn    %i5, %i1, %g1           ! ~word & 0x80808080
        sub     %i1, %l1, %l0           ! word - 0x01010101
        andcc   %l0, %g1, %g0           ! ((word - 0x01010101) & ~word & 0x80808080)
        bz,a    .storeword              ! no zero byte if magic expression == 0
        stw     %i1, [%i2 + %i4]        ! store word to dst (address pre-incremented)

        ! n has not expired, but src is at the end. we need to push out the
        ! remaining src bytes and then start padding with null bytes.
        ! once a null byte has been seen, the rest of the word in %i1 is
        ! cleared so that all following bytes are stored as zero.

.zerobyte:
        add     %i2, %i4, %l0           ! pointer to dest string
        srl     %i1, 24, %g1            ! first byte
        stb     %g1, [%l0]              ! store it
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null bytes
        srl     %i1, 16, %g1            ! second byte
        stb     %g1, [%l0 + 1]          ! store it
        and     %g1, 0xff, %g1          ! isolate byte
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null bytes
        srl     %i1, 8, %g1             ! third byte
        stb     %g1, [%l0 + 2]          ! store it
        and     %g1, 0xff, %g1          ! isolate byte
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null bytes
        stb     %i1, [%l0 + 3]          ! store fourth byte
        addcc   %i4, 8, %g0             ! number of pad bytes <= 8 ? (carry set if so)
        bcs     .bytepad                ! yes, do simple byte wise fill
        add     %l0, 4, %l0             ! dst += 4
        andcc   %l0, 3, %l1             ! dst offset relative to word boundary
        bz      .fillaligned            ! dst already word aligned

        ! here there is at least one more byte to zero out: otherwise we would
        ! have exited through label .lastword

        sub     %l1, 4, %l1             ! bytes to align dst to word boundary (also the delay slot of bz above)
.makealigned:
        stb     %g0, [%l0]              ! dst[] = 0
        addcc   %i4, 1, %i4             ! n--
        bz      .done                   ! n == 0, we are done
        addcc   %l1, 1, %l1             ! any more byte needed to align
        bnz     .makealigned            ! yup, pad another byte
        add     %l0, 1, %l0             ! dst++
        nop                             ! pad to align copy loop below

        ! here we know that there are at least another 4 bytes to pad, since
        ! we don't get here unless there were more than 8 bytes to pad to
        ! begin with, and we have padded at most 3 bytes during dst aligning

.fillaligned:
        add     %i4, 3, %i2             ! round up to next word boundary
        and     %i2, -4, %l1            ! negative count of whole words left, in bytes
        and     %i2, 4, %i2             ! word count odd ? 4 : 0
        stw     %g0, [%l0]              ! store first word
        addcc   %l1, %i2, %l1           ! word count == 1 ?
        add     %i4, %i2, %i4           ! if word count odd, n -= 4
        bz      .bytepad                ! if word count == 1, pad bytes left
        add     %l0, %i2, %l0           ! bump dst if word count odd

        ! zero two words per iteration; %l1 counts the whole words, %i4
        ! still tracks the total number of bytes left

.fillword:
        addcc   %l1, 8, %l1             ! count -= 8
        stw     %g0, [%l0]              ! dst[n] = 0
        stw     %g0, [%l0 + 4]          ! dst[n+4] = 0
        add     %l0, 8, %l0             ! dst += 8
        bcc     .fillword               ! fill words until count == 0
        addcc   %i4, 8, %i4             ! n -= 8
        bz      .done                   ! if n == 0, we are done
        .empty                          ! delay slot is the first instruction of .bytepad

        ! zero the remaining bytes; %i4 is non-zero on entry. an odd count
        ! is made even by the first store, then two bytes go per iteration

.bytepad:
        and     %i4, 1, %i2             ! byte count odd ? 1 : 0
        stb     %g0, [%l0]              ! store first byte
        addcc   %i4, %i2, %i4           ! byte count == 1 ?
        bz      .done                   ! yup, we are done
        add     %l0, %i2, %l0           ! bump pointer if odd

.fillbyte:
        addcc   %i4, 2, %i4             ! n -= 2
        stb     %g0, [%l0]              ! dst[n] = 0
        stb     %g0, [%l0 + 1]          ! dst[n+1] = 0
        bnz     .fillbyte               ! fill until n == 0
        add     %l0, 2, %l0             ! dst += 2

.done:
        ret                             ! done
        restore %i0, %g0, %o0           ! restore reg window, return dst

        ! this is the last word. It may contain null bytes. store bytes
        ! until n == 0. if null byte encountered, continue, storing null
        ! bytes for the rest of the word

.lastword:
        sub     %i4, 4, %i4             ! undo counter pre-increment
        add     %i2, 4, %i2             ! adjust dst for counter un-bumping

        srl     %i1, 24, %g1            ! first byte
        stb     %g1, [%i2 + %i4]        ! store it
        inccc   %i4                     ! n--
        bz      .done                   ! if n == 0, we're done
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null
        srl     %i1, 16, %g1            ! second byte
        stb     %g1, [%i2 + %i4]        ! store it
        inccc   %i4                     ! n--
        bz      .done                   ! if n == 0, we're done
        and     %g1, 0xff, %g1          ! isolate byte
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null
        srl     %i1, 8, %g1             ! third byte
        stb     %g1, [%i2 + %i4]        ! store it
        inccc   %i4                     ! n--
        bz      .done                   ! if n == 0, we're done
        and     %g1, 0xff, %g1          ! isolate byte
        sub     %g1, 1, %g1             ! byte == 0 ? -1 : byte - 1
        sra     %g1, 31, %g1            ! byte == 0 ? -1 : 0
        andn    %i1, %g1, %i1           ! if byte == 0, start padding with null
        ba      .done                   ! here n must be zero, we are done
        stb     %i1, [%i2 + %i4]        ! store fourth byte

        ! dst is not word aligned; %g1 = dst & 3 (non-zero here)

.dstnotaligned:
        cmp     %g1, 2                  ! dst half word aligned?
        be      .storehalfword2         ! yup, store half word at a time
        .empty                          ! delay slot is the lduw below, which loads the first word for either loop

        ! dst is at an odd address: store each source word as
        ! byte, half-word, byte

.storebyte:
        lduw    [%i3 + %i4], %i1        ! x = src[]
        addcc   %i4, 4, %i4             ! src += 4, dst += 4, n -= 4
        bcs     .lastword               ! if counter wraps, last word
        andn    %i5, %i1, %g1           ! ~x & 0x80808080
        sub     %i1, %l1, %l0           ! x - 0x01010101
        andcc   %l0, %g1, %g0           ! ((x - 0x01010101) & ~x & 0x80808080)
        bnz     .zerobyte               ! end of src found, may need to pad
        add     %i2, %i4, %l0           ! dst (in pointer form)
        srl     %i1, 24, %g1            ! %g1<7:0> = 1st byte; half-word aligned now
        stb     %g1, [%l0]              ! store first byte
        srl     %i1, 8, %g1             ! %g1<15:0> = bytes 2, 3
        sth     %g1, [%l0 + 1]          ! store bytes 2, 3
        ba      .storebyte              ! next word
        stb     %i1, [%l0 + 3]          ! store fourth byte
        nop
        nop

        ! dst is half-word aligned: store each source word as two
        ! half-words. first entry is at .storehalfword2, with the word
        ! already loaded

.storehalfword:
        lduw    [%i3 + %i4], %i1        ! x = src[]
.storehalfword2:
        addcc   %i4, 4, %i4             ! src += 4, dst += 4, n -= 4
        bcs     .lastword               ! if counter wraps, last word
        andn    %i5, %i1, %g1           ! ~x & 0x80808080
        sub     %i1, %l1, %l0           ! x - 0x01010101
        andcc   %l0, %g1, %g0           ! ((x - 0x01010101) & ~x & 0x80808080)
        bnz     .zerobyte               ! x has zero byte, handle end cases
        add     %i2, %i4, %l0           ! dst (in pointer form)
        srl     %i1, 16, %g1            ! %g1<15:0> = bytes 1, 2
        sth     %g1, [%l0]              ! store bytes 1, 2
        ba      .storehalfword          ! next word
        sth     %i1, [%l0 + 2]          ! store bytes 3, 4

        ! n < 7: copy byte by byte in the caller's register window
        ! (leaf path, no save/restore)

.shortcpy:
        ldub    [%o3 + %o4], %o5        ! src[]
        stb     %o5, [%o2 + %o4]        ! dst[] = src[]
        inccc   %o4                     ! src++, dst++, n--
        bz      .doneshort              ! if n == 0, done
        tst     %o5                     ! src[] == 0 ?
        bnz,a   .shortcpy               ! nope, next byte
        nop                             ! empty delay slot

        ! null byte copied but n not yet exhausted: zero the remaining bytes

.padbyte:
        stb     %g0, [%o2 + %o4]        ! dst[] = 0
.padbyte2:
        addcc   %o4, 1, %o4             ! dst++, n--
        bnz,a   .padbyte2               ! if n != 0, next byte
        stb     %g0, [%o2 + %o4]        ! dst[] = 0 (annulled when the loop exits)
        nop                             ! align label below to 16-byte boundary

.doneshort:
        retl                            ! return from leaf; %o0 still holds s1
        nop                             ! empty delay slot
        SET_SIZE(strncpy)