#include "sysdep.h"
.register %g2, #scratch
.register %g3, #scratch
! __mpn_rshift -- shift a multi-limb number right by a bit count, storing
! the shifted limbs and returning the bits that fall off the low end.
!
! Register usage on entry, as established by the code below:
!   %o0  destination pointer; one 64-bit limb is stored per source limb
!   %o1  source pointer; 64-bit limbs are loaded from here
!   %o2  number of limbs; the first limb is loaded unconditionally, so
!        this has to be at least 1
!   %o3  shift count in bits
!
! Result in %o0: the first source limb shifted left by the negated count,
! that is, the bits shifted out at the low end, left-aligned in a limb.
!
! Limbs are processed from the lowest address upward.  Each stored limb is
!   (src[i] >> cnt) | (src[i+1] << -cnt)
! and the last stored limb is just (src[size-1] >> cnt).
!
! Leaf routine: no register window is allocated and it returns through
! %o7.  It writes %g1-%g5, %o4, %o5 and the integer condition codes, and
! advances %o0, %o1 and %o2 as working registers.
!
! NOTE(review): presumably 1 <= cnt <= 63 is required.  The negated count
! in %o5 only acts as (64 - cnt) if the 64-bit shifts reduce the count
! modulo 64, and cnt = 0 would OR a whole neighbouring limb into each
! result -- confirm against the callers.
ENTRY(__mpn_rshift)
ldx [%o1],%g2 ! %g2 = first (lowest) source limb
sub %g0,%o3,%o5 ! %o5 = -cnt, the left-shift count for the carried-in bits
add %o2,-1,%o2 ! %o2 = size - 1; the top limb is handled separately at .Lend
andcc %o2,4-1,%g4 ! %g4 = (size - 1) mod 4 = limbs for the first loop; sets cc
sllx %g2,%o5,%g1 ! bits shifted out of the first limb = function result
be,pn %xcc,.L0 ! nothing left over modulo 4: skip the first loop
mov %g1,%g5 ! delay slot (always executed): keep the result in %g5
sub %o2,%g4,%o2 ! %o2 = limbs left for the main loop, a multiple of 4
! First loop: one limb per iteration, %g4 iterations (1 to 3).
! On entry to each iteration %g2 holds the limb at [%o1].
.Loop0: ldx [%o1+8],%g3 ! %g3 = next higher source limb
add %o0,8,%o0 ! advance destination now; the store below uses offset -8
add %o1,8,%o1 ! advance source
srlx %g2,%o3,%o4 ! low part: current limb shifted right by cnt
addcc %g4,-1,%g4 ! count down; these condition codes feed the bne below
sllx %g3,%o5,%g1 ! high part: bits carried in from the next limb
mov %g3,%g2 ! next limb becomes the current limb
or %o4,%g1,%o4 ! combine both parts into the result limb
bne,pt %xcc,.Loop0 ! repeat until %g4 reaches zero
stx %o4,[%o0-8] ! delay slot (always executed): store the result limb
.L0: brz,pn %o2,.Lend ! no groups of four limbs left: go store the top limb
nop ! branch delay slot
! Main loop: four limbs per iteration.  %g2 and %g3 alternate as holders
! of the current and next source limb, %o4 and %g4 alternate as holders
! of the result limb being assembled.  On entry %g2 holds the limb at
! [%o1].
.Loop: ldx [%o1+8],%g3 ! load source limb 1 of this group
add %o0,32,%o0 ! advance destination by 4 limbs; stores use negative offsets
srlx %g2,%o3,%o4 ! result 0, low part
addcc %o2,-4,%o2 ! count down 4 limbs; condition codes feed the final bne
sllx %g3,%o5,%g1 ! result 0, high part from limb 1
ldx [%o1+16],%g2 ! load source limb 2
srlx %g3,%o3,%g4 ! result 1, low part
or %o4,%g1,%o4 ! finish result 0
stx %o4,[%o0-32] ! store result 0
sllx %g2,%o5,%g1 ! result 1, high part from limb 2
ldx [%o1+24],%g3 ! load source limb 3
srlx %g2,%o3,%o4 ! result 2, low part
or %g4,%g1,%g4 ! finish result 1
stx %g4,[%o0-24] ! store result 1
sllx %g3,%o5,%g1 ! result 2, high part from limb 3
ldx [%o1+32],%g2 ! load source limb 4 = current limb of the next iteration
srlx %g3,%o3,%g4 ! result 3, low part
or %o4,%g1,%o4 ! finish result 2
stx %o4,[%o0-16] ! store result 2
sllx %g2,%o5,%g1 ! result 3, high part from limb 4
add %o1,32,%o1 ! advance source by 4 limbs
or %g4,%g1,%g4 ! finish result 3
bne,pt %xcc,.Loop ! repeat until %o2 reaches zero
stx %g4,[%o0-8] ! delay slot (always executed): store result 3
! Tail: %g2 holds the last source limb and %o0 points at the last
! destination limb.
.Lend: srlx %g2,%o3,%g2 ! top limb: logical shift, zeros enter from above
stx %g2,[%o0-0] ! store the top result limb
jmpl %o7+8,%g0 ! return to the caller (leaf return, link discarded)
mov %g5,%o0 ! delay slot: return value = bits shifted out at the low end
END(__mpn_rshift)