VMEM_ALIGNUP
/*
 * Recompute `start` relative to `phase`: subtract `phase`, pass the result
 * together with `align` to VMEM_ALIGNUP, then add `phase` back.
 * NOTE(review): VMEM_ALIGNUP is not defined in this excerpt; presumably it
 * rounds its first argument up to a multiple of the second -- confirm.
 * NOTE(review): if VMEM_ALIGNUP is a macro, `start - phase` may be evaluated
 * more than once -- confirm the arguments stay free of side effects.
 */
start = VMEM_ALIGNUP(start - phase, align) + phase;
/*
 * Same computation with `nocross` in place of `align`. As written here it
 * reads the `start` value assigned by the previous statement and overwrites it.
 * NOTE(review): no guarding condition is visible in this excerpt; verify
 * whether this statement is meant to run unconditionally after the one above.
 */
start = VMEM_ALIGNUP(start - phase, nocross) + phase;