root/usr/src/uts/sun4v/os/memseg.c
/*
 *
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <vm/vm_dep.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <sys/mem_config.h>
#include <sys/sysmacros.h>

extern pgcnt_t pp_dummy_npages;
extern pfn_t *pp_dummy_pfn;     /* Array of dummy pfns. */

extern kmutex_t memseg_lists_lock;
extern struct memseg *memseg_va_avail;
extern struct memseg *memseg_alloc();

extern page_t *ppvm_base;
extern pgcnt_t ppvm_size;

static int sun4v_memseg_debug;

extern struct memseg *memseg_reuse(pgcnt_t);
extern void remap_to_dummy(caddr_t, pgcnt_t);

/*
 * The page_t memory for incoming pages is allocated from existing memory
 * which can create a potential situation where memory addition fails
 * because of shortage of existing memory.  To mitigate this situation
 * some memory is always reserved ahead of time for page_t allocation.
 * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation.  The added 256MB added memory could theoretically
 * allow an addition of 16GB.
 */
#define RSV_SIZE        0x40000000      /* add size with rsrvd page_t's 1G */

#ifdef  DEBUG
#define MEMSEG_DEBUG(args...) if (sun4v_memseg_debug) printf(args)
#else
#define MEMSEG_DEBUG(...)
#endif

/*
 * The page_t's for the incoming memory are allocated from
 * existing pages.
 */
/*ARGSUSED*/
int
memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
{
        page_t          *pp, *opp, *epp;
        pgcnt_t         metapgs;
        int             i;
        struct seg      kseg;
        caddr_t         vaddr;

        /*
         * Verify incoming memory is within supported DR range.
         */
        if ((base + npgs) * sizeof (page_t) > ppvm_size)
                return (KPHYSM_ENOTSUP);

        opp = pp = ppvm_base + base;
        epp = pp + npgs;
        metapgs = btopr(npgs * sizeof (page_t));

        if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
            page_find(&mpvp, (u_offset_t)pp)) {
                /*
                 * Another memseg has page_t's in the same
                 * page which 'pp' resides.  This would happen
                 * if PAGESIZE is not an integral multiple of
                 * sizeof (page_t) and therefore 'pp'
                 * does not start on a page boundry.
                 *
                 * Since the other memseg's pages_t's still
                 * map valid pages, skip allocation of this page.
                 * Advance 'pp' to the next page which should
                 * belong only to the incoming memseg.
                 *
                 * If the last page_t in the current page
                 * crosses a page boundary, this should still
                 * work.  The first part of the page_t is
                 * already allocated.  The second part of
                 * the page_t will be allocated below.
                 */
                ASSERT(PAGESIZE % sizeof (page_t));
                pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
                metapgs--;
        }

        if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
            page_find(&mpvp, (u_offset_t)epp)) {
                /*
                 * Another memseg has page_t's in the same
                 * page which 'epp' resides.  This would happen
                 * if PAGESIZE is not an integral multiple of
                 * sizeof (page_t) and therefore 'epp'
                 * does not start on a page boundry.
                 *
                 * Since the other memseg's pages_t's still
                 * map valid pages, skip allocation of this page.
                 */
                ASSERT(PAGESIZE % sizeof (page_t));
                metapgs--;
        }

        ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

        /*
         * Back metadata space with physical pages.
         */
        kseg.s_as = &kas;
        vaddr = (caddr_t)pp;

        for (i = 0; i < metapgs; i++)
                if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
                        panic("page_find(0x%p, %p)\n",
                            (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));

        /*
         * Allocate the metadata pages; these are the pages that will
         * contain the page_t's for the incoming memory.
         */
        if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
            PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
                MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
                    metapgs);
                return (KPHYSM_ERESOURCE);
        }

        ASSERT(ptp);
        ASSERT(metap);

        *ptp = (void *)opp;
        *metap = metapgs;

        return (KPHYSM_OK);
}

void
memseg_free_meta(void *ptp, pgcnt_t metapgs)
{
        int i;
        page_t *pp;
        u_offset_t off;

        if (!metapgs)
                return;

        off = (u_offset_t)ptp;

        ASSERT(off);
        ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

        MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
            (uint64_t)off, metapgs);
        /*
         * Free pages allocated during add.
         */
        for (i = 0; i < metapgs; i++) {
                pp = page_find(&mpvp, off);
                ASSERT(pp);
                ASSERT(pp->p_szc == 0);
                page_io_unlock(pp);
                page_destroy(pp, 0);
                off += PAGESIZE;
        }
}

pfn_t
memseg_get_metapfn(void *ptp, pgcnt_t metapg)
{
        page_t *pp;
        u_offset_t off;

        off = (u_offset_t)ptp + ptob(metapg);

        ASSERT(off);
        ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));

        pp = page_find(&mpvp, off);
        ASSERT(pp);
        ASSERT(pp->p_szc == 0);
        ASSERT(pp->p_pagenum != PFN_INVALID);

        return (pp->p_pagenum);
}

/*
 * Remap a memseg's page_t's to dummy pages.  Skip the low/high
 * ends of the range if they are already in use.
 */
void
memseg_remap_meta(struct memseg *seg)
{
        int i;
        u_offset_t off;
        page_t *pp;
#if 0
        page_t *epp;
#endif
        pgcnt_t metapgs;

        metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
        ASSERT(metapgs);
        pp = seg->pages;
        seg->pages_end = seg->pages_base;
#if 0
        epp = seg->epages;

        /*
         * This code cannot be tested as the kernel does not compile
         * when page_t size is changed.  It is left here as a starting
         * point if the unaligned page_t size needs to be supported.
         */

        if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
            page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
                /*
                 * Another memseg has page_t's in the same
                 * page which 'pp' resides.  This would happen
                 * if PAGESIZE is not an integral multiple of
                 * sizeof (page_t) and therefore 'seg->pages'
                 * does not start on a page boundry.
                 *
                 * Since the other memseg's pages_t's still
                 * map valid pages, skip remap of this page.
                 * Advance 'pp' to the next page which should
                 * belong only to the outgoing memseg.
                 *
                 * If the last page_t in the current page
                 * crosses a page boundary, this should still
                 * work.  The first part of the page_t is
                 * valid since memseg_lock_delete_all() has
                 * been called.  The second part of the page_t
                 * will be remapped to the corresponding
                 * dummy page below.
                 */
                ASSERT(PAGESIZE % sizeof (page_t));
                pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
                metapgs--;
        }

        if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
            page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
                /*
                 * Another memseg has page_t's in the same
                 * page which 'epp' resides.  This would happen
                 * if PAGESIZE is not an integral multiple of
                 * sizeof (page_t) and therefore 'seg->epages'
                 * does not start on a page boundry.
                 *
                 * Since the other memseg's pages_t's still
                 * map valid pages, skip remap of this page.
                 */
                ASSERT(PAGESIZE % sizeof (page_t));
                metapgs--;
        }
#endif
        ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));

        remap_to_dummy((caddr_t)pp, metapgs);

        off = (u_offset_t)pp;

        MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
            (uint64_t)off, metapgs);
        /*
         * Free pages allocated during add.
         */
        for (i = 0; i < metapgs; i++) {
                pp = page_find(&mpvp, off);
                ASSERT(pp);
                ASSERT(pp->p_szc == 0);
                page_io_unlock(pp);
                page_destroy(pp, 0);
                off += PAGESIZE;
        }
}