root/drivers/s390/char/sclp_mem.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Memory hotplug support via sclp
 *
 * Copyright IBM Corp. 2025
 */

#define pr_fmt(fmt) "sclp_mem: " fmt

#include <linux/cpufeature.h>
#include <linux/container_of.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kobject.h>
#include <linux/kstrtox.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <asm/facility.h>
#include <asm/page.h>
#include <asm/page-states.h>
#include <asm/sclp.h>

#include "sclp.h"

#define SCLP_CMDW_ASSIGN_STORAGE                0x000d0001
#define SCLP_CMDW_UNASSIGN_STORAGE              0x000c0001

static LIST_HEAD(sclp_mem_list);
static u8 sclp_max_storage_id;
static DECLARE_BITMAP(sclp_storage_ids, 256);

struct memory_increment {
        struct list_head list;
        u16 rn;
        int standby;
};

struct sclp_mem {
        struct kobject kobj;
        unsigned int id;
        unsigned int memmap_on_memory;
        unsigned int config;
#ifdef CONFIG_KASAN
        unsigned int early_shadow_mapped;
#endif
};

struct sclp_mem_arg {
        struct sclp_mem *sclp_mems;
        struct kset *kset;
};

struct assign_storage_sccb {
        struct sccb_header header;
        u16 rn;
} __packed;

struct attach_storage_sccb {
        struct sccb_header header;
        u16 :16;
        u16 assigned;
        u32 :32;
        u32 entries[];
} __packed;

int arch_get_memory_phys_device(unsigned long start_pfn)
{
        if (!sclp.rzm)
                return 0;
        return PFN_PHYS(start_pfn) >> ilog2(sclp.rzm);
}

static unsigned long rn2addr(u16 rn)
{
        return (unsigned long)(rn - 1) * sclp.rzm;
}

static int do_assign_storage(sclp_cmdw_t cmd, u16 rn)
{
        struct assign_storage_sccb *sccb;
        int rc;

        sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                return -ENOMEM;
        sccb->header.length = PAGE_SIZE;
        sccb->rn = rn;
        rc = sclp_sync_request_timeout(cmd, sccb, SCLP_QUEUE_INTERVAL);
        if (rc)
                goto out;
        switch (sccb->header.response_code) {
        case 0x0020:
        case 0x0120:
                break;
        default:
                pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n",
                        cmd, sccb->header.response_code, rn);
                rc = -EIO;
                break;
        }
out:
        free_page((unsigned long)sccb);
        return rc;
}

static int sclp_assign_storage(u16 rn)
{
        unsigned long start;
        int rc;

        rc = do_assign_storage(SCLP_CMDW_ASSIGN_STORAGE, rn);
        if (rc)
                return rc;
        start = rn2addr(rn);
        storage_key_init_range(start, start + sclp.rzm);
        return 0;
}

static int sclp_unassign_storage(u16 rn)
{
        return do_assign_storage(SCLP_CMDW_UNASSIGN_STORAGE, rn);
}

static int sclp_attach_storage(u8 id)
{
        struct attach_storage_sccb *sccb;
        int rc, i;

        sccb = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                return -ENOMEM;
        sccb->header.length = PAGE_SIZE;
        sccb->header.function_code = 0x40;
        rc = sclp_sync_request_timeout(0x00080001 | id << 8, sccb,
                                       SCLP_QUEUE_INTERVAL);
        if (rc)
                goto out;
        switch (sccb->header.response_code) {
        case 0x0020:
                set_bit(id, sclp_storage_ids);
                for (i = 0; i < sccb->assigned; i++) {
                        if (sccb->entries[i])
                                sclp_unassign_storage(sccb->entries[i] >> 16);
                }
                break;
        default:
                rc = -EIO;
                break;
        }
out:
        free_page((unsigned long)sccb);
        return rc;
}

static int sclp_mem_change_state(unsigned long start, unsigned long size,
                                 int online)
{
        struct memory_increment *incr;
        unsigned long istart;
        int rc = 0;

        list_for_each_entry(incr, &sclp_mem_list, list) {
                istart = rn2addr(incr->rn);
                if (start + size - 1 < istart)
                        break;
                if (start > istart + sclp.rzm - 1)
                        continue;
                if (online)
                        rc |= sclp_assign_storage(incr->rn);
                else
                        sclp_unassign_storage(incr->rn);
                if (rc == 0)
                        incr->standby = online ? 0 : 1;
        }
        return rc ? -EIO : 0;
}

static ssize_t sclp_config_mem_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
        struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

        return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->config));
}

static ssize_t sclp_config_mem_store(struct kobject *kobj, struct kobj_attribute *attr,
                                     const char *buf, size_t count)
{
        unsigned long addr, block_size;
        struct sclp_mem *sclp_mem;
        struct memory_block *mem;
        unsigned char id;
        bool value;
        int rc;

        rc = kstrtobool(buf, &value);
        if (rc)
                return rc;
        sclp_mem = container_of(kobj, struct sclp_mem, kobj);
        block_size = memory_block_size_bytes();
        addr = sclp_mem->id * block_size;
        /*
         * Hold device_hotplug_lock when adding/removing memory blocks.
         * Additionally, also protect calls to find_memory_block() and
         * sclp_attach_storage().
         */
        rc = lock_device_hotplug_sysfs();
        if (rc)
                goto out;
        for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1)
                sclp_attach_storage(id);
        if (value) {
                if (sclp_mem->config)
                        goto out_unlock;
                rc = sclp_mem_change_state(addr, block_size, 1);
                if (rc)
                        goto out_unlock;
                /*
                 * Set entire memory block CMMA state to nodat. Later, when
                 * page tables pages are allocated via __add_memory(), those
                 * regions are marked __arch_set_page_dat().
                 */
                __arch_set_page_nodat((void *)__va(addr), block_size >> PAGE_SHIFT);
                rc = __add_memory(0, addr, block_size,
                                  sclp_mem->memmap_on_memory ?
                                  MHP_MEMMAP_ON_MEMORY : MHP_NONE);
                if (rc) {
                        sclp_mem_change_state(addr, block_size, 0);
                        goto out_unlock;
                }
                mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
                put_device(&mem->dev);
                WRITE_ONCE(sclp_mem->config, 1);
        } else {
                if (!sclp_mem->config)
                        goto out_unlock;
                mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(addr)));
                if (mem->state != MEM_OFFLINE) {
                        put_device(&mem->dev);
                        rc = -EBUSY;
                        goto out_unlock;
                }
                /* drop the ref just got via find_memory_block() */
                put_device(&mem->dev);
                sclp_mem_change_state(addr, block_size, 0);
                __remove_memory(addr, block_size);
#ifdef CONFIG_KASAN
                if (sclp_mem->early_shadow_mapped) {
                        unsigned long start, end;

                        start = (unsigned long)kasan_mem_to_shadow(__va(addr));
                        end = start + (block_size >> KASAN_SHADOW_SCALE_SHIFT);
                        vmemmap_free(start, end, NULL);
                        sclp_mem->early_shadow_mapped = 0;
                }
#endif
                WRITE_ONCE(sclp_mem->config, 0);
        }
out_unlock:
        unlock_device_hotplug();
out:
        return rc ? rc : count;
}

static struct kobj_attribute sclp_config_mem_attr =
        __ATTR(config, 0644, sclp_config_mem_show, sclp_config_mem_store);

static ssize_t sclp_memmap_on_memory_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
{
        struct sclp_mem *sclp_mem = container_of(kobj, struct sclp_mem, kobj);

        return sysfs_emit(buf, "%u\n", READ_ONCE(sclp_mem->memmap_on_memory));
}

static ssize_t sclp_memmap_on_memory_store(struct kobject *kobj, struct kobj_attribute *attr,
                                           const char *buf, size_t count)
{
        struct sclp_mem *sclp_mem;
        unsigned long block_size;
        struct memory_block *mem;
        bool value;
        int rc;

        rc = kstrtobool(buf, &value);
        if (rc)
                return rc;
        if (value && !mhp_supports_memmap_on_memory())
                return -EOPNOTSUPP;
        rc = lock_device_hotplug_sysfs();
        if (rc)
                return rc;
        block_size = memory_block_size_bytes();
        sclp_mem = container_of(kobj, struct sclp_mem, kobj);
        mem = find_memory_block(pfn_to_section_nr(PFN_DOWN(sclp_mem->id * block_size)));
        if (!mem) {
                WRITE_ONCE(sclp_mem->memmap_on_memory, value);
        } else {
                put_device(&mem->dev);
                rc = -EBUSY;
        }
        unlock_device_hotplug();
        return rc ? rc : count;
}

static const struct kobj_type ktype = {
        .sysfs_ops = &kobj_sysfs_ops,
};

static struct kobj_attribute sclp_memmap_attr =
        __ATTR(memmap_on_memory, 0644, sclp_memmap_on_memory_show, sclp_memmap_on_memory_store);

static struct attribute *sclp_mem_attrs[] = {
        &sclp_config_mem_attr.attr,
        &sclp_memmap_attr.attr,
        NULL,
};

static struct attribute_group sclp_mem_attr_group = {
        .attrs = sclp_mem_attrs,
};

static int sclp_create_mem(struct sclp_mem *sclp_mem, struct kset *kset,
                           unsigned int id, bool config, bool memmap_on_memory)
{
        int rc;

        sclp_mem->memmap_on_memory = memmap_on_memory;
        sclp_mem->config = config;
#ifdef CONFIG_KASAN
        sclp_mem->early_shadow_mapped = config;
#endif
        sclp_mem->id = id;
        kobject_init(&sclp_mem->kobj, &ktype);
        rc = kobject_add(&sclp_mem->kobj, &kset->kobj, "memory%d", id);
        if (rc)
                return rc;
        return sysfs_create_group(&sclp_mem->kobj, &sclp_mem_attr_group);
}

static int sclp_create_configured_mem(struct memory_block *mem, void *argument)
{
        struct sclp_mem *sclp_mems;
        struct sclp_mem_arg *arg;
        struct kset *kset;
        unsigned int id;

        id = mem->dev.id;
        arg = (struct sclp_mem_arg *)argument;
        sclp_mems = arg->sclp_mems;
        kset = arg->kset;
        return sclp_create_mem(&sclp_mems[id], kset, id, true, false);
}

static void __init align_to_block_size(unsigned long *start,
                                       unsigned long *size,
                                       unsigned long alignment)
{
        unsigned long start_align, size_align;

        start_align = roundup(*start, alignment);
        size_align = rounddown(*start + *size, alignment) - start_align;

        pr_info("Standby memory at 0x%lx (%luM of %luM usable)\n",
                *start, size_align >> 20, *size >> 20);
        *start = start_align;
        *size = size_align;
}

static int __init sclp_create_standby_mems_merged(struct sclp_mem *sclp_mems,
                                                  struct kset *kset, u16 rn)
{
        unsigned long start, size, addr, block_size;
        static u16 first_rn, num;
        unsigned int id;
        int rc = 0;

        if (rn && first_rn && (first_rn + num == rn)) {
                num++;
                return rc;
        }
        if (!first_rn)
                goto skip_add;
        start = rn2addr(first_rn);
        size = (unsigned long)num * sclp.rzm;
        if (start >= ident_map_size)
                goto skip_add;
        if (start + size > ident_map_size)
                size = ident_map_size - start;
        block_size = memory_block_size_bytes();
        align_to_block_size(&start, &size, block_size);
        if (!size)
                goto skip_add;
        for (addr = start; addr < start + size; addr += block_size) {
                id = addr / block_size;
                rc = sclp_create_mem(&sclp_mems[id], kset, id, false,
                                     mhp_supports_memmap_on_memory());
                if (rc)
                        break;
        }
skip_add:
        first_rn = rn;
        num = 1;
        return rc;
}

static int __init sclp_create_standby_mems(struct sclp_mem *sclp_mems, struct kset *kset)
{
        struct memory_increment *incr;
        int rc = 0;

        list_for_each_entry(incr, &sclp_mem_list, list) {
                if (incr->standby)
                        rc = sclp_create_standby_mems_merged(sclp_mems, kset, incr->rn);
                if (rc)
                        return rc;
        }
        return sclp_create_standby_mems_merged(sclp_mems, kset, 0);
}

static int __init sclp_init_mem(void)
{
        const unsigned long block_size = memory_block_size_bytes();
        unsigned int max_sclp_mems;
        struct sclp_mem *sclp_mems;
        struct sclp_mem_arg arg;
        struct kset *kset;
        int rc;

        max_sclp_mems = roundup(sclp.rnmax * sclp.rzm, block_size) / block_size;
        /* Allocate memory for all blocks ahead of time. */
        sclp_mems = kzalloc_objs(struct sclp_mem, max_sclp_mems);
        if (!sclp_mems)
                return -ENOMEM;
        kset = kset_create_and_add("memory", NULL, firmware_kobj);
        if (!kset)
                return -ENOMEM;
        /* Initial memory is in the "configured" state already. */
        arg.sclp_mems = sclp_mems;
        arg.kset = kset;
        rc = for_each_memory_block(&arg, sclp_create_configured_mem);
        if (rc)
                return rc;
        /* Standby memory is "deconfigured". */
        return sclp_create_standby_mems(sclp_mems, kset);
}

static void __init insert_increment(u16 rn, int standby, int assigned)
{
        struct memory_increment *incr, *new_incr;
        struct list_head *prev;
        u16 last_rn;

        new_incr = kzalloc_obj(*new_incr);
        if (!new_incr)
                return;
        new_incr->rn = rn;
        new_incr->standby = standby;
        last_rn = 0;
        prev = &sclp_mem_list;
        list_for_each_entry(incr, &sclp_mem_list, list) {
                if (assigned && incr->rn > rn)
                        break;
                if (!assigned && incr->rn - last_rn > 1)
                        break;
                last_rn = incr->rn;
                prev = &incr->list;
        }
        if (!assigned)
                new_incr->rn = last_rn + 1;
        if (new_incr->rn > sclp.rnmax) {
                kfree(new_incr);
                return;
        }
        list_add(&new_incr->list, prev);
}

static int __init sclp_setup_memory(void)
{
        struct read_storage_sccb *sccb;
        int i, id, assigned, rc;

        /* No standby memory in kdump mode */
        if (oldmem_data.start)
                return 0;
        if ((sclp.facilities & 0xe00000000000UL) != 0xe00000000000UL)
                return 0;
        rc = -ENOMEM;
        sccb = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
        if (!sccb)
                goto out;
        assigned = 0;
        for (id = 0; id <= sclp_max_storage_id; id++) {
                memset(sccb, 0, PAGE_SIZE);
                sccb->header.length = PAGE_SIZE;
                rc = sclp_sync_request(SCLP_CMDW_READ_STORAGE_INFO | id << 8, sccb);
                if (rc)
                        goto out;
                switch (sccb->header.response_code) {
                case 0x0010:
                        set_bit(id, sclp_storage_ids);
                        for (i = 0; i < sccb->assigned; i++) {
                                if (!sccb->entries[i])
                                        continue;
                                assigned++;
                                insert_increment(sccb->entries[i] >> 16, 0, 1);
                        }
                        break;
                case 0x0310:
                        break;
                case 0x0410:
                        for (i = 0; i < sccb->assigned; i++) {
                                if (!sccb->entries[i])
                                        continue;
                                assigned++;
                                insert_increment(sccb->entries[i] >> 16, 1, 1);
                        }
                        break;
                default:
                        rc = -EIO;
                        break;
                }
                if (!rc)
                        sclp_max_storage_id = sccb->max_id;
        }
        if (rc || list_empty(&sclp_mem_list))
                goto out;
        for (i = 1; i <= sclp.rnmax - assigned; i++)
                insert_increment(0, 1, 0);
        rc = sclp_init_mem();
out:
        free_page((unsigned long)sccb);
        return rc;
}
__initcall(sclp_setup_memory);