root/arch/parisc/kernel/pdt.c
// SPDX-License-Identifier: GPL-2.0
/*
 *    Page Deallocation Table (PDT) support
 *
 *    The Page Deallocation Table (PDT) is maintained by firmware and holds a
 *    list of memory addresses in which memory errors were detected.
 *    The list contains both single-bit (correctable) and double-bit
 *    (uncorrectable) errors.
 *
 *    Copyright 2017 by Helge Deller <deller@gmx.de>
 *
 *    possible future enhancements:
 *    - add userspace interface via procfs or sysfs to clear PDT
 */

#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/kthread.h>
#include <linux/proc_fs.h>
#include <linux/initrd.h>
#include <linux/pgtable.h>
#include <linux/mm.h>

#include <asm/pdc.h>
#include <asm/pdcpat.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

/*
 * Firmware interface used to access the PDT. pdc_pdt_init() probes the
 * interfaces and stores the one that works in pdt_type.
 */
enum pdt_access_type {
        PDT_NONE = 0,           /* firmware provides no PDT information */
        PDT_PDC = 1,            /* standard PDC call of non-PAT machines */
        PDT_PAT_NEW = 2,        /* PAT call reporting info for all cells */
        PDT_PAT_CELL = 3,       /* PAT call reporting cell info only */
};

/* PDT access method selected by pdc_pdt_init(); PDT_NONE if unavailable. */
static enum pdt_access_type pdt_type;

/* PDT poll interval: 1 minute if errors, 5 minutes if everything OK. */
#define PDT_POLL_INTERVAL_DEFAULT       (5*60*HZ)
#define PDT_POLL_INTERVAL_SHORT         (1*60*HZ)
/* timeout the kpdtd thread currently sleeps between two firmware polls */
static unsigned long pdt_poll_interval = PDT_POLL_INTERVAL_DEFAULT;

/* global PDT status information */
static struct pdc_mem_retinfo pdt_status;

/* The kernel copy of the PDT is limited to one page worth of entries. */
#define MAX_PDT_TABLE_SIZE      PAGE_SIZE
#define MAX_PDT_ENTRIES         (MAX_PDT_TABLE_SIZE / sizeof(unsigned long))
/* page-aligned buffer handed to the firmware calls that read PDT entries */
static unsigned long pdt_entry[MAX_PDT_ENTRIES] __page_aligned_bss;

/*
 * Constants for the pdt_entry format:
 * A pdt_entry holds the physical address in bits 0-57, bits 58-61 are
 * reserved, bit 62 is the perm bit and bit 63 is the error_type bit.
 * The perm bit indicates whether the error has been verified as a permanent
 * error (value of 1) or has not been verified, and may be transient (value
 * of 0). The error_type bit indicates whether the error is a single bit error
 * (value of 1) or a multiple bit error.
 * On non-PAT machines phys_addr is encoded in bits 0-59 and error_type in bit
 * 63. Those machines don't provide the perm bit.
 */

/*
 * Masks applied to a raw pdt_entry value. They depend on pdt_type because
 * PDT_PDC (non-PAT) entries use fewer low-order flag bits and have no perm
 * bit. All constants are unsigned long, matching the type of a pdt_entry,
 * so the address mask does not rely on sign extension of an int constant.
 */
#define PDT_ADDR_PHYS_MASK      (pdt_type != PDT_PDC ? ~0x3fUL : ~0x0fUL)
#define PDT_ADDR_PERM_ERR       (pdt_type != PDT_PDC ? 2UL : 0UL)
#define PDT_ADDR_SINGLE_ERR     1UL

#ifdef CONFIG_PROC_FS
/*
 * Report the PDT capacity and the current number of PDT entries via
 * /proc/meminfo. Nothing is printed if no PDT access method is available.
 */
void arch_report_meminfo(struct seq_file *m)
{
        const struct pdc_mem_retinfo *status = &pdt_status;

        if (pdt_type != PDT_NONE) {
                seq_printf(m, "PDT_max_entries: %7lu\n", status->pdt_size);
                seq_printf(m, "PDT_cur_entries: %7lu\n", status->pdt_entries);
        }
}
#endif

static int get_info_pat_new(void)
{
        struct pdc_pat_mem_retinfo pat_rinfo;
        int ret;

        /* newer PAT machines like C8000 report info for all cells */
        if (is_pdc_pat())
                ret = pdc_pat_mem_pdt_info(&pat_rinfo);
        else
                return PDC_BAD_PROC;

        pdt_status.pdt_size = pat_rinfo.max_pdt_entries;
        pdt_status.pdt_entries = pat_rinfo.current_pdt_entries;
        pdt_status.pdt_status = 0;
        pdt_status.first_dbe_loc = pat_rinfo.first_dbe_loc;
        pdt_status.good_mem = pat_rinfo.good_mem;

        return ret;
}

static int get_info_pat_cell(void)
{
        struct pdc_pat_mem_cell_pdt_retinfo cell_rinfo;
        int ret;

        /* older PAT machines like rp5470 report cell info only */
        if (is_pdc_pat())
                ret = pdc_pat_mem_pdt_cell_info(&cell_rinfo, parisc_cell_num);
        else
                return PDC_BAD_PROC;

        pdt_status.pdt_size = cell_rinfo.max_pdt_entries;
        pdt_status.pdt_entries = cell_rinfo.current_pdt_entries;
        pdt_status.pdt_status = 0;
        pdt_status.first_dbe_loc = cell_rinfo.first_dbe_loc;
        pdt_status.good_mem = cell_rinfo.good_mem;

        return ret;
}

static void report_mem_err(unsigned long pde)
{
        struct pdc_pat_mem_phys_mem_location loc;
        unsigned long addr;
        char dimm_txt[32];

        addr = pde & PDT_ADDR_PHYS_MASK;

        /* show DIMM slot description on PAT machines */
        if (is_pdc_pat()) {
                pdc_pat_mem_get_dimm_phys_location(&loc, addr);
                sprintf(dimm_txt, "DIMM slot %02x, ", loc.dimm_slot);
        } else
                dimm_txt[0] = 0;

        pr_warn("PDT: BAD MEMORY at 0x%08lx, %s%s%s-bit error.\n",
                addr, dimm_txt,
                pde & PDT_ADDR_PERM_ERR ? "permanent ":"",
                pde & PDT_ADDR_SINGLE_ERR ? "single":"multi");
}


/*
 * pdc_pdt_init()
 *
 * Initialize kernel PDT structures, read the initial PDT table from firmware,
 * report all current PDT entries and mark bad memory with memblock_reserve()
 * so that the kernel does not use broken memory areas.
 *
 * The access methods are probed in the order PDT_PAT_NEW, PDT_PAT_CELL,
 * PDT_PDC. pdt_type ends up as PDT_NONE if none of them works or if reading
 * the table fails.
 */
void __init pdc_pdt_init(void)
{
        int ret;
        unsigned long i, entries;
        struct pdc_mem_read_pdt pdt_read_ret;

        pdt_type = PDT_PAT_NEW;
        ret = get_info_pat_new();

        if (ret != PDC_OK) {
                pdt_type = PDT_PAT_CELL;
                ret = get_info_pat_cell();
        }

        if (ret != PDC_OK) {
                pdt_type = PDT_PDC;
                /* non-PAT machines provide the standard PDC call */
                ret = pdc_mem_pdt_info(&pdt_status);
        }

        if (ret != PDC_OK) {
                pdt_type = PDT_NONE;
                pr_info("PDT: Firmware does not provide any page deallocation"
                        " information.\n");
                return;
        }

        /* never handle more entries than fit into pdt_entry[] */
        entries = pdt_status.pdt_entries;
        if (WARN_ON(entries > MAX_PDT_ENTRIES))
                entries = pdt_status.pdt_entries = MAX_PDT_ENTRIES;

        pr_info("PDT: type %s, size %lu, entries %lu, status %lu, dbe_loc 0x%lx,"
                " good_mem %lu MB\n",
                        pdt_type == PDT_PDC ? __stringify(PDT_PDC) :
                        pdt_type == PDT_PAT_CELL ? __stringify(PDT_PAT_CELL)
                                                 : __stringify(PDT_PAT_NEW),
                        pdt_status.pdt_size, pdt_status.pdt_entries,
                        pdt_status.pdt_status, pdt_status.first_dbe_loc,
                        pdt_status.good_mem / 1024 / 1024);

        if (entries == 0) {
                pr_info("PDT: Firmware reports all memory OK.\n");
                return;
        }

        if (pdt_status.first_dbe_loc &&
                pdt_status.first_dbe_loc <= __pa((unsigned long)&_end))
                pr_crit("CRITICAL: Bad memory inside kernel image memory area!\n");

        pr_warn("PDT: Firmware reports %lu entries of faulty memory:\n",
                entries);

        if (pdt_type == PDT_PDC)
                ret = pdc_mem_pdt_read_entries(&pdt_read_ret, pdt_entry);
        else {
#ifdef CONFIG_64BIT
                struct pdc_pat_mem_read_pd_retinfo pat_pret;

                if (pdt_type == PDT_PAT_CELL)
                        ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
                                MAX_PDT_ENTRIES);
                else
                        ret = pdc_pat_mem_read_pd_pdt(&pat_pret, pdt_entry,
                                MAX_PDT_TABLE_SIZE, 0);
#else
                ret = PDC_BAD_PROC;
#endif
        }

        if (ret != PDC_OK) {
                pdt_type = PDT_NONE;
                pr_warn("PDT: Get PDT entries failed with %d\n", ret);
                return;
        }

        for (i = 0; i < entries; i++) {
                unsigned long addr;

                report_mem_err(pdt_entry[i]);

                /*
                 * addr is a physical address, while initrd_start and
                 * initrd_end are kernel virtual addresses. Convert them
                 * with __pa() before comparing.
                 */
                addr = pdt_entry[i] & PDT_ADDR_PHYS_MASK;
                if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_start &&
                        addr >= __pa(initrd_start) && addr < __pa(initrd_end))
                        pr_crit("CRITICAL: initrd possibly broken "
                                "due to bad memory!\n");

                /* mark memory page bad */
                memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
                num_poisoned_pages_inc(addr >> PAGE_SHIFT);
        }
}


/*
 * This is the PDT kernel thread main loop.
 */

static int pdt_mainloop(void *unused)
{
        struct pdc_mem_read_pdt pdt_read_ret;
        struct pdc_pat_mem_read_pd_retinfo pat_pret __maybe_unused;
        unsigned long old_num_entries;
        unsigned long *bad_mem_ptr;
        int num, ret;

        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);

                old_num_entries = pdt_status.pdt_entries;

                schedule_timeout(pdt_poll_interval);
                if (kthread_should_stop())
                        break;

                /* Do we have new PDT entries? */
                switch (pdt_type) {
                case PDT_PAT_NEW:
                        ret = get_info_pat_new();
                        break;
                case PDT_PAT_CELL:
                        ret = get_info_pat_cell();
                        break;
                default:
                        ret = pdc_mem_pdt_info(&pdt_status);
                        break;
                }

                if (ret != PDC_OK) {
                        pr_warn("PDT: unexpected failure %d\n", ret);
                        return -EINVAL;
                }

                /* if no new PDT entries, just wait again */
                num = pdt_status.pdt_entries - old_num_entries;
                if (num <= 0)
                        continue;

                /* decrease poll interval in case we found memory errors */
                if (pdt_status.pdt_entries &&
                        pdt_poll_interval == PDT_POLL_INTERVAL_DEFAULT)
                        pdt_poll_interval = PDT_POLL_INTERVAL_SHORT;

                /* limit entries to get */
                if (num > MAX_PDT_ENTRIES) {
                        num = MAX_PDT_ENTRIES;
                        pdt_status.pdt_entries = old_num_entries + num;
                }

                /* get new entries */
                switch (pdt_type) {
#ifdef CONFIG_64BIT
                case PDT_PAT_CELL:
                        if (pdt_status.pdt_entries > MAX_PDT_ENTRIES) {
                                pr_crit("PDT: too many entries.\n");
                                return -ENOMEM;
                        }
                        ret = pdc_pat_mem_read_cell_pdt(&pat_pret, pdt_entry,
                                MAX_PDT_ENTRIES);
                        bad_mem_ptr = &pdt_entry[old_num_entries];
                        break;
                case PDT_PAT_NEW:
                        ret = pdc_pat_mem_read_pd_pdt(&pat_pret,
                                pdt_entry,
                                num * sizeof(unsigned long),
                                old_num_entries * sizeof(unsigned long));
                        bad_mem_ptr = &pdt_entry[0];
                        break;
#endif
                default:
                        ret = pdc_mem_pdt_read_entries(&pdt_read_ret,
                                pdt_entry);
                        bad_mem_ptr = &pdt_entry[old_num_entries];
                        break;
                }

                /* report and mark memory broken */
                while (num--) {
                        unsigned long pde = *bad_mem_ptr++;

                        report_mem_err(pde);

#ifdef CONFIG_MEMORY_FAILURE
                        if ((pde & PDT_ADDR_PERM_ERR) ||
                            ((pde & PDT_ADDR_SINGLE_ERR) == 0))
                                memory_failure(pde >> PAGE_SHIFT, 0);
                        else
                                soft_offline_page(pde >> PAGE_SHIFT, 0);
#else
                        pr_crit("PDT: memory error at 0x%lx ignored.\n"
                                "Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
                                "for real handling.\n",
                                pde & PDT_ADDR_PHYS_MASK);
#endif

                }
        }

        return 0;
}


/*
 * Start the "kpdtd" kernel thread which runs pdt_mainloop().
 *
 * Returns -ENODEV if no PDT access method is available, otherwise 0 or the
 * error code encoded in the pointer returned by kthread_run().
 */
static int __init pdt_initcall(void)
{
        struct task_struct *task;

        if (pdt_type != PDT_NONE) {
                task = kthread_run(pdt_mainloop, NULL, "kpdtd");
                return PTR_ERR_OR_ZERO(task);
        }

        return -ENODEV;
}

late_initcall(pdt_initcall);