#include <linux/array_size.h>
#include <linux/sort.h>
#include <linux/printk.h>
#include <linux/memblock.h>
#include <linux/numa.h>
#include <linux/numa_memblks.h>
#include <asm/numa.h>
/* Side length of the square distance table; 0 while no table is allocated. */
int numa_distance_cnt;
/*
 * Flat numa_distance_cnt x numa_distance_cnt table, indexed as
 * [from * numa_distance_cnt + to].  NULL until numa_alloc_distance() runs;
 * set to the non-NULL sentinel (void *)1 when that allocation failed, so
 * numa_set_distance() does not retry until numa_reset_distance() is called.
 */
static u8 *numa_distance;
/*
 * Node mask cleared by numa_memblks_init() and later merged into
 * node_possible_map by numa_register_meminfo().
 */
nodemask_t numa_nodes_parsed __initdata;
/* Memory blocks recorded through numa_add_memblk(). */
static struct numa_meminfo numa_meminfo __initdata_or_meminfo;
/*
 * Blocks recorded through numa_add_reserved_memblk(), plus the ranges that
 * numa_cleanup_meminfo() moves or trims out of the table it is cleaning.
 */
static struct numa_meminfo numa_reserved_meminfo __initdata_or_meminfo;
/*
 * Set in @nodemask the node id of every slot of @mi->blk that is non-empty
 * (start != end) and has a node assigned (nid != NUMA_NO_NODE).
 *
 * Every slot of the array is scanned, not only the first @mi->nr_blks, and
 * bits already set in @nodemask are left untouched.
 */
static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask,
					      const struct numa_meminfo *mi)
{
	int slot;

	for (slot = 0; slot < ARRAY_SIZE(mi->blk); slot++) {
		const struct numa_memblk *blk = &mi->blk[slot];

		if (blk->start == blk->end)
			continue;
		if (blk->nid == NUMA_NO_NODE)
			continue;

		node_set(blk->nid, *nodemask);
	}
}
/**
 * numa_reset_distance - Drop the current distance table
 *
 * Frees the table if one was allocated and returns the bookkeeping to its
 * initial state so that the next numa_set_distance() allocates a new table.
 */
void __init numa_reset_distance(void)
{
	/*
	 * Test the count, not the pointer: after a failed allocation the
	 * pointer holds the sentinel (void *)1 while the count is still 0.
	 */
	if (numa_distance_cnt) {
		size_t bytes = numa_distance_cnt * numa_distance_cnt *
			       sizeof(numa_distance[0]);

		memblock_free(numa_distance, bytes);
	}

	numa_distance = NULL;
	numa_distance_cnt = 0;
}
/*
 * Allocate the distance table and fill it with default distances.
 *
 * The table is sized from the union of numa_nodes_parsed and the nodes that
 * own a block in numa_meminfo.  Every entry starts as LOCAL_DISTANCE on the
 * diagonal and REMOTE_DISTANCE elsewhere.
 *
 * Return: 0 on success, -ENOMEM if the table could not be allocated.
 */
static int __init numa_alloc_distance(void)
{
	nodemask_t candidates = numa_nodes_parsed;
	int nid, row, col;
	int dim = 0;
	size_t bytes;

	numa_nodemask_from_meminfo(&candidates, &numa_meminfo);

	/* dim = last node id yielded by the iterator + 1 (1 if mask is empty) */
	for_each_node_mask(nid, candidates)
		dim = nid;
	dim++;

	bytes = dim * dim * sizeof(numa_distance[0]);
	numa_distance = memblock_alloc(bytes, PAGE_SIZE);
	if (!numa_distance) {
		pr_warn("Warning: can't allocate distance table!\n");
		/* non-NULL sentinel: callers will not retry until a reset */
		numa_distance = (void *)1LU;
		return -ENOMEM;
	}

	numa_distance_cnt = dim;

	for (row = 0; row < dim; row++) {
		u8 *line = numa_distance + row * dim;

		for (col = 0; col < dim; col++) {
			if (row == col)
				line[col] = LOCAL_DISTANCE;
			else
				line[col] = REMOTE_DISTANCE;
		}
	}

	pr_debug("NUMA: Initialized distance table, cnt=%d\n", dim);
	return 0;
}
/**
 * numa_set_distance - Record the distance from one node to another
 * @from: source node id
 * @to: destination node id
 * @distance: distance value to store
 *
 * Allocates the distance table on first use.  The call is ignored (with a
 * one-time warning) when either node id lies outside the table, when
 * @distance does not fit in a u8, or when @from == @to and @distance is not
 * LOCAL_DISTANCE.  It is silently ignored when the table cannot be
 * allocated.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	int dim;

	if (!numa_distance) {
		if (numa_alloc_distance() < 0)
			return;
	}

	dim = numa_distance_cnt;

	if (from < 0 || to < 0 || from >= dim || to >= dim) {
		pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	if (distance != (u8)distance ||
	    (distance != LOCAL_DISTANCE && from == to)) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	numa_distance[from * dim + to] = distance;
}
/**
 * __node_distance - Look up the distance between two nodes
 * @from: source node id
 * @to: destination node id
 *
 * Ids that are not covered by the table (negative, or >= numa_distance_cnt,
 * which includes every id while no table is allocated) get the default
 * distance: LOCAL_DISTANCE when @from == @to, REMOTE_DISTANCE otherwise.
 *
 * Return: the table entry for (@from, @to), or the default described above.
 */
int __node_distance(int from, int to)
{
	/*
	 * Negative ids pass the upper-bound test, so reject them explicitly:
	 * otherwise they would read outside the table or a wrong entry, and
	 * with numa_distance_cnt == 0 dereference a NULL/sentinel pointer.
	 * numa_set_distance() applies the same lower bound.
	 */
	if (from < 0 || to < 0 ||
	    from >= numa_distance_cnt || to >= numa_distance_cnt)
		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;

	return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);
/*
 * Append the block [@start, @end) owned by node @nid to @mi.
 *
 * Zero-length blocks are ignored silently.  Blocks with @start > @end or a
 * node id outside [0, MAX_NUMNODES) are ignored with a warning.  Both cases
 * still return 0.
 *
 * Return: 0 if the block was stored or ignored, -EINVAL if @mi is full.
 */
static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
				     struct numa_meminfo *mi)
{
	struct numa_memblk *slot;

	if (start == end)
		return 0;

	if (nid < 0 || nid >= MAX_NUMNODES || start > end) {
		pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
			nid, start, end - 1);
		return 0;
	}

	if (mi->nr_blks >= NR_NODE_MEMBLKS) {
		pr_err("too many memblk ranges\n");
		return -EINVAL;
	}

	slot = &mi->blk[mi->nr_blks];
	slot->nid = nid;
	slot->start = start;
	slot->end = end;
	mi->nr_blks++;

	return 0;
}
/**
 * numa_remove_memblk_from - Remove one block from a numa_meminfo
 * @idx: index of the block to remove
 * @mi: table to remove it from
 *
 * Decrements @mi->nr_blks and shifts the following entries down by one
 * slot.  @idx is not range-checked.
 */
void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi)
{
	struct numa_memblk *hole = &mi->blk[idx];
	int tail;

	mi->nr_blks--;
	tail = mi->nr_blks - idx;

	/* source and destination overlap, hence memmove() */
	memmove(hole, hole + 1, tail * sizeof(*hole));
}
/*
 * Move the block at index @idx of @src to the end of @dst.
 *
 * The block is always removed from @src, so callers that iterate over @src
 * and re-visit @idx afterwards keep making progress.  If @dst has no free
 * slot left the block is dropped with an error message instead of being
 * written past the end of @dst->blk.
 */
static void __init numa_move_tail_memblk(struct numa_meminfo *dst, int idx,
					 struct numa_meminfo *src)
{
	if (dst->nr_blks < ARRAY_SIZE(dst->blk))
		dst->blk[dst->nr_blks++] = src->blk[idx];
	else
		pr_err("too many memblk ranges\n");

	numa_remove_memblk_from(idx, src);
}
/**
 * numa_add_memblk - Add one block to numa_meminfo
 * @nid: node id that owns the block
 * @start: start address of the block
 * @end: end address of the block (exclusive)
 *
 * Return: 0 if the block was stored or ignored as empty/invalid,
 * -EINVAL if numa_meminfo is full.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	struct numa_meminfo *table = &numa_meminfo;

	return numa_add_memblk_to(nid, start, end, table);
}
/**
 * numa_add_reserved_memblk - Add one block to numa_reserved_meminfo
 * @nid: node id that owns the block
 * @start: start address of the block
 * @end: end address of the block (exclusive)
 *
 * Return: 0 if the block was stored or ignored as empty/invalid,
 * -EINVAL if numa_reserved_meminfo is full.
 */
int __init numa_add_reserved_memblk(int nid, u64 start, u64 end)
{
	struct numa_meminfo *table = &numa_reserved_meminfo;

	return numa_add_memblk_to(nid, start, end, table);
}
/**
 * numa_cleanup_meminfo - Trim, check and merge the blocks of a numa_meminfo
 * @mi: table to clean up in place
 *
 * Works in three passes:
 *  1. Trim: blocks that do not overlap memblock.memory are moved to
 *     numa_reserved_meminfo; the others are clamped to the
 *     [memblock_start_of_DRAM(), memblock_end_of_DRAM()) window, with the
 *     part above the window recorded in numa_reserved_meminfo; blocks that
 *     end up empty are removed.
 *  2. Merge: overlapping blocks of different nodes are an error; blocks of
 *     the same node are joined when the joined span does not intersect a
 *     block of another node.
 *  3. Clear: all slots at and beyond nr_blks are reset.
 *
 * Return: 0 on success, -EINVAL if blocks of different nodes overlap.
 */
int __init numa_cleanup_meminfo(struct numa_meminfo *mi)
{
const u64 low = memblock_start_of_DRAM();
const u64 high = memblock_end_of_DRAM();
int i, j, k;
/* Pass 1: trim every entry. */
for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
/*
 * No overlap with memblock.memory reported: hand the block over to the
 * reserved table.  The removal shifts the next entry into slot i, so i
 * is decremented to visit that slot again.
 */
if (!memblock_overlaps_region(&memblock.memory,
bi->start, bi->end - bi->start)) {
numa_move_tail_memblk(&numa_reserved_meminfo, i--, mi);
continue;
}
/* Clamp the start up to the lower limit. */
bi->start = max(bi->start, low);
/*
 * Cut off the part above the upper limit, recording it in the reserved
 * table first.
 * NOTE(review): the return value of numa_add_memblk_to() is ignored here.
 */
if (bi->end > high) {
numa_add_memblk_to(bi->nid, high, bi->end,
&numa_reserved_meminfo);
bi->end = high;
}
/* Drop blocks that became empty; i-- re-visits the slot as above. */
if (bi->start >= bi->end)
numa_remove_memblk_from(i--, mi);
}
/* Pass 2: detect overlaps and merge blocks of the same node. */
for (i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
for (j = i + 1; j < mi->nr_blks; j++) {
struct numa_memblk *bj = &mi->blk[j];
u64 start, end;
/*
 * Overlap between different nodes is fatal; overlap within one node
 * only draws a warning and is resolved by the merge below.
 */
if (bi->end > bj->start && bi->start < bj->end) {
if (bi->nid != bj->nid) {
pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1,
bj->nid, bj->start, bj->end - 1);
return -EINVAL;
}
pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1,
bj->start, bj->end - 1);
}
/* Only blocks of the same node are merge candidates. */
if (bi->nid != bj->nid)
continue;
/* Span that the merged block would cover, including any hole. */
start = min(bi->start, bj->start);
end = max(bi->end, bj->end);
/* Look for a block of another node that intersects that span. */
for (k = 0; k < mi->nr_blks; k++) {
struct numa_memblk *bk = &mi->blk[k];
if (bi->nid == bk->nid)
continue;
if (start < bk->end && end > bk->start)
break;
}
/* The loop stopped early: another node is in the way, do not merge. */
if (k < mi->nr_blks)
continue;
pr_info("NUMA: Node %d [mem %#010Lx-%#010Lx] + [mem %#010Lx-%#010Lx] -> [mem %#010Lx-%#010Lx]\n",
bi->nid, bi->start, bi->end - 1, bj->start,
bj->end - 1, start, end - 1);
/* Grow bi to the merged span and drop bj; j-- re-visits slot j. */
bi->start = start;
bi->end = end;
numa_remove_memblk_from(j--, mi);
}
}
/* Pass 3: reset the unused slots to empty blocks with no node. */
for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) {
mi->blk[i].start = mi->blk[i].end = 0;
mi->blk[i].nid = NUMA_NO_NODE;
}
return 0;
}
/*
 * Clear the memblock hotplug flag on all memory of nodes that hold reserved
 * memblock regions.
 *
 * Step 1 assigns node ids inside memblock.reserved for the range of every
 * numa_meminfo block.  Step 2 collects the (valid) node ids found on the
 * reserved regions.  Step 3 clears the hotplug flag on every numa_meminfo
 * block whose node was collected in step 2.
 */
static void __init numa_clear_kernel_node_hotplug(void)
{
	nodemask_t kernel_nodes = NODE_MASK_NONE;
	struct memblock_region *rgn;
	struct numa_memblk *blk;
	int idx;

	/* Step 1: stamp node ids onto memblock.reserved. */
	for (idx = 0; idx < numa_meminfo.nr_blks; idx++) {
		int err;

		blk = &numa_meminfo.blk[idx];
		err = memblock_set_node(blk->start, blk->end - blk->start,
					&memblock.reserved, blk->nid);
		WARN_ON_ONCE(err);
	}

	/* Step 2: gather the nodes that own reserved regions. */
	for_each_reserved_mem_region(rgn) {
		int nid = memblock_get_region_node(rgn);

		if (!numa_valid_node(nid))
			continue;
		node_set(nid, kernel_nodes);
	}

	/* Step 3: whole blocks of those nodes lose the hotplug flag. */
	for (idx = 0; idx < numa_meminfo.nr_blks; idx++) {
		blk = &numa_meminfo.blk[idx];

		if (node_isset(blk->nid, kernel_nodes))
			memblock_clear_hotplug(blk->start,
					       blk->end - blk->start);
	}
}
/*
 * Publish the blocks of @mi to memblock and derive node_possible_map.
 *
 * node_possible_map becomes numa_nodes_parsed plus every node owning a
 * non-empty block in @mi.  Each block's range is then tagged with its node
 * in memblock.memory, and numa_clear_kernel_node_hotplug() is run.  When
 * NODE_NOT_IN_PAGE_FLAGS is enabled, a non-zero node alignment smaller than
 * PAGES_PER_SECTION rejects the configuration.
 *
 * Return: 0 on success, -EINVAL if no node is possible or the alignment
 * check fails.
 */
static int __init numa_register_meminfo(struct numa_meminfo *mi)
{
	unsigned long node_align_mb, sect_align_mb;
	unsigned long pfn_align;
	int idx;

	node_possible_map = numa_nodes_parsed;
	numa_nodemask_from_meminfo(&node_possible_map, mi);
	if (WARN_ON(nodes_empty(node_possible_map)))
		return -EINVAL;

	for (idx = 0; idx < mi->nr_blks; idx++) {
		struct numa_memblk *blk = mi->blk + idx;

		memblock_set_node(blk->start, blk->end - blk->start,
				  &memblock.memory, blk->nid);
	}

	numa_clear_kernel_node_hotplug();

	if (!IS_ENABLED(NODE_NOT_IN_PAGE_FLAGS))
		return 0;

	pfn_align = node_map_pfn_alignment();
	if (!pfn_align || pfn_align >= PAGES_PER_SECTION)
		return 0;

	node_align_mb = PFN_PHYS(pfn_align) / SZ_1M;
	sect_align_mb = PFN_PHYS(PAGES_PER_SECTION) / SZ_1M;
	pr_warn("Node alignment %luMB < min %luMB, rejecting NUMA config\n",
		node_align_mb, sect_align_mb);
	return -EINVAL;
}
/**
 * numa_memblks_init - Reset NUMA memblock state and rebuild it via a callback
 * @init_func: callback that populates the NUMA tables; a negative return
 *             value aborts the initialization
 * @memblock_force_top_down: when true, memblock_set_bottom_up(false) is
 *                           called after @init_func succeeded
 *
 * The order of the steps below matters: all state is reset before
 * @init_func runs, and the collected blocks are cleaned up before they are
 * passed to numa_emulation() and registered.
 *
 * Return: 0 on success, otherwise the negative value returned by
 * @init_func, numa_cleanup_meminfo() or numa_register_meminfo().
 */
int __init numa_memblks_init(int (*init_func)(void),
bool memblock_force_top_down)
{
/* Largest representable physical address: used as "whole range" size. */
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
int ret;
/* Start from empty node masks and an empty numa_meminfo. */
nodes_clear(numa_nodes_parsed);
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
/* Detach all memory and reserved memblock regions from any node. */
WARN_ON(memblock_set_node(0, max_addr, &memblock.memory, NUMA_NO_NODE));
WARN_ON(memblock_set_node(0, max_addr, &memblock.reserved,
NUMA_NO_NODE));
/* Clear the hotplug flag over the whole range as well. */
WARN_ON(memblock_clear_hotplug(0, max_addr));
/* Drop any distance table left from a previous attempt. */
numa_reset_distance();
ret = init_func();
if (ret < 0)
return ret;
if (memblock_force_top_down)
memblock_set_bottom_up(false);
ret = numa_cleanup_meminfo(&numa_meminfo);
if (ret < 0)
return ret;
/* Hand the cleaned-up table and the distance table size to numa_emulation(). */
numa_emulation(&numa_meminfo, numa_distance_cnt);
return numa_register_meminfo(&numa_meminfo);
}
/*
 * sort() comparison callback for an array of numa_memblk pointers: orders
 * the pointed-to blocks by ascending start address.
 *
 * Return: -1, 0 or 1 when the first block starts below, at or above the
 * second one.
 */
static int __init cmp_memblk(const void *a, const void *b)
{
	const struct numa_memblk *const *lhs = a;
	const struct numa_memblk *const *rhs = b;

	if ((*lhs)->start < (*rhs)->start)
		return -1;
	if ((*lhs)->start > (*rhs)->start)
		return 1;
	return 0;
}
/* Scratch list of pointers into numa_meminfo used by numa_fill_memblks(). */
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;

/**
 * numa_fill_memblks - Extend numa_meminfo blocks to cover an address range
 * @start: start of the range
 * @end: end of the range (exclusive)
 *
 * Collects the numa_meminfo blocks that overlap [@start, @end), sorts them
 * by start address, stretches the first one down to @start and the last one
 * up to @end, and closes every gap between consecutive blocks by pulling
 * the later block's start back to the end of the coverage so far.  The
 * blocks are modified in place.
 *
 * Return: 0 if at least one block overlapped the range, NUMA_NO_MEMBLK if
 * none did (nothing is modified in that case).
 */
int __init numa_fill_memblks(u64 start, u64 end)
{
	struct numa_meminfo *mi = &numa_meminfo;
	struct numa_memblk **hits = numa_memblk_list;
	struct numa_memblk *first, *last;
	int nr_hits = 0;
	u64 covered;
	int i;

	/* Gather pointers to every block that overlaps the range. */
	for (i = 0; i < mi->nr_blks; i++) {
		struct numa_memblk *cand = &mi->blk[i];

		if (!memblock_addrs_overlap(start, end - start, cand->start,
					    cand->end - cand->start))
			continue;
		hits[nr_hits++] = cand;
	}
	if (nr_hits == 0)
		return NUMA_NO_MEMBLK;

	sort(hits, nr_hits, sizeof(hits[0]), cmp_memblk, NULL);

	/* The outermost blocks must reach the ends of the range. */
	first = hits[0];
	last = hits[nr_hits - 1];
	first->start = min(first->start, start);
	last->end = max(last->end, end);

	/* Walk in address order, tracking how far coverage extends. */
	covered = first->end;
	for (i = 1; i < nr_hits; i++) {
		struct numa_memblk *cur = hits[i];

		if (covered < cur->start) {
			/* gap in front of this block: back-fill it */
			cur->start = covered;
			covered = cur->end;
			continue;
		}
		if (covered < cur->end)
			covered = cur->end;
	}

	return 0;
}
#ifdef CONFIG_NUMA_KEEP_MEMINFO
/*
 * Find the node of the first block in @mi that contains address @start,
 * treating each block as the half-open range [blk.start, blk.end).
 *
 * Return: that block's node id, or NUMA_NO_NODE if no block matches.
 */
static int meminfo_to_nid(struct numa_meminfo *mi, u64 start)
{
	int idx;

	for (idx = 0; idx < mi->nr_blks; idx++) {
		const struct numa_memblk *blk = &mi->blk[idx];

		if (start < blk->start || start >= blk->end)
			continue;
		return blk->nid;
	}

	return NUMA_NO_NODE;
}
/**
 * phys_to_target_node - Map a physical address to a node id
 * @start: physical address to look up
 *
 * Both numa_meminfo and numa_reserved_meminfo are searched.  A match in
 * numa_reserved_meminfo takes precedence; the numa_meminfo result is used
 * only when the reserved table has no block for @start.
 *
 * Return: the node id found, or NUMA_NO_NODE if neither table matches.
 */
int phys_to_target_node(u64 start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);
	int reserved_nid = meminfo_to_nid(&numa_reserved_meminfo, start);

	return reserved_nid != NUMA_NO_NODE ? reserved_nid : nid;
}
EXPORT_SYMBOL_GPL(phys_to_target_node);
/**
 * memory_add_physaddr_to_nid - Pick a node id for a physical address
 * @start: physical address to look up
 *
 * Return: the node of the numa_meminfo block containing @start, or the node
 * stored in the first numa_meminfo slot when no block contains it.
 */
int memory_add_physaddr_to_nid(u64 start)
{
	int nid = meminfo_to_nid(&numa_meminfo, start);

	return nid != NUMA_NO_NODE ? nid : numa_meminfo.blk[0].nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif