#include "mmu.h"
#include <string.h>
#include <OS.h>
#include <arch/cpu.h>
#include <arch/x86/descriptors.h>
#include <arch_kernel.h>
#include <boot/platform.h>
#include <boot/stdio.h>
#include <boot/kernel_args.h>
#include <boot/stage2.h>
#include <kernel.h>
#include "bios.h"
#include "interrupts.h"
#ifdef TRACE_MMU
# define TRACE(x...) dprintf(x)
#else
# define TRACE(x...) ;
#endif
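
// one entry of the BIOS E820 memory map (INT 0x15, EAX = 0xE820)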
struct extended_memory {
uint64 base_addr;
uint64 length;
uint32 type;
};
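
// GDT set up by the boot loader; loaded in mmu_init_for_kernel() and kept
// in place for the kernel's early boot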
segment_descriptor gBootGDT[BOOT_GDT_SEGMENT_COUNT];
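// flags for page directory entries: present | read/write | user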
static const uint32 kDefaultPageTableFlags = 0x07;
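// 16 MB of virtual address space after KERNEL_LOAD_BASE, reserved for the kernel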
static const size_t kMaxKernelSize = 0x1000000;
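// the loader identity maps the first 8 MB of physical memory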
static const size_t kIdentityMapEnd = 0x0800000;
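// physical (and, thanks to the identity map, also virtual) address of the
// page directory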
static uint32 *sPageDirectory = 0;
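
// bump-allocator cursors; the PXE environment uses a different low-memory
// layout, so its scratch addresses differ from the plain BIOS ones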
#ifdef _PXE_ENV
static addr_t sNextPhysicalAddress = 0x112000;
static addr_t sNextPhysicalKernelAddress = kIdentityMapEnd;
static addr_t sNextVirtualAddress = KERNEL_LOAD_BASE + kMaxKernelSize;
static addr_t sNextPageTableAddress = 0x7d000;
static const uint32 kPageTableRegionEnd = 0x8b000;
#else
static addr_t sNextPhysicalAddress = 0x100000;
static addr_t sNextPhysicalKernelAddress = kIdentityMapEnd;
static addr_t sNextVirtualAddress = KERNEL_LOAD_BASE + kMaxKernelSize;
static addr_t sNextPageTableAddress = 0x90000;
static const uint32 kPageTableRegionEnd = 0x9e000;
#endif
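
// Reserves the next "size" bytes of loader virtual address space.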
static addr_t
allocate_virtual(size_t size)
{
addr_t address = sNextVirtualAddress;
sNextVirtualAddress += size;
return address;
}
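
// Allocates page-aligned physical memory, either from the identity-mapped
// low 8 MB (for loader data) or from anywhere above it, and records the
// range in gKernelArgs.physical_allocated_range.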
static addr_t
allocate_physical(size_t size, bool inIdentityMap = true)
{
if ((size % B_PAGE_SIZE) != 0)
panic("request for non-page-aligned physical memory!");
addr_t* nextAddress = &sNextPhysicalKernelAddress;
if (inIdentityMap) {
nextAddress = &sNextPhysicalAddress;
if ((*nextAddress + size) > kIdentityMapEnd) {
panic("request too large for identity-map physical memory!");
return 0;
}
}
uint64 base = *nextAddress;
if (!get_free_address_range(gKernelArgs.physical_allocated_range,
gKernelArgs.num_physical_allocated_ranges, base, size, &base)) {
panic("Out of physical memory!");
return 0;
}
insert_physical_allocated_range(base, size);
*nextAddress = base + size;
return base;
}
static addr_t
get_next_virtual_page()
{
return allocate_virtual(B_PAGE_SIZE);
}
static addr_t
get_next_physical_page()
{
return allocate_physical(B_PAGE_SIZE, false);
}
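
// Returns the next page table from the reserved low-memory region, falling
// back to the identity-mapped allocator once that region is exhausted.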
static uint32 *
get_next_page_table()
{
TRACE("get_next_page_table, sNextPageTableAddress %#" B_PRIxADDR
", kPageTableRegionEnd %#" B_PRIxADDR "\n", sNextPageTableAddress,
kPageTableRegionEnd);
addr_t address = sNextPageTableAddress;
if (address >= kPageTableRegionEnd)
return (uint32 *)allocate_physical(B_PAGE_SIZE);
sNextPageTableAddress += B_PAGE_SIZE;
return (uint32 *)address;
}
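
// Sets up a new, cleared page table for the 4 MB region containing "base",
// registers it in the kernel args, and hooks it into the page directory.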
static uint32*
add_page_table(addr_t base)
{
if (gKernelArgs.arch_args.num_pgtables == MAX_BOOT_PTABLES) {
panic("gKernelArgs.arch_args.pgtables overflow");
return NULL;
}
base = ROUNDDOWN(base, B_PAGE_SIZE * 1024);
uint32 *pageTable = get_next_page_table();
if (pageTable > (uint32 *)kIdentityMapEnd) {
panic("tried to add page table beyond the identity mapped 8 MB "
"region\n");
return NULL;
}
TRACE("add_page_table(base = %p), got page: %p\n", (void*)base, pageTable);
gKernelArgs.arch_args.pgtables[gKernelArgs.arch_args.num_pgtables++]
= (uint32)pageTable;
for (int32 i = 0; i < 1024; i++)
pageTable[i] = 0;
sPageDirectory[base / (4 * 1024 * 1024)]
= (uint32)pageTable | kDefaultPageTableFlags;
base += B_PAGE_SIZE * 1024;
if (base > gKernelArgs.arch_args.virtual_end)
gKernelArgs.arch_args.virtual_end = base;
return pageTable;
}
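
// Clears the page table entry for virtualAddress and invalidates its TLB entry.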
static void
unmap_page(addr_t virtualAddress)
{
TRACE("unmap_page(virtualAddress = %p)\n", (void *)virtualAddress);
if (virtualAddress < KERNEL_LOAD_BASE) {
panic("unmap_page: asked to unmap invalid page %p!\n",
(void *)virtualAddress);
}
uint32 *pageTable = (uint32 *)(sPageDirectory[virtualAddress
/ (B_PAGE_SIZE * 1024)] & 0xfffff000);
pageTable[(virtualAddress % (B_PAGE_SIZE * 1024)) / B_PAGE_SIZE] = 0;
asm volatile("invlpg (%0)" : : "r" (virtualAddress));
}
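
// Maps a virtual page to a physical one, creating the page table on demand.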
static void
map_page(addr_t virtualAddress, addr_t physicalAddress, uint32 flags)
{
TRACE("map_page: vaddr 0x%lx, paddr 0x%lx\n", virtualAddress,
physicalAddress);
if (virtualAddress < KERNEL_LOAD_BASE) {
panic("map_page: asked to map invalid page %p!\n",
(void *)virtualAddress);
}
uint32 *pageTable = (uint32 *)(sPageDirectory[virtualAddress
/ (B_PAGE_SIZE * 1024)] & 0xfffff000);
if (pageTable == NULL) {
pageTable = add_page_table(virtualAddress);
if (pageTable == NULL) {
panic("map_page: failed to allocate a page table for virtual "
"address %p\n", (void*)virtualAddress);
return;
}
}
physicalAddress &= ~(B_PAGE_SIZE - 1);
uint32 tableEntry = (virtualAddress % (B_PAGE_SIZE * 1024)) / B_PAGE_SIZE;
TRACE("map_page: inserting pageTable %p, tableEntry %" B_PRIu32
", physicalAddress %#" B_PRIxADDR "\n", pageTable, tableEntry,
physicalAddress);
pageTable[tableEntry] = physicalAddress | flags;
asm volatile("invlpg (%0)" : : "r" (virtualAddress));
TRACE("map_page: done\n");
}
#ifdef TRACE_MEMORY_MAP
static const char *
e820_memory_type(uint32 type)
{
switch (type) {
case 1: return "memory";
case 2: return "reserved";
case 3: return "ACPI reclaim";
case 4: return "ACPI NVS";
default: return "unknown/reserved";
}
}
#endif
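
// Reads the BIOS E820 memory map into the scratch buffer; returns the number
// of entries, or 0 on failure.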
static uint32
get_memory_map(extended_memory **_extendedMemory)
{
extended_memory *block = (extended_memory *)kExtraSegmentScratch;
bios_regs regs;
uint32 count = 0;
TRACE("get_memory_map()\n");
regs.ecx = sizeof(extended_memory);
regs.edi = (uint32)block;
do {
regs.eax = 0xe820;
regs.edx = 'SMAP';
		call_bios(0x15, &regs);
if ((regs.flags & CARRY_FLAG) != 0)
return 0;
regs.edi += sizeof(extended_memory);
count++;
} while (regs.ebx != 0);
*_extendedMemory = block;
#ifdef TRACE_MEMORY_MAP
dprintf("extended memory info (from 0xe820):\n");
for (uint32 i = 0; i < count; i++) {
dprintf(" base 0x%08Lx, len 0x%08Lx, type %lu (%s)\n",
block[i].base_addr, block[i].length,
block[i].type, e820_memory_type(block[i].type));
}
#endif
return count;
}
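
// Allocates the page directory, identity maps the first 8 MB of physical
// memory, and enables paging.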
static void
init_page_directory(void)
{
TRACE("init_page_directory\n");
sPageDirectory = (uint32 *)allocate_physical(B_PAGE_SIZE);
gKernelArgs.arch_args.phys_pgdir = (uint32)sPageDirectory;
for (int32 i = 0; i < 1024; i++) {
sPageDirectory[i] = 0;
}
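	// identity map the first 8 MB of physical memory using two page tables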
uint32 *pageTable = get_next_page_table();
for (int32 i = 0; i < 1024; i++) {
pageTable[i] = (i * 0x1000) | kDefaultPageFlags;
}
sPageDirectory[0] = (uint32)pageTable | kDefaultPageFlags;
pageTable = get_next_page_table();
for (int32 i = 0; i < 1024; i++) {
pageTable[i] = (i * 0x1000 + 0x400000) | kDefaultPageFlags;
}
sPageDirectory[1] = (uint32)pageTable | kDefaultPageFlags;
gKernelArgs.arch_args.num_pgtables = 0;
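	// switch to the new page directory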
asm("movl %0, %%eax;"
"movl %%eax, %%cr3;" : : "m" (sPageDirectory) : "eax");
asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
}
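
// Maps "size" bytes of physical memory into virtual address space and returns
// the virtual address, preserving the offset within the first page.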
extern "C" addr_t
mmu_map_physical_memory(addr_t physicalAddress, size_t size, uint32 flags)
{
addr_t address = sNextVirtualAddress;
addr_t pageOffset = physicalAddress & (B_PAGE_SIZE - 1);
physicalAddress -= pageOffset;
size += pageOffset;
for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
map_page(get_next_virtual_page(), physicalAddress + offset, flags);
}
return address + pageOffset;
}
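
// Allocates and maps "size" bytes. A requested virtualAddress must lie within
// the kMaxKernelSize window after KERNEL_LOAD_BASE (this path is used when
// loading the kernel image); otherwise the next free virtual range is used.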
extern "C" void *
mmu_allocate(void *virtualAddress, size_t size)
{
TRACE("mmu_allocate: requested vaddr: %p, next free vaddr: 0x%lx, size: "
"%ld\n", virtualAddress, sNextVirtualAddress, size);
size = HOWMANY(size, B_PAGE_SIZE);
if (virtualAddress != NULL) {
addr_t address = (addr_t)virtualAddress;
if (address < KERNEL_LOAD_BASE || address + size * B_PAGE_SIZE
>= KERNEL_LOAD_BASE + kMaxKernelSize)
return NULL;
for (uint32 i = 0; i < size; i++) {
map_page(address, get_next_physical_page(), kDefaultPageFlags);
address += B_PAGE_SIZE;
}
return virtualAddress;
}
void *address = (void *)sNextVirtualAddress;
for (uint32 i = 0; i < size; i++) {
map_page(get_next_virtual_page(), get_next_physical_page(),
kDefaultPageFlags);
}
return address;
}
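
// Allocates and maps a single page, optionally returning its physical address.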
void *
mmu_allocate_page(addr_t *_physicalAddress)
{
addr_t virt = get_next_virtual_page();
addr_t phys = get_next_physical_page();
map_page(virt, phys, kDefaultPageFlags);
if (_physicalAddress)
*_physicalAddress = phys;
return (void *)virt;
}
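
// Marks the given physical range as allocated, provided it lies within known
// physical memory and is still free.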
bool
mmu_allocate_physical(addr_t base, size_t size)
{
if (!is_address_range_covered(gKernelArgs.physical_memory_range,
gKernelArgs.num_physical_memory_ranges, base, size)) {
return false;
}
uint64 foundBase;
if (!get_free_address_range(gKernelArgs.physical_allocated_range,
gKernelArgs.num_physical_allocated_ranges, base, size, &foundBase)
|| foundBase != base) {
return false;
}
return insert_physical_allocated_range(base, size) == B_OK;
}
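
// Unmaps a previously allocated range; the virtual address space is only
// reclaimed when the range was the most recent allocation.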
extern "C" void
mmu_free(void *virtualAddress, size_t size)
{
TRACE("mmu_free(virtualAddress = %p, size: %ld)\n", virtualAddress, size);
addr_t address = (addr_t)virtualAddress;
addr_t pageOffset = address % B_PAGE_SIZE;
address -= pageOffset;
size += pageOffset;
size = ROUNDUP(size, B_PAGE_SIZE);
if (address < KERNEL_LOAD_BASE || address + size > sNextVirtualAddress) {
panic("mmu_free: asked to unmap out of range region (%p, size %lx)\n",
(void *)address, size);
}
for (size_t i = 0; i < size; i += B_PAGE_SIZE) {
unmap_page(address);
address += B_PAGE_SIZE;
}
if (address == sNextVirtualAddress) {
sNextVirtualAddress -= size;
}
}
size_t
mmu_get_virtual_usage()
{
return sNextVirtualAddress - KERNEL_LOAD_BASE;
}
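
// Looks up the physical page backing a virtual address; returns false if the
// address is not mapped.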
bool
mmu_get_virtual_mapping(addr_t virtualAddress, addr_t *_physicalAddress)
{
if (virtualAddress < KERNEL_LOAD_BASE) {
panic("mmu_get_virtual_mapping: asked to lookup invalid page %p!\n",
(void *)virtualAddress);
}
uint32 dirEntry = sPageDirectory[virtualAddress / (B_PAGE_SIZE * 1024)];
if ((dirEntry & (1 << 0)) == 0)
return false;
uint32 *pageTable = (uint32 *)(dirEntry & 0xfffff000);
uint32 tableEntry = pageTable[(virtualAddress % (B_PAGE_SIZE * 1024))
/ B_PAGE_SIZE];
if ((tableEntry & (1 << 0)) == 0)
return false;
*_physicalAddress = tableEntry & 0xfffff000;
return true;
}
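
// Prepares the MMU state that is handed over to the kernel: sets up the boot
// GDT and records all memory ranges in the kernel args.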
extern "C" void
mmu_init_for_kernel(void)
{
TRACE("mmu_init_for_kernel\n");
STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);
clear_segment_descriptor(&gBootGDT[0]);
set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], 0, 0xffffffff,
DT_CODE_READABLE, DPL_KERNEL);
set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], 0, 0xffffffff,
DT_DATA_WRITEABLE, DPL_KERNEL);
set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], 0, 0xffffffff,
DT_CODE_READABLE, DPL_USER);
set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], 0, 0xffffffff,
DT_DATA_WRITEABLE, DPL_USER);
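	// load the GDT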
struct gdt_idt_descr gdtDescriptor;
gdtDescriptor.limit = sizeof(gBootGDT);
gdtDescriptor.base = gBootGDT;
asm("lgdt %0" : : "m" (gdtDescriptor));
TRACE("gdt at virtual address %p\n", gBootGDT);
gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE;
gKernelArgs.virtual_allocated_range[0].size
= sNextVirtualAddress - KERNEL_LOAD_BASE;
gKernelArgs.num_virtual_allocated_ranges = 1;
sort_address_ranges(gKernelArgs.physical_memory_range,
gKernelArgs.num_physical_memory_ranges);
sort_address_ranges(gKernelArgs.physical_allocated_range,
gKernelArgs.num_physical_allocated_ranges);
sort_address_ranges(gKernelArgs.virtual_allocated_range,
gKernelArgs.num_virtual_allocated_ranges);
#ifdef TRACE_MEMORY_MAP
{
uint32 i;
dprintf("phys memory ranges:\n");
for (i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
dprintf(" base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
gKernelArgs.physical_memory_range[i].start,
gKernelArgs.physical_memory_range[i].size);
}
dprintf("allocated phys memory ranges:\n");
for (i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
dprintf(" base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
gKernelArgs.physical_allocated_range[i].start,
gKernelArgs.physical_allocated_range[i].size);
}
dprintf("allocated virt memory ranges:\n");
for (i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
dprintf(" base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
gKernelArgs.virtual_allocated_range[i].start,
gKernelArgs.virtual_allocated_range[i].size);
}
}
#endif
}
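
// Called early in the loader: turns on paging, allocates the boot CPU's
// kernel stack, and builds the physical memory map from the BIOS.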
extern "C" void
mmu_init(void)
{
TRACE("mmu_init\n");
gKernelArgs.arch_args.virtual_end = KERNEL_LOAD_BASE;
gKernelArgs.physical_allocated_range[0].start = sNextPhysicalAddress;
gKernelArgs.physical_allocated_range[0].size = 0;
gKernelArgs.physical_allocated_range[1].start = sNextPhysicalKernelAddress;
gKernelArgs.physical_allocated_range[1].size = 0;
gKernelArgs.num_physical_allocated_ranges = 2;
init_page_directory();
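	// map the page directory onto itself, creating the "page hole" in the
	// last 4 MB of the address space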
sPageDirectory[1023] = (uint32)sPageDirectory | kDefaultPageFlags;
gKernelArgs.arch_args.vir_pgdir = get_next_virtual_page();
map_page(gKernelArgs.arch_args.vir_pgdir, (uint32)sPageDirectory,
kDefaultPageFlags);
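	// allocate the kernel stack for the boot CPU, including its guard pages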
gKernelArgs.cpu_kstack[0].start = (addr_t)mmu_allocate(NULL,
KERNEL_STACK_SIZE + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE);
gKernelArgs.cpu_kstack[0].size = KERNEL_STACK_SIZE
+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE;
TRACE("kernel stack at 0x%" B_PRIx64 " to 0x%" B_PRIx64 "\n",
gKernelArgs.cpu_kstack[0].start, gKernelArgs.cpu_kstack[0].start
+ gKernelArgs.cpu_kstack[0].size);
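	// build the physical memory map, preferably from the E820 BIOS call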
extended_memory *extMemoryBlock;
uint32 extMemoryCount = get_memory_map(&extMemoryBlock);
if (extMemoryCount > 0) {
gKernelArgs.num_physical_memory_ranges = 0;
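		// first scan: add all usable (type 1) ranges, trimmed to page boundaries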
for (uint32 i = 0; i < extMemoryCount; i++) {
if (extMemoryBlock[i].type != 1)
continue;
uint64 base = extMemoryBlock[i].base_addr;
uint64 length = extMemoryBlock[i].length;
uint64 end = base + length;
base = ROUNDUP(base, B_PAGE_SIZE);
end = ROUNDDOWN(end, B_PAGE_SIZE);
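			// ignore memory above 4 GB if physical addresses are only 32 bits wide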
#if B_HAIKU_PHYSICAL_BITS == 32
if (end > 0x100000000ULL)
end = 0x100000000ULL;
#endif
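			// ignore memory below 1 MB; some BIOSes misreport its type, and
			// the kernel reserves that area anyway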
if (base < 0x100000)
base = 0x100000;
gKernelArgs.ignored_physical_memory
+= length - (max_c(end, base) - base);
if (end <= base)
continue;
status_t status = insert_physical_memory_range(base, end - base);
if (status == B_ENTRY_NOT_FOUND) {
panic("mmu_init(): Failed to add physical memory range "
"%#" B_PRIx64 " - %#" B_PRIx64 " : all %d entries are "
"used already!\n", base, end, MAX_PHYSICAL_MEMORY_RANGE);
} else if (status != B_OK) {
panic("mmu_init(): Failed to add physical memory range "
"%#" B_PRIx64 " - %#" B_PRIx64 "\n", base, end);
}
}
uint64 initialPhysicalMemory = total_physical_memory();
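		// second scan: remove all non-usable ranges again, in case any of
		// them overlap ranges added above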
for (uint32 i = 0; i < extMemoryCount; i++) {
if (extMemoryBlock[i].type == 1)
continue;
uint64 base = extMemoryBlock[i].base_addr;
uint64 end = ROUNDUP(base + extMemoryBlock[i].length, B_PAGE_SIZE);
base = ROUNDDOWN(base, B_PAGE_SIZE);
status_t status = remove_physical_memory_range(base, end - base);
if (status != B_OK) {
panic("mmu_init(): Failed to remove physical memory range "
"%#" B_PRIx64 " - %#" B_PRIx64 "\n", base, end);
}
}
sort_address_ranges(gKernelArgs.physical_memory_range,
gKernelArgs.num_physical_memory_ranges);
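		// remove ranges smaller than 64 KB; they are not worth tracking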
for (int32 i = gKernelArgs.num_physical_memory_ranges - 1; i >= 0;
i--) {
uint64 size = gKernelArgs.physical_memory_range[i].size;
if (size < 64 * 1024) {
uint64 start = gKernelArgs.physical_memory_range[i].start;
remove_physical_memory_range(start, size);
}
}
gKernelArgs.ignored_physical_memory
+= initialPhysicalMemory - total_physical_memory();
} else {
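		// the E820 map is unavailable; fall back to the older BIOS memory
		// calls, starting with the conventional memory below 640 kB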
bios_regs regs;
gKernelArgs.physical_memory_range[0].start = 0;
gKernelArgs.physical_memory_range[0].size = 0x9f000;
gKernelArgs.physical_memory_range[1].start = 0x100000;
regs.eax = 0xe801;
		call_bios(0x15, &regs);
if ((regs.flags & CARRY_FLAG) != 0) {
regs.eax = 0x8800;
			call_bios(0x15, &regs);
if ((regs.flags & CARRY_FLAG) != 0) {
dprintf("No memory size - using 64 MB (fix me!)\n");
uint32 memSize = 64 * 1024 * 1024;
gKernelArgs.physical_memory_range[1].size = memSize - 0x100000;
} else {
dprintf("Get Extended Memory Size succeeded.\n");
gKernelArgs.physical_memory_range[1].size = regs.eax * 1024;
}
gKernelArgs.num_physical_memory_ranges = 2;
} else {
dprintf("Get Memory Size for Large Configurations succeeded.\n");
gKernelArgs.physical_memory_range[1].size = regs.ecx * 1024;
gKernelArgs.physical_memory_range[2].start = 0x1000000;
gKernelArgs.physical_memory_range[2].size = regs.edx * 64 * 1024;
gKernelArgs.num_physical_memory_ranges = 3;
}
}
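	// tell the kernel where the recursive page directory mapping lives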
gKernelArgs.arch_args.page_hole = 0xffc00000;
}
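
// Platform hooks used by the generic boot loader code; the protection
// argument is ignored here.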
extern "C" status_t
platform_allocate_region(void **_address, size_t size, uint8 protection)
{
void *address = mmu_allocate(*_address, size);
if (address == NULL)
return B_NO_MEMORY;
*_address = address;
return B_OK;
}
extern "C" status_t
platform_free_region(void *address, size_t size)
{
mmu_free(address, size);
return B_OK;
}
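
// The loader heap comes from identity-mapped physical memory, so the
// returned pointers are usable directly.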
ssize_t
platform_allocate_heap_region(size_t size, void **_base)
{
size = ROUNDUP(size, B_PAGE_SIZE);
addr_t base = allocate_physical(size);
if (base == 0)
return B_NO_MEMORY;
*_base = (void*)base;
return size;
}
void
platform_free_heap_region(void *_base, size_t size)
{
addr_t base = (addr_t)_base;
remove_physical_allocated_range(base, size);
if (sNextPhysicalAddress == (base + size))
sNextPhysicalAddress -= size;
}
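
// Loader addresses are identity mapped and stay valid for the kernel, so
// these conversions are trivial.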
extern "C" status_t
platform_bootloader_address_to_kernel_address(void *address, addr_t *_result)
{
TRACE("%s: called\n", __func__);
*_result = (addr_t)address;
return B_OK;
}
extern "C" status_t
platform_kernel_address_to_bootloader_address(addr_t address, void **_result)
{
TRACE("%s: called\n", __func__);
*_result = (void*)address;
return B_OK;
}