#include "vma_internal.h"
#include "vma.h"
struct mmap_state {
struct mm_struct *mm;
struct vma_iterator *vmi;
unsigned long addr;
unsigned long end;
pgoff_t pgoff;
unsigned long pglen;
union {
vm_flags_t vm_flags;
vma_flags_t vma_flags;
};
struct file *file;
pgprot_t page_prot;
const struct vm_operations_struct *vm_ops;
void *vm_private_data;
unsigned long charged;
struct vm_area_struct *prev;
struct vm_area_struct *next;
struct vma_munmap_struct vms;
struct ma_state mas_detach;
struct maple_tree mt_detach;
bool check_ksm_early :1;
bool hold_file_rmap_lock :1;
bool file_doesnt_need_get :1;
};
#define MMAP_STATE(name, mm_, vmi_, addr_, len_, pgoff_, vm_flags_, file_) \
struct mmap_state name = { \
.mm = mm_, \
.vmi = vmi_, \
.addr = addr_, \
.end = (addr_) + (len_), \
.pgoff = pgoff_, \
.pglen = PHYS_PFN(len_), \
.vm_flags = vm_flags_, \
.file = file_, \
.page_prot = vm_get_page_prot(vm_flags_), \
}
#define VMG_MMAP_STATE(name, map_, vma_) \
struct vma_merge_struct name = { \
.mm = (map_)->mm, \
.vmi = (map_)->vmi, \
.start = (map_)->addr, \
.end = (map_)->end, \
.vm_flags = (map_)->vm_flags, \
.pgoff = (map_)->pgoff, \
.file = (map_)->file, \
.prev = (map_)->prev, \
.middle = vma_, \
.next = (vma_) ? NULL : (map_)->next, \
.state = VMA_MERGE_START, \
}
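/*
 * Does this VMA carry anon_vmas inherited from a parent via fork()? This is
 * the case when its anon_vma_chain contains more than a single entry.
 */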
static bool vma_is_fork_child(struct vm_area_struct *vma)
{
return vma && vma->anon_vma && !list_is_singular(&vma->anon_vma_chain);
}
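/*
 * Check whether an existing VMA (vmg->next if @merge_next, else vmg->prev) has
 * attributes compatible with the proposed mapping: mempolicy, VM flags
 * (ignoring VM_IGNORE_MERGE bits), backing file, userfaultfd context and anon
 * VMA name must all match.
 */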
static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next)
{
struct vm_area_struct *vma = merge_next ? vmg->next : vmg->prev;
if (!mpol_equal(vmg->policy, vma_policy(vma)))
return false;
if ((vma->vm_flags ^ vmg->vm_flags) & ~VM_IGNORE_MERGE)
return false;
if (vma->vm_file != vmg->file)
return false;
if (!is_mergeable_vm_userfaultfd_ctx(vma, vmg->uffd_ctx))
return false;
if (!anon_vma_name_eq(anon_vma_name(vma), vmg->anon_name))
return false;
return true;
}
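/*
 * Determine whether the anon_vma of the merge target (prev or next) is
 * compatible with that of the VMA being modified. An unfaulted side may adopt
 * the other side's anon_vma only if that anon_vma was not inherited via
 * fork(); otherwise both sides must already share the same anon_vma.
 */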
static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next)
{
struct vm_area_struct *tgt = merge_next ? vmg->next : vmg->prev;
struct vm_area_struct *src = vmg->middle;
struct anon_vma *tgt_anon = tgt->anon_vma;
struct anon_vma *src_anon = vmg->anon_vma;
VM_WARN_ON(src && src_anon != src->anon_vma);
if (!tgt_anon && src_anon) {
struct vm_area_struct *copied_from = vmg->copied_from;
if (vma_is_fork_child(src))
return false;
if (vma_is_fork_child(copied_from))
return false;
return true;
}
if (tgt_anon && !src_anon)
return !vma_is_fork_child(tgt);
return src_anon == tgt_anon;
}
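/*
 * init_multi_vma_prep() - Initializer for struct vma_prepare
 * @vp: The vma_prepare struct
 * @vma: The vma that will be altered once locked
 * @vmg: The merge state that will be used to determine adjustment and VMA
 *       removal.
 */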
static void init_multi_vma_prep(struct vma_prepare *vp,
struct vm_area_struct *vma,
struct vma_merge_struct *vmg)
{
struct vm_area_struct *adjust;
struct vm_area_struct **remove = &vp->remove;
memset(vp, 0, sizeof(struct vma_prepare));
vp->vma = vma;
vp->anon_vma = vma->anon_vma;
if (vmg && vmg->__remove_middle) {
*remove = vmg->middle;
remove = &vp->remove2;
}
if (vmg && vmg->__remove_next)
*remove = vmg->next;
if (vmg && vmg->__adjust_middle_start)
adjust = vmg->middle;
else if (vmg && vmg->__adjust_next_start)
adjust = vmg->next;
else
adjust = NULL;
vp->adj_next = adjust;
if (!vp->anon_vma && adjust)
vp->anon_vma = adjust->anon_vma;
VM_WARN_ON(vp->anon_vma && adjust && adjust->anon_vma &&
vp->anon_vma != adjust->anon_vma);
vp->file = vma->vm_file;
if (vp->file)
vp->mapping = vma->vm_file->f_mapping;
if (vmg && vmg->skip_vma_uprobe)
vp->skip_vma_uprobe = true;
}
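/*
 * Can the proposed range be merged with the VMA that immediately follows it
 * (vmg->next)? The attributes and anon_vma must be compatible and next's page
 * offset must continue exactly where the new range's page offset ends.
 */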
static bool can_vma_merge_before(struct vma_merge_struct *vmg)
{
pgoff_t pglen = PHYS_PFN(vmg->end - vmg->start);
if (is_mergeable_vma(vmg, true) &&
is_mergeable_anon_vma(vmg, true)) {
if (vmg->next->vm_pgoff == vmg->pgoff + pglen)
return true;
}
return false;
}
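/*
 * Can the proposed range be merged with the VMA that immediately precedes it
 * (vmg->prev)? The attributes and anon_vma must be compatible and the new
 * range's page offset must continue exactly where prev's ends.
 */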
static bool can_vma_merge_after(struct vma_merge_struct *vmg)
{
if (is_mergeable_vma(vmg, false) &&
is_mergeable_anon_vma(vmg, false)) {
if (vmg->prev->vm_pgoff + vma_pages(vmg->prev) == vmg->pgoff)
return true;
}
return false;
}
static void __vma_link_file(struct vm_area_struct *vma,
struct address_space *mapping)
{
if (vma_is_shared_maywrite(vma))
mapping_allow_writable(mapping);
flush_dcache_mmap_lock(mapping);
vma_interval_tree_insert(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
}
static void __remove_shared_vm_struct(struct vm_area_struct *vma,
struct address_space *mapping)
{
if (vma_is_shared_maywrite(vma))
mapping_unmap_writable(mapping);
flush_dcache_mmap_lock(mapping);
vma_interval_tree_remove(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
}
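/*
 * Before a VMA's vm_start, vm_end or vm_pgoff may be changed, it must be
 * removed from its anon_vma's interval trees (pre_update), and reinserted
 * once the update is complete (post_update). The whole sequence runs under
 * the exclusive mmap lock and the root anon_vma lock.
 */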
static void
anon_vma_interval_tree_pre_update_vma(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc;
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_remove(avc, &avc->anon_vma->rb_root);
}
static void
anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
{
struct anon_vma_chain *avc;
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_insert(avc, &avc->anon_vma->rb_root);
}
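/*
 * vma_prepare() - Helper function for handling locking VMAs prior to altering
 * @vp: The initialized vma_prepare struct
 */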
static void vma_prepare(struct vma_prepare *vp)
{
if (vp->file) {
uprobe_munmap(vp->vma, vp->vma->vm_start, vp->vma->vm_end);
if (vp->adj_next)
uprobe_munmap(vp->adj_next, vp->adj_next->vm_start,
vp->adj_next->vm_end);
i_mmap_lock_write(vp->mapping);
if (vp->insert && vp->insert->vm_file) {
__vma_link_file(vp->insert,
vp->insert->vm_file->f_mapping);
}
}
if (vp->anon_vma) {
anon_vma_lock_write(vp->anon_vma);
anon_vma_interval_tree_pre_update_vma(vp->vma);
if (vp->adj_next)
anon_vma_interval_tree_pre_update_vma(vp->adj_next);
}
if (vp->file) {
flush_dcache_mmap_lock(vp->mapping);
vma_interval_tree_remove(vp->vma, &vp->mapping->i_mmap);
if (vp->adj_next)
vma_interval_tree_remove(vp->adj_next,
&vp->mapping->i_mmap);
}
}
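/*
 * vma_complete() - Helper function for handling the unlocking after altering
 * VMAs, or for inserting a VMA.
 * @vp: The vma_prepare struct
 * @vmi: The vma iterator
 * @mm: The mm_struct
 */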
static void vma_complete(struct vma_prepare *vp, struct vma_iterator *vmi,
struct mm_struct *mm)
{
if (vp->file) {
if (vp->adj_next)
vma_interval_tree_insert(vp->adj_next,
&vp->mapping->i_mmap);
vma_interval_tree_insert(vp->vma, &vp->mapping->i_mmap);
flush_dcache_mmap_unlock(vp->mapping);
}
if (vp->remove && vp->file) {
__remove_shared_vm_struct(vp->remove, vp->mapping);
if (vp->remove2)
__remove_shared_vm_struct(vp->remove2, vp->mapping);
} else if (vp->insert) {
vma_iter_store_new(vmi, vp->insert);
mm->map_count++;
}
if (vp->anon_vma) {
anon_vma_interval_tree_post_update_vma(vp->vma);
if (vp->adj_next)
anon_vma_interval_tree_post_update_vma(vp->adj_next);
anon_vma_unlock_write(vp->anon_vma);
}
if (vp->file) {
i_mmap_unlock_write(vp->mapping);
if (!vp->skip_vma_uprobe) {
uprobe_mmap(vp->vma);
if (vp->adj_next)
uprobe_mmap(vp->adj_next);
}
}
if (vp->remove) {
again:
vma_mark_detached(vp->remove);
if (vp->file) {
uprobe_munmap(vp->remove, vp->remove->vm_start,
vp->remove->vm_end);
fput(vp->file);
}
if (vp->remove->anon_vma)
unlink_anon_vmas(vp->remove);
mm->map_count--;
mpol_put(vma_policy(vp->remove));
if (!vp->remove2)
WARN_ON_ONCE(vp->vma->vm_end < vp->remove->vm_end);
vm_area_free(vp->remove);
if (vp->remove2) {
vp->remove = vp->remove2;
vp->remove2 = NULL;
goto again;
}
}
if (vp->insert && vp->file)
uprobe_mmap(vp->insert);
}
static void init_vma_prep(struct vma_prepare *vp, struct vm_area_struct *vma)
{
init_multi_vma_prep(vp, vma, NULL);
}
static bool can_vma_merge_left(struct vma_merge_struct *vmg)
{
return vmg->prev && vmg->prev->vm_end == vmg->start &&
can_vma_merge_after(vmg);
}
static bool can_vma_merge_right(struct vma_merge_struct *vmg,
bool can_merge_left)
{
struct vm_area_struct *next = vmg->next;
struct vm_area_struct *prev;
if (!next || vmg->end != next->vm_start || !can_vma_merge_before(vmg))
return false;
if (!can_merge_left)
return true;
prev = vmg->prev;
return !prev->anon_vma || !next->anon_vma ||
prev->anon_vma == next->anon_vma;
}
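/*
 * Close a vm structure and free it.
 */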
void remove_vma(struct vm_area_struct *vma)
{
might_sleep();
vma_close(vma);
if (vma->vm_file)
fput(vma->vm_file);
mpol_put(vma_policy(vma));
vm_area_free(vma);
}
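/*
 * Get rid of the page table entries and page tables for the region described
 * by @unmap. Called with the mm semaphore held.
 */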
void unmap_region(struct unmap_desc *unmap)
{
struct mm_struct *mm = unmap->first->vm_mm;
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm);
update_hiwater_rss(mm);
unmap_vmas(&tlb, unmap);
mas_set(unmap->mas, unmap->tree_reset);
free_pgtables(&tlb, unmap);
tlb_finish_mmu(&tlb);
}
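/*
 * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
 * has already been checked or doesn't make sense to fail.
 * VMA Iterator will point to the original VMA.
 */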
static __must_check int
__split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, int new_below)
{
struct vma_prepare vp;
struct vm_area_struct *new;
int err;
WARN_ON(vma->vm_start >= addr);
WARN_ON(vma->vm_end <= addr);
if (vma->vm_ops && vma->vm_ops->may_split) {
err = vma->vm_ops->may_split(vma, addr);
if (err)
return err;
}
new = vm_area_dup(vma);
if (!new)
return -ENOMEM;
if (new_below) {
new->vm_end = addr;
} else {
new->vm_start = addr;
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
}
err = -ENOMEM;
vma_iter_config(vmi, new->vm_start, new->vm_end);
if (vma_iter_prealloc(vmi, new))
goto out_free_vma;
err = vma_dup_policy(vma, new);
if (err)
goto out_free_vmi;
err = anon_vma_clone(new, vma, VMA_OP_SPLIT);
if (err)
goto out_free_mpol;
if (new->vm_file)
get_file(new->vm_file);
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
vma_start_write(vma);
vma_start_write(new);
init_vma_prep(&vp, vma);
vp.insert = new;
vma_prepare(&vp);
vma_adjust_trans_huge(vma, vma->vm_start, addr, NULL);
if (is_vm_hugetlb_page(vma))
hugetlb_split(vma, addr);
if (new_below) {
vma->vm_start = addr;
vma->vm_pgoff += (addr - new->vm_start) >> PAGE_SHIFT;
} else {
vma->vm_end = addr;
}
vma_complete(&vp, vmi, vma->vm_mm);
validate_mm(vma->vm_mm);
if (new_below)
vma_next(vmi);
else
vma_prev(vmi);
return 0;
out_free_mpol:
mpol_put(vma_policy(new));
out_free_vmi:
vma_iter_free(vmi);
out_free_vma:
vm_area_free(new);
return err;
}
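/*
 * Split a vma into two pieces at address 'addr', a new vma is allocated
 * either for the first part or the tail.
 */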
static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, int new_below)
{
if (vma->vm_mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
return __split_vma(vmi, vma, addr, new_below);
}
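/*
 * dup_anon_vma() - Helper function to duplicate anon_vma on VMA merge in the
 * instance that the destination VMA has no anon_vma but the source does.
 *
 * @dst: The destination VMA
 * @src: The source VMA
 * @dup: Pointer to the destination VMA when successful.
 *
 * Returns: 0 on success.
 */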
static int dup_anon_vma(struct vm_area_struct *dst,
struct vm_area_struct *src, struct vm_area_struct **dup)
{
if (src->anon_vma && !dst->anon_vma) {
int ret;
vma_assert_write_locked(dst);
dst->anon_vma = src->anon_vma;
ret = anon_vma_clone(dst, src, VMA_OP_MERGE_UNFAULTED);
if (ret)
return ret;
*dup = dst;
}
return 0;
}
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
void validate_mm(struct mm_struct *mm)
{
int bug = 0;
int i = 0;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
mt_validate(&mm->mm_mt);
for_each_vma(vmi, vma) {
#ifdef CONFIG_DEBUG_VM_RB
struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
#endif
unsigned long vmi_start, vmi_end;
bool warn = 0;
vmi_start = vma_iter_addr(&vmi);
vmi_end = vma_iter_end(&vmi);
if (VM_WARN_ON_ONCE_MM(vma->vm_end != vmi_end, mm))
warn = 1;
if (VM_WARN_ON_ONCE_MM(vma->vm_start != vmi_start, mm))
warn = 1;
if (warn) {
pr_emerg("issue in %s\n", current->comm);
dump_stack();
dump_vma(vma);
pr_emerg("tree range: %px start %lx end %lx\n", vma,
vmi_start, vmi_end - 1);
vma_iter_dump_tree(&vmi);
}
#ifdef CONFIG_DEBUG_VM_RB
if (anon_vma) {
anon_vma_lock_read(anon_vma);
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
anon_vma_interval_tree_verify(avc);
anon_vma_unlock_read(anon_vma);
}
#endif
if (++i > mm->map_count + 10) {
i = -1;
break;
}
}
if (i != mm->map_count) {
pr_emerg("map_count %d vma iterator %d\n", mm->map_count, i);
bug = 1;
}
VM_BUG_ON_MM(bug, mm);
}
#endif
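/*
 * If the merge shifts the start of the middle or next VMA up to vmg->end,
 * compute the page offset corresponding to the new start and update that
 * VMA's range accordingly.
 */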
static void vmg_adjust_set_range(struct vma_merge_struct *vmg)
{
struct vm_area_struct *adjust;
pgoff_t pgoff;
if (vmg->__adjust_middle_start) {
adjust = vmg->middle;
pgoff = adjust->vm_pgoff + PHYS_PFN(vmg->end - adjust->vm_start);
} else if (vmg->__adjust_next_start) {
adjust = vmg->next;
pgoff = adjust->vm_pgoff - PHYS_PFN(adjust->vm_start - vmg->end);
} else {
return;
}
vma_set_range(adjust, vmg->end, adjust->vm_end, pgoff);
}
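/*
 * Actually perform the VMA merge operation.
 *
 * Returns: 0 on success, or an error value on failure.
 */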
static int commit_merge(struct vma_merge_struct *vmg)
{
struct vm_area_struct *vma;
struct vma_prepare vp;
if (vmg->__adjust_next_start) {
vma = vmg->middle;
vma_iter_config(vmg->vmi, vmg->end, vmg->next->vm_end);
} else {
vma = vmg->target;
vma_iter_config(vmg->vmi, vmg->start, vmg->end);
}
init_multi_vma_prep(&vp, vma, vmg);
if (vma_iter_prealloc(vmg->vmi, vma))
return -ENOMEM;
vma_prepare(&vp);
vma_adjust_trans_huge(vma, vmg->start, vmg->end,
vmg->__adjust_middle_start ? vmg->middle : NULL);
vma_set_range(vma, vmg->start, vmg->end, vmg->pgoff);
vmg_adjust_set_range(vmg);
vma_iter_store_overwrite(vmg->vmi, vmg->target);
vma_complete(&vp, vmg->vmi, vma->vm_mm);
return 0;
}
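/* We can only remove VMAs when merging if they do not have a close hook. */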
static bool can_merge_remove_vma(struct vm_area_struct *vma)
{
return !vma->vm_ops || !vma->vm_ops->close;
}
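/*
 * vma_merge_existing_range - Attempt to merge VMAs based on a VMA having its
 * attributes modified.
 *
 * @vmg: Describes the modifications being made to a VMA and associated
 *       metadata.
 *
 * The modified range [vmg->start, vmg->end) must lie within vmg->middle and
 * touch at least one of its boundaries; depending on which neighbours are
 * compatible, the range is merged with the preceding VMA, the following VMA,
 * or both.
 *
 * Returns: The merged VMA if a merge was performed, or NULL otherwise. On
 * failure to allocate, vmg->state is set to VMA_MERGE_ERROR_NOMEM unless
 * vmg->give_up_on_oom is set.
 */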
static __must_check struct vm_area_struct *vma_merge_existing_range(
struct vma_merge_struct *vmg)
{
vm_flags_t sticky_flags = vmg->vm_flags & VM_STICKY;
struct vm_area_struct *middle = vmg->middle;
struct vm_area_struct *prev = vmg->prev;
struct vm_area_struct *next;
struct vm_area_struct *anon_dup = NULL;
unsigned long start = vmg->start;
unsigned long end = vmg->end;
bool left_side = middle && start == middle->vm_start;
bool right_side = middle && end == middle->vm_end;
int err = 0;
bool merge_left, merge_right, merge_both;
mmap_assert_write_locked(vmg->mm);
VM_WARN_ON_VMG(!middle, vmg);
VM_WARN_ON_VMG(vmg->next, vmg);
VM_WARN_ON_VMG(prev && start <= prev->vm_start, vmg);
VM_WARN_ON_VMG(start >= end, vmg);
VM_WARN_ON_VMG(middle &&
((middle != prev && vmg->start != middle->vm_start) ||
vmg->end > middle->vm_end), vmg);
VM_WARN_ON_VMG(middle &&
!(vma_iter_addr(vmg->vmi) >= middle->vm_start &&
vma_iter_addr(vmg->vmi) < middle->vm_end), vmg);
VM_WARN_ON_VMG(vmg->copied_from, vmg);
vmg->state = VMA_MERGE_NOMERGE;
if (vmg->vm_flags & VM_SPECIAL || (!left_side && !right_side))
return NULL;
if (left_side)
merge_left = can_vma_merge_left(vmg);
else
merge_left = false;
if (right_side) {
next = vmg->next = vma_iter_next_range(vmg->vmi);
vma_iter_prev_range(vmg->vmi);
merge_right = can_vma_merge_right(vmg, merge_left);
} else {
merge_right = false;
next = NULL;
}
if (merge_left)
vma_prev(vmg->vmi);
else if (!merge_right)
return NULL;
merge_both = merge_left && merge_right;
vmg->__remove_middle = left_side && right_side;
if (vmg->__remove_middle && !can_merge_remove_vma(middle))
return NULL;
vmg->__remove_next = merge_both;
if (vmg->__remove_next && !can_merge_remove_vma(next)) {
vmg->__remove_next = false;
merge_right = false;
merge_both = false;
}
vma_start_write(middle);
if (merge_right) {
vma_start_write(next);
vmg->target = next;
sticky_flags |= (next->vm_flags & VM_STICKY);
}
if (merge_left) {
vma_start_write(prev);
vmg->target = prev;
sticky_flags |= (prev->vm_flags & VM_STICKY);
}
if (merge_both) {
vmg->start = prev->vm_start;
vmg->end = next->vm_end;
vmg->pgoff = prev->vm_pgoff;
err = dup_anon_vma(prev, next->anon_vma ? next : middle,
&anon_dup);
} else if (merge_left) {
vmg->start = prev->vm_start;
vmg->pgoff = prev->vm_pgoff;
if (!vmg->__remove_middle)
vmg->__adjust_middle_start = true;
err = dup_anon_vma(prev, middle, &anon_dup);
} else {
pgoff_t pglen = PHYS_PFN(vmg->end - vmg->start);
VM_WARN_ON_VMG(!merge_right, vmg);
VM_WARN_ON_VMG(vmg->start > middle->vm_start && prev && middle != prev, vmg);
if (vmg->__remove_middle) {
vmg->end = next->vm_end;
vmg->pgoff = next->vm_pgoff - pglen;
} else {
vmg->__adjust_next_start = true;
vmg->start = middle->vm_start;
vmg->end = start;
vmg->pgoff = middle->vm_pgoff;
}
err = dup_anon_vma(next, middle, &anon_dup);
}
if (err || commit_merge(vmg))
goto abort;
vm_flags_set(vmg->target, sticky_flags);
khugepaged_enter_vma(vmg->target, vmg->vm_flags);
vmg->state = VMA_MERGE_SUCCESS;
return vmg->target;
abort:
vma_iter_set(vmg->vmi, start);
vma_iter_load(vmg->vmi);
if (anon_dup)
unlink_anon_vmas(anon_dup);
if (!vmg->give_up_on_oom)
vmg->state = VMA_MERGE_ERROR_NOMEM;
return NULL;
}
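/*
 * vma_merge_new_range - Attempt to merge a new VMA into address space
 *
 * @vmg: Describes the VMA we are adding, in the range vmg->start to vmg->end
 *       (exclusive), which we try to merge with any adjacent VMAs if possible.
 *
 * Returns: In instances where no merge was possible, NULL. Otherwise, a
 * pointer to the VMA we expanded.
 */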
struct vm_area_struct *vma_merge_new_range(struct vma_merge_struct *vmg)
{
struct vm_area_struct *prev = vmg->prev;
struct vm_area_struct *next = vmg->next;
unsigned long end = vmg->end;
bool can_merge_left, can_merge_right;
mmap_assert_write_locked(vmg->mm);
VM_WARN_ON_VMG(vmg->middle, vmg);
VM_WARN_ON_VMG(vmg->target, vmg);
VM_WARN_ON_VMG(vma_iter_addr(vmg->vmi) > end, vmg);
vmg->state = VMA_MERGE_NOMERGE;
if ((vmg->vm_flags & VM_SPECIAL) || (!prev && !next))
return NULL;
can_merge_left = can_vma_merge_left(vmg);
can_merge_right = !vmg->just_expand && can_vma_merge_right(vmg, can_merge_left);
if (can_merge_right) {
vmg->end = next->vm_end;
vmg->target = next;
}
if (can_merge_left) {
vmg->start = prev->vm_start;
vmg->target = prev;
vmg->pgoff = prev->vm_pgoff;
if (can_merge_right && !can_merge_remove_vma(next))
vmg->end = end;
if (!vmg->just_expand) {
vma_prev(vmg->vmi);
}
}
if (vmg->target && !vma_expand(vmg)) {
khugepaged_enter_vma(vmg->target, vmg->vm_flags);
vmg->state = VMA_MERGE_SUCCESS;
return vmg->target;
}
return NULL;
}
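/*
 * Attempt to merge a range that is being copied from an existing VMA
 * (vmg->middle), as done by copy_vma(). The source is recorded in
 * vmg->copied_from so that anon_vma compatibility checks can account for it,
 * and the merge itself is treated as a new-range merge.
 */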
static struct vm_area_struct *vma_merge_copied_range(struct vma_merge_struct *vmg)
{
VM_WARN_ON_VMG(!vmg->middle, vmg);
vmg->copied_from = vmg->middle;
vmg->middle = NULL;
return vma_merge_new_range(vmg);
}
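/*
 * vma_expand - Expand an existing VMA
 *
 * @vmg: Describes a VMA expansion operation.
 *
 * Expand vmg->target to span vmg->start to vmg->end. vmg->next is consumed
 * entirely if it is a distinct VMA that ends exactly at vmg->end. Whether the
 * expansion is permissible must already have been checked by the caller.
 *
 * Returns: 0 on success, -ENOMEM on failure.
 */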
int vma_expand(struct vma_merge_struct *vmg)
{
struct vm_area_struct *anon_dup = NULL;
struct vm_area_struct *target = vmg->target;
struct vm_area_struct *next = vmg->next;
bool remove_next = false;
vm_flags_t sticky_flags;
int ret = 0;
mmap_assert_write_locked(vmg->mm);
vma_start_write(target);
if (next && target != next && vmg->end == next->vm_end)
remove_next = true;
VM_WARN_ON_VMG(!target, vmg);
VM_WARN_ON_VMG(remove_next && !can_merge_remove_vma(next), vmg);
VM_WARN_ON_VMG(next && !remove_next &&
next != target && vmg->end > next->vm_start, vmg);
VM_WARN_ON_VMG(target->vm_start < vmg->start ||
target->vm_end > vmg->end, vmg);
sticky_flags = vmg->vm_flags & VM_STICKY;
sticky_flags |= target->vm_flags & VM_STICKY;
if (remove_next)
sticky_flags |= next->vm_flags & VM_STICKY;
if (remove_next)
ret = dup_anon_vma(target, next, &anon_dup);
if (!ret && vmg->copied_from)
ret = dup_anon_vma(target, vmg->copied_from, &anon_dup);
if (ret)
return ret;
if (remove_next) {
vma_start_write(next);
vmg->__remove_next = true;
}
if (commit_merge(vmg))
goto nomem;
vm_flags_set(target, sticky_flags);
return 0;
nomem:
if (anon_dup)
unlink_anon_vmas(anon_dup);
if (!vmg->give_up_on_oom)
vmg->state = VMA_MERGE_ERROR_NOMEM;
return -ENOMEM;
}
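/*
 * vma_shrink() - Reduce an existing VMA's memory area
 * @vmi: The vma iterator
 * @vma: The VMA to modify
 * @start: The new start
 * @end: The new end
 *
 * Returns: 0 on success, -ENOMEM otherwise
 */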
int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, pgoff_t pgoff)
{
struct vma_prepare vp;
WARN_ON((vma->vm_start != start) && (vma->vm_end != end));
if (vma->vm_start < start)
vma_iter_config(vmi, vma->vm_start, start);
else
vma_iter_config(vmi, end, vma->vm_end);
if (vma_iter_prealloc(vmi, NULL))
return -ENOMEM;
vma_start_write(vma);
init_vma_prep(&vp, vma);
vma_prepare(&vp);
vma_adjust_trans_huge(vma, start, end, NULL);
vma_iter_clear(vmi);
vma_set_range(vma, start, end, pgoff);
vma_complete(&vp, vmi, vma->vm_mm);
validate_mm(vma->vm_mm);
return 0;
}
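/*
 * Unmap the page table entries covered by the VMAs gathered in @mas_detach,
 * but only if vms->clear_ptes indicates this still needs doing.
 */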
static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
struct ma_state *mas_detach, bool mm_wr_locked)
{
struct unmap_desc unmap = {
.mas = mas_detach,
.first = vms->vma,
.pg_start = vms->unmap_start,
.pg_end = vms->unmap_end,
.vma_start = vms->start,
.vma_end = vms->end,
.tree_reset = 1,
.tree_end = vms->vma_count,
.mm_wr_locked = mm_wr_locked,
};
if (!vms->clear_ptes)
return;
mas_set(mas_detach, 1);
unmap_region(&unmap);
vms->clear_ptes = false;
}
static void vms_clean_up_area(struct vma_munmap_struct *vms,
struct ma_state *mas_detach)
{
struct vm_area_struct *vma;
if (!vms->nr_pages)
return;
vms_clear_ptes(vms, mas_detach, true);
mas_set(mas_detach, 0);
mas_for_each(mas_detach, vma, ULONG_MAX)
vma_close(vma);
}
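/*
 * vms_complete_munmap_vmas() - Finish the munmap() operation
 * @vms: The vma munmap struct
 * @mas_detach: The maple state of the detached vmas
 *
 * This updates the mm_struct, unmaps the region, frees the resources used for
 * the munmap() and may downgrade the lock - if requested.  Everything needed
 * to be done once the vma maple tree is updated.
 */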
static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
struct ma_state *mas_detach)
{
struct vm_area_struct *vma;
struct mm_struct *mm;
mm = current->mm;
mm->map_count -= vms->vma_count;
mm->locked_vm -= vms->locked_vm;
if (vms->unlock)
mmap_write_downgrade(mm);
if (!vms->nr_pages)
return;
vms_clear_ptes(vms, mas_detach, !vms->unlock);
update_hiwater_vm(mm);
WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm) - vms->nr_pages);
VM_WARN_ON(vms->exec_vm > mm->exec_vm);
VM_WARN_ON(vms->stack_vm > mm->stack_vm);
VM_WARN_ON(vms->data_vm > mm->data_vm);
mm->exec_vm -= vms->exec_vm;
mm->stack_vm -= vms->stack_vm;
mm->data_vm -= vms->data_vm;
mas_set(mas_detach, 0);
mas_for_each(mas_detach, vma, ULONG_MAX)
remove_vma(vma);
vm_unacct_memory(vms->nr_accounted);
validate_mm(mm);
if (vms->unlock)
mmap_read_unlock(mm);
__mt_destroy(mas_detach->tree);
}
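/*
 * reattach_vmas() - Undo any munmap work and free resources
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the vmas.
 */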
static void reattach_vmas(struct ma_state *mas_detach)
{
struct vm_area_struct *vma;
mas_set(mas_detach, 0);
mas_for_each(mas_detach, vma, ULONG_MAX)
vma_mark_attached(vma);
__mt_destroy(mas_detach->tree);
}
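/*
 * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
 * for removal at a later date.  Handles splitting first and last if necessary
 * and marking the vmas as isolated.
 *
 * @vms: The vma munmap struct
 * @mas_detach: The maple state tracking the detached tree
 *
 * Return: 0 on success, error otherwise
 */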
static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
struct ma_state *mas_detach)
{
struct vm_area_struct *next = NULL;
int error;
if (vms->start > vms->vma->vm_start) {
if (vms->end < vms->vma->vm_end &&
vms->vma->vm_mm->map_count >= sysctl_max_map_count) {
error = -ENOMEM;
goto map_count_exceeded;
}
if (vma_is_sealed(vms->vma)) {
error = -EPERM;
goto start_split_failed;
}
error = __split_vma(vms->vmi, vms->vma, vms->start, 1);
if (error)
goto start_split_failed;
}
vms->prev = vma_prev(vms->vmi);
if (vms->prev)
vms->unmap_start = vms->prev->vm_end;
for_each_vma_range(*(vms->vmi), next, vms->end) {
long nrpages;
if (vma_is_sealed(next)) {
error = -EPERM;
goto modify_vma_failed;
}
if (next->vm_end > vms->end) {
error = __split_vma(vms->vmi, next, vms->end, 0);
if (error)
goto end_split_failed;
}
vma_start_write(next);
mas_set(mas_detach, vms->vma_count++);
error = mas_store_gfp(mas_detach, next, GFP_KERNEL);
if (error)
goto munmap_gather_failed;
vma_mark_detached(next);
nrpages = vma_pages(next);
vms->nr_pages += nrpages;
if (next->vm_flags & VM_LOCKED)
vms->locked_vm += nrpages;
if (next->vm_flags & VM_ACCOUNT)
vms->nr_accounted += nrpages;
if (is_exec_mapping(next->vm_flags))
vms->exec_vm += nrpages;
else if (is_stack_mapping(next->vm_flags))
vms->stack_vm += nrpages;
else if (is_data_mapping(next->vm_flags))
vms->data_vm += nrpages;
if (vms->uf) {
error = userfaultfd_unmap_prep(next, vms->start,
vms->end, vms->uf);
if (error)
goto userfaultfd_error;
}
#ifdef CONFIG_DEBUG_VM_MAPLE_TREE
BUG_ON(next->vm_start < vms->start);
BUG_ON(next->vm_start > vms->end);
#endif
}
vms->next = vma_next(vms->vmi);
if (vms->next)
vms->unmap_end = vms->next->vm_start;
#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
{
MA_STATE(test, mas_detach->tree, 0, 0);
struct vm_area_struct *vma_mas, *vma_test;
int test_count = 0;
vma_iter_set(vms->vmi, vms->start);
rcu_read_lock();
vma_test = mas_find(&test, vms->vma_count - 1);
for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
BUG_ON(vma_mas != vma_test);
test_count++;
vma_test = mas_next(&test, vms->vma_count - 1);
}
rcu_read_unlock();
BUG_ON(vms->vma_count != test_count);
}
#endif
while (vma_iter_addr(vms->vmi) > vms->start)
vma_iter_prev_range(vms->vmi);
vms->clear_ptes = true;
return 0;
userfaultfd_error:
munmap_gather_failed:
end_split_failed:
modify_vma_failed:
reattach_vmas(mas_detach);
start_split_failed:
map_count_exceeded:
return error;
}
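/*
 * init_vma_munmap() - Initializer wrapper for struct vma_munmap_struct, used
 * to set up the state for a munmap() over [start, end) beginning at @vma.
 */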
static void init_vma_munmap(struct vma_munmap_struct *vms,
struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long start, unsigned long end, struct list_head *uf,
bool unlock)
{
vms->vmi = vmi;
vms->vma = vma;
if (vma) {
vms->start = start;
vms->end = end;
} else {
vms->start = vms->end = 0;
}
vms->unlock = unlock;
vms->uf = uf;
vms->vma_count = 0;
vms->nr_pages = vms->locked_vm = vms->nr_accounted = 0;
vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
vms->unmap_start = FIRST_USER_ADDRESS;
vms->unmap_end = USER_PGTABLES_CEILING;
vms->clear_ptes = false;
}
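/*
 * do_vmi_align_munmap() - munmap the aligned region from @start to @end.
 * @vmi: The vma iterator
 * @vma: The starting vm_area_struct
 * @mm: The mm_struct
 * @start: The aligned start address to munmap.
 * @end: The aligned end address to munmap.
 * @uf: The userfaultfd list_head
 * @unlock: Set to true to drop the mmap_lock.  unlocking only happens on
 * success.
 *
 * Return: 0 on success and drops the lock if so directed, error and leaves the
 * lock held otherwise.
 */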
int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm, unsigned long start, unsigned long end,
struct list_head *uf, bool unlock)
{
struct maple_tree mt_detach;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_on_stack(mt_detach);
struct vma_munmap_struct vms;
int error;
init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
error = vms_gather_munmap_vmas(&vms, &mas_detach);
if (error)
goto gather_failed;
error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
if (error)
goto clear_tree_failed;
vms_complete_munmap_vmas(&vms, &mas_detach);
return 0;
clear_tree_failed:
reattach_vmas(&mas_detach);
gather_failed:
validate_mm(mm);
return error;
}
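/*
 * do_vmi_munmap() - munmap a given range.
 * @vmi: The vma iterator
 * @mm: The mm_struct
 * @start: The start address to munmap
 * @len: The length of the range to munmap
 * @uf: The userfaultfd list_head
 * @unlock: Set to true if the user wants to drop the mmap_lock on success
 *
 * The @len will be aligned to page granularity.
 *
 * Return: 0 on success and drops the lock if so directed, error and leaves the
 * lock held otherwise.
 */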
int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
unsigned long start, size_t len, struct list_head *uf,
bool unlock)
{
unsigned long end;
struct vm_area_struct *vma;
if ((offset_in_page(start)) || start > TASK_SIZE || len > TASK_SIZE-start)
return -EINVAL;
end = start + PAGE_ALIGN(len);
if (end == start)
return -EINVAL;
vma = vma_find(vmi, end);
if (!vma) {
if (unlock)
mmap_write_unlock(mm);
return 0;
}
return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
}
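/*
 * Prepare vmg->middle for a change of attributes over [vmg->start, vmg->end):
 * first try to merge the range with adjacent VMAs, and if that is not
 * possible, split the VMA so the range can be modified in isolation.
 *
 * Returns the VMA covering the range, or an ERR_PTR() on failure.
 */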
static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg)
{
struct vm_area_struct *vma = vmg->middle;
unsigned long start = vmg->start;
unsigned long end = vmg->end;
struct vm_area_struct *merged;
merged = vma_merge_existing_range(vmg);
if (merged)
return merged;
if (vmg_nomem(vmg))
return ERR_PTR(-ENOMEM);
VM_WARN_ON(vmg->give_up_on_oom &&
(vma->vm_start != start || vma->vm_end != end));
if (vma->vm_start < start) {
int err = split_vma(vmg->vmi, vma, start, 1);
if (err)
return ERR_PTR(err);
}
if (vma->vm_end > end) {
int err = split_vma(vmg->vmi, vma, end, 0);
if (err)
return ERR_PTR(err);
}
return vma;
}
struct vm_area_struct *vma_modify_flags(struct vma_iterator *vmi,
struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
vm_flags_t *vm_flags_ptr)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
const vm_flags_t vm_flags = *vm_flags_ptr;
struct vm_area_struct *ret;
vmg.vm_flags = vm_flags;
ret = vma_modify(&vmg);
if (IS_ERR(ret))
return ret;
if (vmg.state == VMA_MERGE_SUCCESS)
*vm_flags_ptr = ret->vm_flags;
return ret;
}
struct vm_area_struct *vma_modify_name(struct vma_iterator *vmi,
struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct anon_vma_name *new_name)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
vmg.anon_name = new_name;
return vma_modify(&vmg);
}
struct vm_area_struct *vma_modify_policy(struct vma_iterator *vmi,
struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end,
struct mempolicy *new_pol)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
vmg.policy = new_pol;
return vma_modify(&vmg);
}
struct vm_area_struct *vma_modify_flags_uffd(struct vma_iterator *vmi,
struct vm_area_struct *prev, struct vm_area_struct *vma,
unsigned long start, unsigned long end, vm_flags_t vm_flags,
struct vm_userfaultfd_ctx new_ctx, bool give_up_on_oom)
{
VMG_VMA_STATE(vmg, vmi, prev, vma, start, end);
vmg.vm_flags = vm_flags;
vmg.uffd_ctx = new_ctx;
if (give_up_on_oom)
vmg.give_up_on_oom = true;
return vma_modify(&vmg);
}
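/*
 * Expand vma by delta bytes, potentially merging with an immediately adjacent
 * VMA with identical properties.
 */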
struct vm_area_struct *vma_merge_extend(struct vma_iterator *vmi,
struct vm_area_struct *vma,
unsigned long delta)
{
VMG_VMA_STATE(vmg, vmi, vma, vma, vma->vm_end, vma->vm_end + delta);
vmg.next = vma_iter_next_rewind(vmi, NULL);
vmg.middle = NULL;
return vma_merge_new_range(&vmg);
}
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
{
vb->count = 0;
}
static void unlink_file_vma_batch_process(struct unlink_vma_file_batch *vb)
{
struct address_space *mapping;
int i;
mapping = vb->vmas[0]->vm_file->f_mapping;
i_mmap_lock_write(mapping);
for (i = 0; i < vb->count; i++) {
VM_WARN_ON_ONCE(vb->vmas[i]->vm_file->f_mapping != mapping);
__remove_shared_vm_struct(vb->vmas[i], mapping);
}
i_mmap_unlock_write(mapping);
unlink_file_vma_batch_init(vb);
}
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
struct vm_area_struct *vma)
{
if (vma->vm_file == NULL)
return;
if ((vb->count > 0 && vb->vmas[0]->vm_file != vma->vm_file) ||
vb->count == ARRAY_SIZE(vb->vmas))
unlink_file_vma_batch_process(vb);
vb->vmas[vb->count] = vma;
vb->count++;
}
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
{
if (vb->count > 0)
unlink_file_vma_batch_process(vb);
}
static void vma_link_file(struct vm_area_struct *vma, bool hold_rmap_lock)
{
struct file *file = vma->vm_file;
struct address_space *mapping;
if (file) {
mapping = file->f_mapping;
i_mmap_lock_write(mapping);
__vma_link_file(vma, mapping);
if (!hold_rmap_lock)
i_mmap_unlock_write(mapping);
}
}
static int vma_link(struct mm_struct *mm, struct vm_area_struct *vma)
{
VMA_ITERATOR(vmi, mm, 0);
vma_iter_config(&vmi, vma->vm_start, vma->vm_end);
if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
vma_start_write(vma);
vma_iter_store_new(&vmi, vma);
vma_link_file(vma, false);
mm->map_count++;
validate_mm(mm);
return 0;
}
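/*
 * Copy the vma structure to a new location in the same mm,
 * prior to moving page table entries, to effect an mremap move.
 */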
struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
unsigned long addr, unsigned long len, pgoff_t pgoff,
bool *need_rmap_locks)
{
struct vm_area_struct *vma = *vmap;
unsigned long vma_start = vma->vm_start;
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
bool faulted_in_anon_vma = true;
VMA_ITERATOR(vmi, mm, addr);
VMG_VMA_STATE(vmg, &vmi, NULL, vma, addr, addr + len);
if (unlikely(vma_is_anonymous(vma) && !vma->anon_vma)) {
pgoff = addr >> PAGE_SHIFT;
faulted_in_anon_vma = false;
}
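	/*
	 * If the VMA we are copying might contain a uprobe PTE, ensure
	 * that we do not establish one upon merge. Otherwise, when mremap()
	 * moves page tables, it will orphan the newly created PTE.
	 */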
if (vma->vm_file)
vmg.skip_vma_uprobe = true;
new_vma = find_vma_prev(mm, addr, &vmg.prev);
if (new_vma && new_vma->vm_start < addr + len)
return NULL;
vmg.pgoff = pgoff;
vmg.next = vma_iter_next_rewind(&vmi, NULL);
new_vma = vma_merge_copied_range(&vmg);
if (new_vma) {
if (unlikely(vma_start >= new_vma->vm_start &&
vma_start < new_vma->vm_end)) {
VM_BUG_ON_VMA(faulted_in_anon_vma, new_vma);
*vmap = vma = new_vma;
}
*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
} else {
new_vma = vm_area_dup(vma);
if (!new_vma)
goto out;
vma_set_range(new_vma, addr, addr + len, pgoff);
if (vma_dup_policy(vma, new_vma))
goto out_free_vma;
if (anon_vma_clone(new_vma, vma, VMA_OP_REMAP))
goto out_free_mempol;
if (new_vma->vm_file)
get_file(new_vma->vm_file);
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
if (vma_link(mm, new_vma))
goto out_vma_link;
*need_rmap_locks = false;
}
return new_vma;
out_vma_link:
fixup_hugetlb_reservations(new_vma);
vma_close(new_vma);
if (new_vma->vm_file)
fput(new_vma->vm_file);
unlink_anon_vmas(new_vma);
out_free_mempol:
mpol_put(vma_policy(new_vma));
out_free_vma:
vm_area_free(new_vma);
out:
return NULL;
}
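/*
 * Rough compatibility check to quickly see if it's even worth looking
 * at sharing an anon_vma.
 *
 * They need to have the same vm_file, and the flags can only differ
 * in things that mprotect may change.
 */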
static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
{
return a->vm_end == b->vm_start &&
mpol_equal(vma_policy(a), vma_policy(b)) &&
a->vm_file == b->vm_file &&
!((a->vm_flags ^ b->vm_flags) & ~(VM_ACCESS_FLAGS | VM_IGNORE_MERGE)) &&
b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}
static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old,
struct vm_area_struct *a,
struct vm_area_struct *b)
{
if (anon_vma_compatible(a, b)) {
struct anon_vma *anon_vma = READ_ONCE(old->anon_vma);
if (anon_vma && list_is_singular(&old->anon_vma_chain))
return anon_vma;
}
return NULL;
}
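/*
 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
 * any reusable anon_vma.
 *
 * We also make sure that the two vma's are compatible (adjacent,
 * and with the same memory policies). That's all stable, even with just
 * a read lock on the mmap_lock.
 */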
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = NULL;
struct vm_area_struct *prev, *next;
VMA_ITERATOR(vmi, vma->vm_mm, vma->vm_end);
next = vma_iter_load(&vmi);
if (next) {
anon_vma = reusable_anon_vma(next, vma, next);
if (anon_vma)
return anon_vma;
}
prev = vma_prev(&vmi);
VM_BUG_ON_VMA(prev != vma, vma);
prev = vma_prev(&vmi);
if (prev)
anon_vma = reusable_anon_vma(prev, prev, vma);
return anon_vma;
}
static bool vm_ops_needs_writenotify(const struct vm_operations_struct *vm_ops)
{
return vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite);
}
static bool vma_is_shared_writable(struct vm_area_struct *vma)
{
return (vma->vm_flags & (VM_WRITE | VM_SHARED)) ==
(VM_WRITE | VM_SHARED);
}
static bool vma_fs_can_writeback(struct vm_area_struct *vma)
{
if (vma->vm_flags & VM_PFNMAP)
return false;
return vma->vm_file && vma->vm_file->f_mapping &&
mapping_can_writeback(vma->vm_file->f_mapping);
}
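/*
 * Does this VMA require the underlying folios to have their dirty state
 * tracked?
 */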
bool vma_needs_dirty_tracking(struct vm_area_struct *vma)
{
if (!vma_is_shared_writable(vma))
return false;
if (vm_ops_needs_writenotify(vma->vm_ops))
return true;
return vma_fs_can_writeback(vma);
}
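/*
 * Some shared mappings will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
 * to the private version (using protection_map[] without the
 * VM_SHARED bit).
 */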
bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
{
if (!vma_is_shared_writable(vma))
return false;
if (vm_ops_needs_writenotify(vma->vm_ops))
return true;
if (pgprot_val(vm_page_prot) !=
pgprot_val(vm_pgprot_modify(vm_page_prot, vma->vm_flags)))
return false;
if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma))
return true;
if (userfaultfd_wp(vma))
return true;
return vma_fs_can_writeback(vma);
}
static DEFINE_MUTEX(mm_all_locks_mutex);
static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
if (!test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_lock);
if (__test_and_set_bit(0, (unsigned long *)
&anon_vma->root->rb_root.rb_root.rb_node))
BUG();
}
}
static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
{
if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
BUG();
down_write_nest_lock(&mapping->i_mmap_rwsem, &mm->mmap_lock);
}
}
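/*
 * mm_take_all_locks() - Take every lock that rmap walkers may use against this
 * mm: write-lock every VMA, then the i_mmap_rwsem of each mapped file
 * (hugetlb mappings first, then the rest) and finally each anon_vma root lock,
 * all nested under the exclusive mmap_lock.
 *
 * Returns 0 on success, or -EINTR if a signal is pending while the locks are
 * being taken, in which case all locks taken so far are dropped.
 */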
int mm_take_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
VMA_ITERATOR(vmi, mm, 0);
mmap_assert_write_locked(mm);
mutex_lock(&mm_all_locks_mutex);
for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
vma_start_write(vma);
}
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
is_vm_hugetlb_page(vma))
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->vm_file && vma->vm_file->f_mapping &&
!is_vm_hugetlb_page(vma))
vm_lock_mapping(mm, vma->vm_file->f_mapping);
}
vma_iter_init(&vmi, mm, 0);
for_each_vma(vmi, vma) {
if (signal_pending(current))
goto out_unlock;
if (vma->anon_vma)
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
vm_lock_anon_vma(mm, avc->anon_vma);
}
return 0;
out_unlock:
mm_drop_all_locks(mm);
return -EINTR;
}
static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
if (test_bit(0, (unsigned long *) &anon_vma->root->rb_root.rb_root.rb_node)) {
if (!__test_and_clear_bit(0, (unsigned long *)
&anon_vma->root->rb_root.rb_root.rb_node))
BUG();
anon_vma_unlock_write(anon_vma);
}
}
static void vm_unlock_mapping(struct address_space *mapping)
{
if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) {
i_mmap_unlock_write(mapping);
if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
&mapping->flags))
BUG();
}
}
void mm_drop_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
struct anon_vma_chain *avc;
VMA_ITERATOR(vmi, mm, 0);
mmap_assert_write_locked(mm);
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
for_each_vma(vmi, vma) {
if (vma->anon_vma)
list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
vm_unlock_anon_vma(avc->anon_vma);
if (vma->vm_file && vma->vm_file->f_mapping)
vm_unlock_mapping(vma->vm_file->f_mapping);
}
mutex_unlock(&mm_all_locks_mutex);
}
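/*
 * We account for memory if it's a private writeable mapping,
 * not hugepages and VM_NORESERVE wasn't set.
 */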
static bool accountable_mapping(struct file *file, vm_flags_t vm_flags)
{
if (file && is_file_hugepages(file))
return false;
return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
}
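/*
 * vms_abort_munmap_vmas() - Undo as much as possible from an aborted munmap()
 * operation.
 * @vms: The vma unmap structure
 * @mas_detach: The maple state with the detached maple tree
 *
 * Reattach any detached vmas and free up the maple tree used to track the
 * vmas.  If that's not possible because the ptes are cleared (and
 * vm_ops->close() may have been called), then a NULL is written over the vmas
 * and the vmas are removed (munmap() completed).
 */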
static void vms_abort_munmap_vmas(struct vma_munmap_struct *vms,
struct ma_state *mas_detach)
{
struct ma_state *mas = &vms->vmi->mas;
if (!vms->nr_pages)
return;
if (vms->clear_ptes)
return reattach_vmas(mas_detach);
mas_set_range(mas, vms->start, vms->end - 1);
mas_store_gfp(mas, NULL, GFP_KERNEL|__GFP_NOFAIL);
vms_complete_munmap_vmas(vms, mas_detach);
}
static void update_ksm_flags(struct mmap_state *map)
{
map->vm_flags = ksm_vma_flags(map->mm, map->file, map->vm_flags);
}
static void set_desc_from_map(struct vm_area_desc *desc,
const struct mmap_state *map)
{
desc->start = map->addr;
desc->end = map->end;
desc->pgoff = map->pgoff;
desc->vm_file = map->file;
desc->vma_flags = map->vma_flags;
desc->page_prot = map->page_prot;
}
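/*
 * __mmap_setup() - Prepare to gather any overlapping VMAs that need to be
 * unmapped once the map operation is completed, check limits, account the
 * mapping, clean up any pre-existing VMAs and populate the VMA descriptor.
 *
 * @map: Mapping state.
 * @desc: VMA descriptor to populate from the mapping state.
 * @uf: Userfaultfd context list.
 *
 * Returns: 0 on success, error code otherwise.
 */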
static int __mmap_setup(struct mmap_state *map, struct vm_area_desc *desc,
struct list_head *uf)
{
int error;
struct vma_iterator *vmi = map->vmi;
struct vma_munmap_struct *vms = &map->vms;
vms->vma = vma_find(vmi, map->end);
init_vma_munmap(vms, vmi, vms->vma, map->addr, map->end, uf,
false);
if (vms->vma) {
mt_init_flags(&map->mt_detach,
vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_on_stack(map->mt_detach);
mas_init(&map->mas_detach, &map->mt_detach, 0);
error = vms_gather_munmap_vmas(vms, &map->mas_detach);
if (error) {
vms->nr_pages = 0;
return error;
}
map->next = vms->next;
map->prev = vms->prev;
} else {
map->next = vma_iter_next_rewind(vmi, &map->prev);
}
if (!may_expand_vm(map->mm, map->vm_flags, map->pglen - vms->nr_pages))
return -ENOMEM;
if (accountable_mapping(map->file, map->vm_flags)) {
map->charged = map->pglen;
map->charged -= vms->nr_accounted;
if (map->charged) {
error = security_vm_enough_memory_mm(map->mm, map->charged);
if (error)
return error;
}
vms->nr_accounted = 0;
map->vm_flags |= VM_ACCOUNT;
}
vms_clean_up_area(vms, &map->mas_detach);
set_desc_from_map(desc, map);
return 0;
}
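/*
 * __mmap_new_file_vma() - Apply the file's mmap() hook to the VMA, undoing any
 * partial mapping if the hook fails.
 */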
static int __mmap_new_file_vma(struct mmap_state *map,
struct vm_area_struct *vma)
{
struct vma_iterator *vmi = map->vmi;
int error;
vma->vm_file = map->file;
if (!map->file_doesnt_need_get)
get_file(map->file);
if (!map->file->f_op->mmap)
return 0;
error = mmap_file(vma->vm_file, vma);
if (error) {
UNMAP_STATE(unmap, vmi, vma, vma->vm_start, vma->vm_end,
map->prev, map->next);
fput(vma->vm_file);
vma->vm_file = NULL;
vma_iter_set(vmi, vma->vm_end);
unmap_region(&unmap);
return error;
}
WARN_ON_ONCE(map->addr != vma->vm_start);
VM_WARN_ON_ONCE(map->vm_flags != vma->vm_flags &&
!(map->vm_flags & VM_MAYWRITE) &&
(vma->vm_flags & VM_MAYWRITE));
map->file = vma->vm_file;
map->vm_flags = vma->vm_flags;
return 0;
}
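/*
 * __mmap_new_vma() - Allocate a new VMA for the region, as merging was not
 * possible.
 *
 * @map: Mapping state.
 * @vmap: Output pointer for the new VMA.
 *
 * Returns: Zero on success, or an error.
 */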
static int __mmap_new_vma(struct mmap_state *map, struct vm_area_struct **vmap)
{
struct vma_iterator *vmi = map->vmi;
int error = 0;
struct vm_area_struct *vma;
vma = vm_area_alloc(map->mm);
if (!vma)
return -ENOMEM;
vma_iter_config(vmi, map->addr, map->end);
vma_set_range(vma, map->addr, map->end, map->pgoff);
vm_flags_init(vma, map->vm_flags);
vma->vm_page_prot = map->page_prot;
if (vma_iter_prealloc(vmi, vma)) {
error = -ENOMEM;
goto free_vma;
}
if (map->file)
error = __mmap_new_file_vma(map, vma);
else if (map->vm_flags & VM_SHARED)
error = shmem_zero_setup(vma);
else
vma_set_anonymous(vma);
if (error)
goto free_iter_vma;
if (!map->check_ksm_early) {
update_ksm_flags(map);
vm_flags_init(vma, map->vm_flags);
}
#ifdef CONFIG_SPARC64
WARN_ON_ONCE(!arch_validate_flags(map->vm_flags));
#endif
vma_start_write(vma);
vma_iter_store_new(vmi, vma);
map->mm->map_count++;
vma_link_file(vma, map->hold_file_rmap_lock);
if (!vma_is_anonymous(vma))
khugepaged_enter_vma(vma, map->vm_flags);
*vmap = vma;
return 0;
free_iter_vma:
vma_iter_free(vmi);
free_vma:
vm_area_free(vma);
return error;
}
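/*
 * __mmap_complete() - Unmap any VMAs we overlap, account memory mapping
 *                     statistics, handle locked pages and special mappings.
 *
 * @map: Mapping state.
 * @vma: Merged or newly allocated VMA for the mmap()'d region.
 */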
static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
{
struct mm_struct *mm = map->mm;
vm_flags_t vm_flags = vma->vm_flags;
perf_event_mmap(vma);
vms_complete_munmap_vmas(&map->vms, &map->mas_detach);
vm_stat_account(mm, vma->vm_flags, map->pglen);
if (vm_flags & VM_LOCKED) {
if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
is_vm_hugetlb_page(vma) ||
vma == get_gate_vma(mm))
vm_flags_clear(vma, VM_LOCKED_MASK);
else
mm->locked_vm += map->pglen;
}
if (vma->vm_file)
uprobe_mmap(vma);
if (pgtable_supports_soft_dirty())
vm_flags_set(vma, VM_SOFTDIRTY);
vma_set_page_prot(vma);
}
static void call_action_prepare(struct mmap_state *map,
struct vm_area_desc *desc)
{
struct mmap_action *action = &desc->action;
mmap_action_prepare(action, desc);
if (action->hide_from_rmap_until_complete)
map->hold_file_rmap_lock = true;
}
static int call_mmap_prepare(struct mmap_state *map,
struct vm_area_desc *desc)
{
int err;
err = vfs_mmap_prepare(map->file, desc);
if (err)
return err;
call_action_prepare(map, desc);
map->pgoff = desc->pgoff;
if (desc->vm_file != map->file) {
map->file_doesnt_need_get = true;
map->file = desc->vm_file;
}
map->vma_flags = desc->vma_flags;
map->page_prot = desc->page_prot;
map->vm_ops = desc->vm_ops;
map->vm_private_data = desc->private_data;
return 0;
}
static void set_vma_user_defined_fields(struct vm_area_struct *vma,
struct mmap_state *map)
{
if (map->vm_ops)
vma->vm_ops = map->vm_ops;
vma->vm_private_data = map->vm_private_data;
}
static bool can_set_ksm_flags_early(struct mmap_state *map)
{
struct file *file = map->file;
if (!file)
return true;
if (file->f_op->mmap_prepare)
return true;
if (shmem_file(file))
return true;
return false;
}
static int call_action_complete(struct mmap_state *map,
struct vm_area_desc *desc,
struct vm_area_struct *vma)
{
struct mmap_action *action = &desc->action;
int ret;
ret = mmap_action_complete(action, vma);
if (map->hold_file_rmap_lock) {
struct file *file = vma->vm_file;
i_mmap_unlock_write(file->f_mapping);
}
return ret;
}
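/*
 * __mmap_region() - Do the bulk of the work of mapping the region described by
 * @file, @addr, @len, @vm_flags and @pgoff into current->mm: set up state,
 * attempt to merge with adjacent VMAs or allocate a new one, and complete the
 * mapping.
 *
 * Returns: The mapped address on success, or an error code otherwise.
 */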
static unsigned long __mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma = NULL;
bool have_mmap_prepare = file && file->f_op->mmap_prepare;
VMA_ITERATOR(vmi, mm, addr);
MMAP_STATE(map, mm, &vmi, addr, len, pgoff, vm_flags, file);
struct vm_area_desc desc = {
.mm = mm,
.file = file,
.action = {
.type = MMAP_NOTHING,
},
};
bool allocated_new = false;
int error;
map.check_ksm_early = can_set_ksm_flags_early(&map);
error = __mmap_setup(&map, &desc, uf);
if (!error && have_mmap_prepare)
error = call_mmap_prepare(&map, &desc);
if (error)
goto abort_munmap;
if (map.check_ksm_early)
update_ksm_flags(&map);
if (map.prev || map.next) {
VMG_MMAP_STATE(vmg, &map, NULL);
vma = vma_merge_new_range(&vmg);
}
if (!vma) {
error = __mmap_new_vma(&map, &vma);
if (error)
goto unacct_error;
allocated_new = true;
}
if (have_mmap_prepare)
set_vma_user_defined_fields(vma, &map);
__mmap_complete(&map, vma);
if (have_mmap_prepare && allocated_new) {
error = call_action_complete(&map, &desc, vma);
if (error)
return error;
}
return addr;
unacct_error:
if (map.charged)
vm_unacct_memory(map.charged);
abort_munmap:
if (map.file_doesnt_need_get)
fput(map.file);
vms_abort_munmap_vmas(&map.vms, &map.mas_detach);
return error;
}
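/*
 * mmap_region() - Actually perform the userland mapping of a VMA into
 * current->mm with known, aligned and overflow-checked @addr and @len, and
 * correctly determined VMA flags @vm_flags and page offset @pgoff.
 *
 * This is an internal memory management function, and should not be used
 * directly.
 *
 * Returns: Either an error, or the address at which the requested mapping has
 * been performed.
 */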
unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
struct list_head *uf)
{
unsigned long ret;
bool writable_file_mapping = false;
mmap_assert_write_locked(current->mm);
if (map_deny_write_exec(vm_flags, vm_flags))
return -EACCES;
if (!arch_validate_flags(vm_flags))
return -EINVAL;
if (file && is_shared_maywrite_vm_flags(vm_flags)) {
int error = mapping_map_writable(file->f_mapping);
if (error)
return error;
writable_file_mapping = true;
}
ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
if (writable_file_mapping)
mapping_unmap_writable(file->f_mapping);
validate_mm(current->mm);
return ret;
}
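/*
 * do_brk_flags() - Increase the brk vma if the flags match.
 * @vmi: The vma iterator
 * @vma: The vma to extend, may be NULL
 * @addr: The start address
 * @len: The length of the increase
 * @vm_flags: The VMA flags
 *
 * Extend the brk VMA from addr to addr + len.  If the VMA is NULL or the flags
 * do not match then create a new anonymous VMA.  Also check the limits of the
 * address space.  This function is intended for brk only.
 */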
int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long addr, unsigned long len, vm_flags_t vm_flags)
{
struct mm_struct *mm = current->mm;
vm_flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
vm_flags = ksm_vma_flags(mm, NULL, vm_flags);
if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
if (vma && vma->vm_end == addr) {
VMG_STATE(vmg, mm, vmi, addr, addr + len, vm_flags, PHYS_PFN(addr));
vmg.prev = vma;
vmg.just_expand = true;
if (vma_merge_new_range(&vmg))
goto out;
else if (vmg_nomem(&vmg))
goto unacct_fail;
}
if (vma)
vma_iter_next_range(vmi);
vma = vm_area_alloc(mm);
if (!vma)
goto unacct_fail;
vma_set_anonymous(vma);
vma_set_range(vma, addr, addr + len, addr >> PAGE_SHIFT);
vm_flags_init(vma, vm_flags);
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma_start_write(vma);
if (vma_iter_store_gfp(vmi, vma, GFP_KERNEL))
goto mas_store_fail;
mm->map_count++;
validate_mm(mm);
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
mm->data_vm += len >> PAGE_SHIFT;
if (vm_flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
if (pgtable_supports_soft_dirty())
vm_flags_set(vma, VM_SOFTDIRTY);
return 0;
mas_store_fail:
vm_area_free(vma);
unacct_fail:
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
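/*
 * unmapped_area() - Find an area between the low_limit and the high_limit with
 * the correct alignment and offset, all from @info. Note: current->mm is used
 * for the search.
 *
 * @info: The unmapped area information including the range [low_limit -
 * high_limit), the alignment offset and mask.
 *
 * Return: A memory address or -ENOMEM.
 */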
unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
unsigned long length, gap;
unsigned long low_limit, high_limit;
struct vm_area_struct *tmp;
VMA_ITERATOR(vmi, current->mm, 0);
length = info->length + info->align_mask + info->start_gap;
if (length < info->length)
return -ENOMEM;
low_limit = info->low_limit;
if (low_limit < mmap_min_addr)
low_limit = mmap_min_addr;
high_limit = info->high_limit;
retry:
if (vma_iter_area_lowest(&vmi, low_limit, high_limit, length))
return -ENOMEM;
gap = vma_iter_addr(&vmi) + info->start_gap;
gap += (info->align_offset - gap) & info->align_mask;
tmp = vma_next(&vmi);
if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) {
if (vm_start_gap(tmp) < gap + length) {
low_limit = tmp->vm_end;
vma_iter_reset(&vmi);
goto retry;
}
} else {
tmp = vma_prev(&vmi);
if (tmp && vm_end_gap(tmp) > gap) {
low_limit = vm_end_gap(tmp);
vma_iter_reset(&vmi);
goto retry;
}
}
return gap;
}
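/*
 * unmapped_area_topdown() - Find an area between the low_limit and the
 * high_limit with the correct alignment and offset at the highest available
 * address, all from @info. Note: current->mm is used for the search.
 *
 * @info: The unmapped area information including the range [low_limit -
 * high_limit), the alignment offset and mask.
 *
 * Return: A memory address or -ENOMEM.
 */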
unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
{
unsigned long length, gap, gap_end;
unsigned long low_limit, high_limit;
struct vm_area_struct *tmp;
VMA_ITERATOR(vmi, current->mm, 0);
length = info->length + info->align_mask + info->start_gap;
if (length < info->length)
return -ENOMEM;
low_limit = info->low_limit;
if (low_limit < mmap_min_addr)
low_limit = mmap_min_addr;
high_limit = info->high_limit;
retry:
if (vma_iter_area_highest(&vmi, low_limit, high_limit, length))
return -ENOMEM;
gap = vma_iter_end(&vmi) - info->length;
gap -= (gap - info->align_offset) & info->align_mask;
gap_end = vma_iter_end(&vmi);
tmp = vma_next(&vmi);
if (tmp && (tmp->vm_flags & VM_STARTGAP_FLAGS)) {
if (vm_start_gap(tmp) < gap_end) {
high_limit = vm_start_gap(tmp);
vma_iter_reset(&vmi);
goto retry;
}
} else {
tmp = vma_prev(&vmi);
if (tmp && vm_end_gap(tmp) > gap) {
high_limit = tmp->vm_start;
vma_iter_reset(&vmi);
goto retry;
}
}
return gap;
}
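/*
 * Verify that the stack growth is acceptable and
 * update accounting. This is shared with both the
 * grow-up and grow-down cases.
 */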
static int acct_stack_growth(struct vm_area_struct *vma,
unsigned long size, unsigned long grow)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long new_start;
if (!may_expand_vm(mm, vma->vm_flags, grow))
return -ENOMEM;
if (size > rlimit(RLIMIT_STACK))
return -ENOMEM;
if (!mlock_future_ok(mm, vma->vm_flags & VM_LOCKED, grow << PAGE_SHIFT))
return -ENOMEM;
new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start :
vma->vm_end - size;
if (is_hugepage_only_range(vma->vm_mm, new_start, size))
return -EFAULT;
if (security_vm_enough_memory_mm(mm, grow))
return -ENOMEM;
return 0;
}
#if defined(CONFIG_STACK_GROWSUP)
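/*
 * PA-RISC uses this for its stack.
 * vma is the last one with address > vma->vm_end.  Have to extend vma.
 */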
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *next;
unsigned long gap_addr;
int error = 0;
VMA_ITERATOR(vmi, mm, vma->vm_start);
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
mmap_assert_write_locked(mm);
address &= PAGE_MASK;
if (address >= (TASK_SIZE & PAGE_MASK))
return -ENOMEM;
address += PAGE_SIZE;
gap_addr = address + stack_guard_gap;
if (gap_addr < address || gap_addr > TASK_SIZE)
gap_addr = TASK_SIZE;
next = find_vma_intersection(mm, vma->vm_end, gap_addr);
if (next && vma_is_accessible(next)) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
}
if (next)
vma_iter_prev_range_limit(&vmi, address);
vma_iter_config(&vmi, vma->vm_start, address);
if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
if (unlikely(anon_vma_prepare(vma))) {
vma_iter_free(&vmi);
return -ENOMEM;
}
vma_start_write(vma);
anon_vma_lock_write(vma->anon_vma);
if (address > vma->vm_end) {
unsigned long size, grow;
size = address - vma->vm_start;
grow = (address - vma->vm_end) >> PAGE_SHIFT;
error = -ENOMEM;
if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
error = acct_stack_growth(vma, size, grow);
if (!error) {
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
vm_stat_account(mm, vma->vm_flags, grow);
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_end = address;
vma_iter_store_overwrite(&vmi, vma);
anon_vma_interval_tree_post_update_vma(vma);
perf_event_mmap(vma);
}
}
}
anon_vma_unlock_write(vma->anon_vma);
vma_iter_free(&vmi);
validate_mm(mm);
return error;
}
#endif
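/*
 * vma is the first one with address < vma->vm_start.  Have to extend vma.
 */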
int expand_downwards(struct vm_area_struct *vma, unsigned long address)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *prev;
int error = 0;
VMA_ITERATOR(vmi, mm, vma->vm_start);
if (!(vma->vm_flags & VM_GROWSDOWN))
return -EFAULT;
mmap_assert_write_locked(mm);
address &= PAGE_MASK;
if (address < mmap_min_addr || address < FIRST_USER_ADDRESS)
return -EPERM;
prev = vma_prev(&vmi);
if (prev) {
if (!(prev->vm_flags & VM_GROWSDOWN) &&
vma_is_accessible(prev) &&
(address - prev->vm_end < stack_guard_gap))
return -ENOMEM;
}
if (prev)
vma_iter_next_range_limit(&vmi, vma->vm_start);
vma_iter_config(&vmi, address, vma->vm_end);
if (vma_iter_prealloc(&vmi, vma))
return -ENOMEM;
if (unlikely(anon_vma_prepare(vma))) {
vma_iter_free(&vmi);
return -ENOMEM;
}
vma_start_write(vma);
anon_vma_lock_write(vma->anon_vma);
if (address < vma->vm_start) {
unsigned long size, grow;
size = vma->vm_end - address;
grow = (vma->vm_start - address) >> PAGE_SHIFT;
error = -ENOMEM;
if (grow <= vma->vm_pgoff) {
error = acct_stack_growth(vma, size, grow);
if (!error) {
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
vm_stat_account(mm, vma->vm_flags, grow);
anon_vma_interval_tree_pre_update_vma(vma);
vma->vm_start = address;
vma->vm_pgoff -= grow;
vma_iter_store_overwrite(&vmi, vma);
anon_vma_interval_tree_post_update_vma(vma);
perf_event_mmap(vma);
}
}
}
anon_vma_unlock_write(vma->anon_vma);
vma_iter_free(&vmi);
validate_mm(mm);
return error;
}
int __vm_munmap(unsigned long start, size_t len, bool unlock)
{
int ret;
struct mm_struct *mm = current->mm;
LIST_HEAD(uf);
VMA_ITERATOR(vmi, mm, start);
if (mmap_write_lock_killable(mm))
return -EINTR;
ret = do_vmi_munmap(&vmi, mm, start, len, &uf, unlock);
if (ret || !unlock)
mmap_write_unlock(mm);
userfaultfd_unmap_complete(mm, &uf);
return ret;
}
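/* Insert vm structure into process list sorted by address
 * and into the inode's i_mmap tree.  If vm_file is non-NULL
 * then i_mmap must be held for writing.
 */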
int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
{
unsigned long charged = vma_pages(vma);
if (find_vma_intersection(mm, vma->vm_start, vma->vm_end))
return -ENOMEM;
if ((vma->vm_flags & VM_ACCOUNT) &&
security_vm_enough_memory_mm(mm, charged))
return -ENOMEM;
if (vma_is_anonymous(vma)) {
BUG_ON(vma->anon_vma);
vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
}
if (vma_link(mm, vma)) {
if (vma->vm_flags & VM_ACCOUNT)
vm_unacct_memory(charged);
return -ENOMEM;
}
return 0;
}