From d9af235c635a189fbb1418ba47e77eeb6e6cdfc5 Mon Sep 17 00:00:00 2001 From: Zhang Xiao Date: Thu, 22 Jun 2017 17:54:59 +0800 Subject: [PATCH] WR6 linux-windriver: preliminary fix to CVE-2017-1000364 CVE-2017-1000365 CVE: CVE-2017-1000364 CVE-2017-1000365 See included patch for more details on the changes. Signed-off-by: Zhang Xiao --- ...vma_lock_anon_vma-with-anon_vma_lock_read.patch | 195 +++++ ...02-mm-larger-stack-guard-gap-between-vmas.patch | 812 +++++++++++++++++++++ ...mm-fix-new-crash-in-unmapped_area_topdown.patch | 53 ++ recipes-kernel/linux/linux-windriver_3.10.bb | 4 + 4 files changed, 1064 insertions(+) create mode 100644 recipes-kernel/linux/linux-windriver/0001-mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read.patch create mode 100644 recipes-kernel/linux/linux-windriver/0002-mm-larger-stack-guard-gap-between-vmas.patch create mode 100644 recipes-kernel/linux/linux-windriver/0003-mm-fix-new-crash-in-unmapped_area_topdown.patch diff --git a/recipes-kernel/linux/linux-windriver/0001-mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read.patch b/recipes-kernel/linux/linux-windriver/0001-mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read.patch new file mode 100644 index 0000000..0b675d3 --- /dev/null +++ b/recipes-kernel/linux/linux-windriver/0001-mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read.patch @@ -0,0 +1,195 @@ +From 002cd8e756aebf08e772aba23de607c158502f17 Mon Sep 17 00:00:00 2001 +From: Konstantin Khlebnikov +Date: Fri, 5 Feb 2016 23:36:50 +0000 +Subject: [PATCH 1/3] mm: replace vma_lock_anon_vma with + anon_vma_lock_read/write + +[ Upstream commit 12352d3cae2cebe18805a91fab34b534d7444231 ] + +Sequence vma_lock_anon_vma() - vma_unlock_anon_vma() isn't safe if +anon_vma appeared between lock and unlock. We have to check anon_vma +first or call anon_vma_prepare() to be sure that it's here. There are +only few users of these legacy helpers. Let's get rid of them. + +This patch fixes anon_vma lock imbalance in validate_mm(). 
Write lock +isn't required here, read lock is enough. + +And reorders expand_downwards/expand_upwards: security_mmap_addr() and +wrapping-around check don't have to be under anon vma lock. + +Link: https://lkml.kernel.org/r/CACT4Y+Y908EjM2z=706dv4rV6dWtxTLK9nFg9_7DhRMLppBo2g@mail.gmail.com +Signed-off-by: Konstantin Khlebnikov +Reported-by: Dmitry Vyukov +Acked-by: Kirill A. Shutemov +Cc: Andrea Arcangeli +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +Signed-off-by: Paul Gortmaker +Signed-off-by: Zhang Xiao +--- + include/linux/rmap.h | 14 ------------- + mm/mmap.c | 55 ++++++++++++++++++++++++---------------------------- + 2 files changed, 25 insertions(+), 44 deletions(-) + +diff --git a/include/linux/rmap.h b/include/linux/rmap.h +index fc7c6cb..8a3ded5 100644 +--- a/include/linux/rmap.h ++++ b/include/linux/rmap.h +@@ -114,20 +114,6 @@ static inline struct anon_vma *page_anon_vma(struct page *page) + return page_rmapping(page); + } + +-static inline void vma_lock_anon_vma(struct vm_area_struct *vma) +-{ +- struct anon_vma *anon_vma = vma->anon_vma; +- if (anon_vma) +- down_write(&anon_vma->root->rwsem); +-} +- +-static inline void vma_unlock_anon_vma(struct vm_area_struct *vma) +-{ +- struct anon_vma *anon_vma = vma->anon_vma; +- if (anon_vma) +- up_write(&anon_vma->root->rwsem); +-} +- + static inline void anon_vma_lock_write(struct anon_vma *anon_vma) + { + down_write(&anon_vma->root->rwsem); +diff --git a/mm/mmap.c b/mm/mmap.c +index 385bf55..670654d 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -415,11 +415,15 @@ void validate_mm(struct mm_struct *mm) + unsigned long highest_address = 0; + struct vm_area_struct *vma = mm->mmap; + while (vma) { ++ struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma_chain *avc; +- vma_lock_anon_vma(vma); +- list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) +- anon_vma_interval_tree_verify(avc); +- vma_unlock_anon_vma(vma); ++ if (anon_vma) { ++ 
anon_vma_lock_read(anon_vma); ++ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma) ++ anon_vma_interval_tree_verify(avc); ++ anon_vma_unlock_read(anon_vma); ++ } ++ + highest_address = vma->vm_end; + vma = vma->vm_next; + i++; +@@ -2111,32 +2115,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns + */ + int expand_upwards(struct vm_area_struct *vma, unsigned long address) + { +- int error; ++ int error = 0; + + if (!(vma->vm_flags & VM_GROWSUP)) + return -EFAULT; + +- /* +- * We must make sure the anon_vma is allocated +- * so that the anon_vma locking is not a noop. +- */ ++ /* Guard against wrapping around to address 0. */ ++ if (address < PAGE_ALIGN(address+4)) ++ address = PAGE_ALIGN(address+4); ++ else ++ return -ENOMEM; ++ ++ /* We must make sure the anon_vma is allocated. */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; +- vma_lock_anon_vma(vma); + + /* + * vma->vm_start/vm_end cannot change under us because the caller + * is required to hold the mmap_sem in read mode. We need the + * anon_vma lock to serialize against concurrent expand_stacks. +- * Also guard against wrapping around to address 0. + */ +- if (address < PAGE_ALIGN(address+4)) +- address = PAGE_ALIGN(address+4); +- else { +- vma_unlock_anon_vma(vma); +- return -ENOMEM; +- } +- error = 0; ++ anon_vma_lock_write(vma->anon_vma); + + /* Somebody else might have raced and expanded it already */ + if (address > vma->vm_end) { +@@ -2154,7 +2153,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) + * updates, but we only hold a shared mmap_sem + * lock here, so we need to protect against + * concurrent vma expansions. +- * vma_lock_anon_vma() doesn't help here, as ++ * anon_vma_lock_write() doesn't help here, as + * we don't guarantee that all growable vmas + * in a mm share the same root anon vma. 
+ * So, we reuse mm->page_table_lock to guard +@@ -2174,7 +2173,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) + } + } + } +- vma_unlock_anon_vma(vma); ++ anon_vma_unlock_write(vma->anon_vma); + khugepaged_enter_vma_merge(vma); + validate_mm(vma->vm_mm); + return error; +@@ -2189,25 +2188,21 @@ int expand_downwards(struct vm_area_struct *vma, + { + int error; + +- /* +- * We must make sure the anon_vma is allocated +- * so that the anon_vma locking is not a noop. +- */ +- if (unlikely(anon_vma_prepare(vma))) +- return -ENOMEM; +- + address &= PAGE_MASK; + error = security_mmap_addr(address); + if (error) + return error; + +- vma_lock_anon_vma(vma); ++ /* We must make sure the anon_vma is allocated. */ ++ if (unlikely(anon_vma_prepare(vma))) ++ return -ENOMEM; + + /* + * vma->vm_start/vm_end cannot change under us because the caller + * is required to hold the mmap_sem in read mode. We need the + * anon_vma lock to serialize against concurrent expand_stacks. + */ ++ anon_vma_lock_write(vma->anon_vma); + + /* Somebody else might have raced and expanded it already */ + if (address < vma->vm_start) { +@@ -2225,7 +2220,7 @@ int expand_downwards(struct vm_area_struct *vma, + * updates, but we only hold a shared mmap_sem + * lock here, so we need to protect against + * concurrent vma expansions. +- * vma_lock_anon_vma() doesn't help here, as ++ * anon_vma_lock_write() doesn't help here, as + * we don't guarantee that all growable vmas + * in a mm share the same root anon vma. 
+ * So, we reuse mm->page_table_lock to guard +@@ -2243,7 +2238,7 @@ int expand_downwards(struct vm_area_struct *vma, + } + } + } +- vma_unlock_anon_vma(vma); ++ anon_vma_unlock_write(vma->anon_vma); + khugepaged_enter_vma_merge(vma); + validate_mm(vma->vm_mm); + return error; +-- +1.9.1 + diff --git a/recipes-kernel/linux/linux-windriver/0002-mm-larger-stack-guard-gap-between-vmas.patch b/recipes-kernel/linux/linux-windriver/0002-mm-larger-stack-guard-gap-between-vmas.patch new file mode 100644 index 0000000..9576f0f --- /dev/null +++ b/recipes-kernel/linux/linux-windriver/0002-mm-larger-stack-guard-gap-between-vmas.patch @@ -0,0 +1,812 @@ +From 89a0cd565b4c8e383beca8901cdc228bb2d24450 Mon Sep 17 00:00:00 2001 +From: Hugh Dickins +Date: Mon, 19 Jun 2017 11:03:24 +0000 +Subject: [PATCH 2/3] mm: larger stack guard gap, between vmas + +commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream. + +Stack guard page is a useful feature to reduce a risk of stack smashing +into a different mapping. We have been using a single page gap which +is sufficient to prevent having stack adjacent to a different mapping. +But this seems to be insufficient in the light of the stack usage in +userspace. E.g. glibc uses as large as 64kB alloca() in many commonly +used functions. Others use constructs liks gid_t buffer[NGROUPS_MAX] +which is 256kB or stack strings with MAX_ARG_STRLEN. + +This will become especially dangerous for suid binaries and the default +no limit for the stack size limit because those applications can be +tricked to consume a large portion of the stack and a single glibc call +could jump over the guard page. These attacks are not theoretical, +unfortunatelly. + +Make those attacks less probable by increasing the stack guard gap +to 1MB (on systems with 4k pages; but make it depend on the page size +because systems with larger base pages might cap stack allocations in +the PAGE_SIZE units) which should cover larger alloca() and VLA stack +allocations. 
It is obviously not a full fix because the problem is +somehow inherent, but it should reduce attack space a lot. + +One could argue that the gap size should be configurable from userspace, +but that can be done later when somebody finds that the new 1MB is wrong +for some special case applications. For now, add a kernel command line +option (stack_guard_gap) to specify the stack gap size (in page units). + +Implementation wise, first delete all the old code for stack guard page: +because although we could get away with accounting one extra page in a +stack vma, accounting a larger gap can break userspace - case in point, +a program run with "ulimit -S -v 20000" failed when the 1MB gap was +counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK +and strict non-overcommit mode. + +Instead of keeping gap inside the stack vma, maintain the stack guard +gap as a gap between vmas: using vm_start_gap() in place of vm_start +(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few +places which need to respect the gap - mainly arch_get_unmapped_area(), +and and the vma tree's subtree_gap support for that. + +Original-patch-by: Oleg Nesterov +Original-patch-by: Michal Hocko +Signed-off-by: Hugh Dickins +Acked-by: Michal Hocko +Tested-by: Helge Deller # parisc +Signed-off-by: Linus Torvalds +[wt: backport to 4.11: adjust context] +[wt: backport to 4.9: adjust context ; kernel doc was not in admin-guide] +[wt: backport to 4.4: adjust context ; drop ppc hugetlb_radix changes] +[wt: backport to 3.18: adjust context ; no FOLL_POPULATE ; + s390 uses generic arch_get_unmapped_area()] +Signed-off-by: Willy Tarreau +[gkh: minor build fixes for 3.18] +Signed-off-by: Greg Kroah-Hartman +[PG: backport from 3.18 ; delete orphaned stack_guard_page().] 
+Signed-off-by: Paul Gortmaker +--- + Documentation/kernel-parameters.txt | 7 ++ + arch/arc/mm/mmap.c | 2 +- + arch/arm/mm/mmap.c | 4 +- + arch/frv/mm/elf-fdpic.c | 2 +- + arch/mips/mm/mmap.c | 2 +- + arch/powerpc/mm/slice.c | 2 +- + arch/sh/mm/mmap.c | 4 +- + arch/sparc/kernel/sys_sparc_64.c | 4 +- + arch/sparc/mm/hugetlbpage.c | 2 +- + arch/tile/mm/hugetlbpage.c | 2 +- + arch/x86/kernel/sys_x86_64.c | 4 +- + arch/x86/mm/hugetlbpage.c | 2 +- + arch/xtensa/kernel/syscall.c | 2 +- + fs/hugetlbfs/inode.c | 2 +- + fs/proc/task_mmu.c | 4 -- + include/linux/mm.h | 53 +++++++------- + mm/memory.c | 52 +------------- + mm/mmap.c | 140 ++++++++++++++++++++++-------------- + 18 files changed, 138 insertions(+), 152 deletions(-) + +diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt +index 8ecf802..057047e 100644 +--- a/Documentation/kernel-parameters.txt ++++ b/Documentation/kernel-parameters.txt +@@ -2907,6 +2907,13 @@ bytes respectively. Such letter suffixes can also be entirely omitted. + spia_pedr= + spia_peddr= + ++ stack_guard_gap= [MM] ++ override the default stack gap protection. The value ++ is in page units and it defines how many pages prior ++ to (for stacks growing down) resp. after (for stacks ++ growing up) the main stack are reserved for no other ++ mapping. Default value is 256 pages. ++ + stacktrace [FTRACE] + Enabled the stack tracer on boot up. 
+ +diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c +index 2e06d56..cf4ae69 100644 +--- a/arch/arc/mm/mmap.c ++++ b/arch/arc/mm/mmap.c +@@ -64,7 +64,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c +index 5ef506c..984509e 100644 +--- a/arch/arm/mm/mmap.c ++++ b/arch/arm/mm/mmap.c +@@ -89,7 +89,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +@@ -140,7 +140,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c +index 836f147..efa59f1 100644 +--- a/arch/frv/mm/elf-fdpic.c ++++ b/arch/frv/mm/elf-fdpic.c +@@ -74,7 +74,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi + addr = PAGE_ALIGN(addr); + vma = find_vma(current->mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + goto success; + } + +diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c +index 7e5fe27..0bb4295 100644 +--- a/arch/mips/mm/mmap.c ++++ b/arch/mips/mm/mmap.c +@@ -92,7 +92,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/powerpc/mm/slice.c 
b/arch/powerpc/mm/slice.c +index 7ce9cf3..887365a 100644 +--- a/arch/powerpc/mm/slice.c ++++ b/arch/powerpc/mm/slice.c +@@ -103,7 +103,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, + if ((mm->task_size - len) < addr) + return 0; + vma = find_vma(mm, addr); +- return (!vma || (addr + len) <= vma->vm_start); ++ return (!vma || (addr + len) <= vm_start_gap(vma)); + } + + static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice) +diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c +index 6777177..7df7d59 100644 +--- a/arch/sh/mm/mmap.c ++++ b/arch/sh/mm/mmap.c +@@ -63,7 +63,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +@@ -113,7 +113,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c +index 2daaaa6..79b981e 100644 +--- a/arch/sparc/kernel/sys_sparc_64.c ++++ b/arch/sparc/kernel/sys_sparc_64.c +@@ -119,7 +119,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi + + vma = find_vma(mm, addr); + if (task_size - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +@@ -182,7 +182,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + + vma = find_vma(mm, addr); + if (task_size - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c +index d2b5944..ce49370 100644 +--- 
a/arch/sparc/mm/hugetlbpage.c ++++ b/arch/sparc/mm/hugetlbpage.c +@@ -118,7 +118,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (task_size - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) +diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c +index 650ccff..c75eac7 100644 +--- a/arch/tile/mm/hugetlbpage.c ++++ b/arch/tile/mm/hugetlbpage.c +@@ -297,7 +297,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + if (current->mm->get_unmapped_area == arch_get_unmapped_area) +diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c +index 30277e2..d050393 100644 +--- a/arch/x86/kernel/sys_x86_64.c ++++ b/arch/x86/kernel/sys_x86_64.c +@@ -127,7 +127,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (end - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +@@ -166,7 +166,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c +index ae1aa71..6adf3d9 100644 +--- a/arch/x86/mm/hugetlbpage.c ++++ b/arch/x86/mm/hugetlbpage.c +@@ -341,7 +341,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + addr = ALIGN(addr, huge_page_size(h)); + vma = 
find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) +diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c +index 5d3f7a1..1ff0b92 100644 +--- a/arch/xtensa/kernel/syscall.c ++++ b/arch/xtensa/kernel/syscall.c +@@ -86,7 +86,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, + /* At this point: (!vmm || addr < vmm->vm_end). */ + if (TASK_SIZE - len < addr) + return -ENOMEM; +- if (!vmm || addr + len <= vmm->vm_start) ++ if (!vmm || addr + len <= vm_start_gap(vmm)) + return addr; + addr = vmm->vm_end; + if (flags & MAP_SHARED) +diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c +index 4e5f332..db7d89c 100644 +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -169,7 +169,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma))) + return addr; + } + +diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c +index 6cef36f..143c51a 100644 +--- a/fs/proc/task_mmu.c ++++ b/fs/proc/task_mmu.c +@@ -283,11 +283,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) + + /* We don't show the stack guard page in /proc/maps */ + start = vma->vm_start; +- if (stack_guard_page_start(vma, start)) +- start += PAGE_SIZE; + end = vma->vm_end; +- if (stack_guard_page_end(vma, end)) +- end -= PAGE_SIZE; + + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + start, +diff --git a/include/linux/mm.h b/include/linux/mm.h +index 8799839..052520f 100644 +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1068,34 +1068,6 @@ int set_page_dirty(struct page *page); + int set_page_dirty_lock(struct page *page); + int 
clear_page_dirty_for_io(struct page *page); + +-/* Is the vma a continuation of the stack vma above it? */ +-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) +-{ +- return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +-} +- +-static inline int stack_guard_page_start(struct vm_area_struct *vma, +- unsigned long addr) +-{ +- return (vma->vm_flags & VM_GROWSDOWN) && +- (vma->vm_start == addr) && +- !vma_growsdown(vma->vm_prev, addr); +-} +- +-/* Is the vma a continuation of the stack vma below it? */ +-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +-{ +- return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +-} +- +-static inline int stack_guard_page_end(struct vm_area_struct *vma, +- unsigned long addr) +-{ +- return (vma->vm_flags & VM_GROWSUP) && +- (vma->vm_end == addr) && +- !vma_growsup(vma->vm_next, addr); +-} +- + extern pid_t + vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group); + +@@ -1621,6 +1593,7 @@ unsigned long ra_submit(struct file_ra_state *ra, + struct address_space *mapping, + struct file *filp); + ++extern unsigned long stack_guard_gap; + /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ + extern int expand_stack(struct vm_area_struct *vma, unsigned long address); + +@@ -1649,6 +1622,30 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m + return vma; + } + ++static inline unsigned long vm_start_gap(struct vm_area_struct *vma) ++{ ++ unsigned long vm_start = vma->vm_start; ++ ++ if (vma->vm_flags & VM_GROWSDOWN) { ++ vm_start -= stack_guard_gap; ++ if (vm_start > vma->vm_start) ++ vm_start = 0; ++ } ++ return vm_start; ++} ++ ++static inline unsigned long vm_end_gap(struct vm_area_struct *vma) ++{ ++ unsigned long vm_end = vma->vm_end; ++ ++ if (vma->vm_flags & VM_GROWSUP) { ++ vm_end += stack_guard_gap; ++ if (vm_end < vma->vm_end) ++ vm_end = -PAGE_SIZE; ++ } ++ 
return vm_end; ++} ++ + static inline unsigned long vma_pages(struct vm_area_struct *vma) + { + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; +diff --git a/mm/memory.c b/mm/memory.c +index 3d2372f..eb4060c 100644 +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -1654,12 +1654,6 @@ no_page_table: + return page; + } + +-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) +-{ +- return stack_guard_page_start(vma, addr) || +- stack_guard_page_end(vma, addr+PAGE_SIZE); +-} +- + /** + * __get_user_pages() - pin user pages in memory + * @tsk: task_struct of target task +@@ -1827,11 +1821,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + int ret; + unsigned int fault_flags = 0; + +- /* For mlock, just skip the stack guard page. */ +- if (foll_flags & FOLL_MLOCK) { +- if (stack_guard_page(vma, start)) +- goto next_page; +- } ++ /* mlock all present pages, but do not fault in new pages */ ++ if (foll_flags & FOLL_MLOCK) ++ return -ENOENT; + if (foll_flags & FOLL_WRITE) + fault_flags |= FAULT_FLAG_WRITE; + if (nonblocking) +@@ -3191,40 +3183,6 @@ out_release: + } + + /* +- * This is like a special single-page "expand_{down|up}wards()", +- * except we must first make sure that 'address{-|+}PAGE_SIZE' +- * doesn't hit another vma. +- */ +-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address) +-{ +- address &= PAGE_MASK; +- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) { +- struct vm_area_struct *prev = vma->vm_prev; +- +- /* +- * Is there a mapping abutting this one below? +- * +- * That's only ok if it's the same stack mapping +- * that has gotten split.. +- */ +- if (prev && prev->vm_end == address) +- return prev->vm_flags & VM_GROWSDOWN ? 
0 : -ENOMEM; +- +- expand_downwards(vma, address - PAGE_SIZE); +- } +- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) { +- struct vm_area_struct *next = vma->vm_next; +- +- /* As VM_GROWSDOWN but s/below/above/ */ +- if (next && next->vm_start == address + PAGE_SIZE) +- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM; +- +- expand_upwards(vma, address + PAGE_SIZE); +- } +- return 0; +-} +- +-/* + * We enter with non-exclusive mmap_sem (to exclude vma changes, + * but allow concurrent faults), and pte mapped but not yet locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. +@@ -3243,10 +3201,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + +- /* Check if we need to add a guard page to the stack */ +- if (check_stack_guard_page(vma, address) < 0) +- return VM_FAULT_SIGBUS; +- + /* Use the zero-page for reads */ + if (!(flags & FAULT_FLAG_WRITE)) { + entry = pte_mkspecial(pfn_pte(my_zero_pfn(address), +diff --git a/mm/mmap.c b/mm/mmap.c +index 670654d..35eea0a 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -265,6 +265,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) + unsigned long rlim, retval; + unsigned long newbrk, oldbrk; + struct mm_struct *mm = current->mm; ++ struct vm_area_struct *next; + unsigned long min_brk; + bool populate; + +@@ -310,7 +311,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) + } + + /* Check against existing mmap mappings. */ +- if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) ++ next = find_vma(mm, oldbrk); ++ if (next && newbrk + PAGE_SIZE > vm_start_gap(next)) + goto out; + + /* Ok, looks good - let it rip. 
*/ +@@ -333,10 +335,22 @@ out: + + static long vma_compute_subtree_gap(struct vm_area_struct *vma) + { +- unsigned long max, subtree_gap; +- max = vma->vm_start; +- if (vma->vm_prev) +- max -= vma->vm_prev->vm_end; ++ unsigned long max, prev_end, subtree_gap; ++ ++ /* ++ * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we ++ * allow two stack_guard_gaps between them here, and when choosing ++ * an unmapped area; whereas when expanding we only require one. ++ * That's a little inconsistent, but keeps the code here simpler. ++ */ ++ max = vm_start_gap(vma); ++ if (vma->vm_prev) { ++ prev_end = vm_end_gap(vma->vm_prev); ++ if (max > prev_end) ++ max -= prev_end; ++ else ++ max = 0; ++ } + if (vma->vm_rb.rb_left) { + subtree_gap = rb_entry(vma->vm_rb.rb_left, + struct vm_area_struct, vm_rb)->rb_subtree_gap; +@@ -424,7 +438,7 @@ void validate_mm(struct mm_struct *mm) + anon_vma_unlock_read(anon_vma); + } + +- highest_address = vma->vm_end; ++ highest_address = vm_end_gap(vma); + vma = vma->vm_next; + i++; + } +@@ -592,7 +606,7 @@ void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma, + if (vma->vm_next) + vma_gap_update(vma->vm_next); + else +- mm->highest_vm_end = vma->vm_end; ++ mm->highest_vm_end = vm_end_gap(vma); + + /* + * vma->vm_prev wasn't known when we followed the rbtree to find the +@@ -844,7 +858,7 @@ again: remove_next = 1 + (end > next->vm_end); + vma_gap_update(vma); + if (end_changed) { + if (!next) +- mm->highest_vm_end = end; ++ mm->highest_vm_end = vm_end_gap(vma); + else if (!adjust_next) + vma_gap_update(next); + } +@@ -1681,7 +1695,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info *info) + + while (true) { + /* Visit left subtree if it looks promising */ +- gap_end = vma->vm_start; ++ gap_end = vm_start_gap(vma); + if (gap_end >= low_limit && vma->vm_rb.rb_left) { + struct vm_area_struct *left = + rb_entry(vma->vm_rb.rb_left, +@@ -1692,7 +1706,7 @@ unsigned long unmapped_area(struct vm_unmapped_area_info 
*info) + } + } + +- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; ++ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; + check_current: + /* Check if current node has a suitable gap */ + if (gap_start > high_limit) +@@ -1719,8 +1733,8 @@ check_current: + vma = rb_entry(rb_parent(prev), + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_left) { +- gap_start = vma->vm_prev->vm_end; +- gap_end = vma->vm_start; ++ gap_start = vm_end_gap(vma->vm_prev); ++ gap_end = vm_start_gap(vma); + goto check_current; + } + } +@@ -1784,7 +1798,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + + while (true) { + /* Visit right subtree if it looks promising */ +- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0; ++ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0; + if (gap_start <= high_limit && vma->vm_rb.rb_right) { + struct vm_area_struct *right = + rb_entry(vma->vm_rb.rb_right, +@@ -1797,7 +1811,7 @@ unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) + + check_current: + /* Check if current node has a suitable gap */ +- gap_end = vma->vm_start; ++ gap_end = vm_start_gap(vma); + if (gap_end < low_limit) + return -ENOMEM; + if (gap_start <= high_limit && gap_end - gap_start >= length) +@@ -1823,7 +1837,7 @@ check_current: + struct vm_area_struct, vm_rb); + if (prev == vma->vm_rb.rb_right) { + gap_start = vma->vm_prev ? 
+- vma->vm_prev->vm_end : 0; ++ vm_end_gap(vma->vm_prev) : 0; + goto check_current; + } + } +@@ -1861,7 +1875,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) + { + struct mm_struct *mm = current->mm; +- struct vm_area_struct *vma; ++ struct vm_area_struct *vma, *prev; + struct vm_unmapped_area_info info; + + if (len > TASK_SIZE - mmap_min_addr) +@@ -1872,9 +1886,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, + + if (addr) { + addr = PAGE_ALIGN(addr); +- vma = find_vma(mm, addr); ++ vma = find_vma_prev(mm, addr, &prev); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma)) && ++ (!prev || addr >= vm_end_gap(prev))) + return addr; + } + +@@ -1906,7 +1921,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) + { +- struct vm_area_struct *vma; ++ struct vm_area_struct *vma, *prev; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + struct vm_unmapped_area_info info; +@@ -1921,9 +1936,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); +- vma = find_vma(mm, addr); ++ vma = find_vma_prev(mm, addr, &prev); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && +- (!vma || addr + len <= vma->vm_start)) ++ (!vma || addr + len <= vm_start_gap(vma)) && ++ (!prev || addr >= vm_end_gap(prev))) + return addr; + } + +@@ -2063,7 +2079,8 @@ find_vma_prev(struct mm_struct *mm, unsigned long addr, + * update accounting. This is shared with both the + * grow-up and grow-down cases. 
+ */ +-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) ++static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, ++ unsigned long grow) + { + struct mm_struct *mm = vma->vm_mm; + struct rlimit *rlim = current->signal->rlim; +@@ -2115,17 +2132,30 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns + */ + int expand_upwards(struct vm_area_struct *vma, unsigned long address) + { ++ struct vm_area_struct *next; ++ unsigned long gap_addr; + int error = 0; + + if (!(vma->vm_flags & VM_GROWSUP)) + return -EFAULT; + + /* Guard against wrapping around to address 0. */ +- if (address < PAGE_ALIGN(address+4)) +- address = PAGE_ALIGN(address+4); +- else ++ address &= PAGE_MASK; ++ address += PAGE_SIZE; ++ if (!address) + return -ENOMEM; + ++ /* Enforce stack_guard_gap */ ++ gap_addr = address + stack_guard_gap; ++ if (gap_addr < address) ++ return -ENOMEM; ++ next = vma->vm_next; ++ if (next && next->vm_start < gap_addr) { ++ if (!(next->vm_flags & VM_GROWSUP)) ++ return -ENOMEM; ++ /* Check that both stack segments have the same anon_vma? */ ++ } ++ + /* We must make sure the anon_vma is allocated. 
*/ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; +@@ -2166,7 +2196,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) + if (vma->vm_next) + vma_gap_update(vma->vm_next); + else +- vma->vm_mm->highest_vm_end = address; ++ vma->vm_mm->highest_vm_end = vm_end_gap(vma); + spin_unlock(&vma->vm_mm->page_table_lock); + + perf_event_mmap(vma); +@@ -2186,6 +2216,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) + int expand_downwards(struct vm_area_struct *vma, + unsigned long address) + { ++ struct vm_area_struct *prev; ++ unsigned long gap_addr; + int error; + + address &= PAGE_MASK; +@@ -2193,6 +2225,17 @@ int expand_downwards(struct vm_area_struct *vma, + if (error) + return error; + ++ /* Enforce stack_guard_gap */ ++ gap_addr = address - stack_guard_gap; ++ if (gap_addr > address) ++ return -ENOMEM; ++ prev = vma->vm_prev; ++ if (prev && prev->vm_end > gap_addr) { ++ if (!(prev->vm_flags & VM_GROWSDOWN)) ++ return -ENOMEM; ++ /* Check that both stack segments have the same anon_vma? */ ++ } ++ + /* We must make sure the anon_vma is allocated. */ + if (unlikely(anon_vma_prepare(vma))) + return -ENOMEM; +@@ -2244,28 +2287,25 @@ int expand_downwards(struct vm_area_struct *vma, + return error; + } + +-/* +- * Note how expand_stack() refuses to expand the stack all the way to +- * abut the next virtual mapping, *unless* that mapping itself is also +- * a stack mapping. We want to leave room for a guard page, after all +- * (the guard page itself is not added here, that is done by the +- * actual page faulting logic) +- * +- * This matches the behavior of the guard page logic (see mm/memory.c: +- * check_stack_guard_page()), which only allows the guard page to be +- * removed under these circumstances. +- */ ++/* enforced gap between the expanding stack and other mappings. 
*/ ++unsigned long stack_guard_gap = 256UL< +Debugged-by: Linus Torvalds +Signed-off-by: Hugh Dickins +Acked-by: Michal Hocko +Signed-off-by: Linus Torvalds +Signed-off-by: Zhang Xiao +--- + mm/mmap.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/mm/mmap.c b/mm/mmap.c +index 35eea0a..bcbf7ae 100644 +--- a/mm/mmap.c ++++ b/mm/mmap.c +@@ -1711,7 +1711,8 @@ check_current: + /* Check if current node has a suitable gap */ + if (gap_start > high_limit) + return -ENOMEM; +- if (gap_end >= low_limit && gap_end - gap_start >= length) ++ if (gap_end >= low_limit && ++ gap_end > gap_start && gap_end - gap_start >= length) + goto found; + + /* Visit right subtree if it looks promising */ +@@ -1814,7 +1815,8 @@ check_current: + gap_end = vm_start_gap(vma); + if (gap_end < low_limit) + return -ENOMEM; +- if (gap_start <= high_limit && gap_end - gap_start >= length) ++ if (gap_start <= high_limit && ++ gap_end > gap_start && gap_end - gap_start >= length) + goto found; + + /* Visit left subtree if it looks promising */ +-- +1.9.1 + diff --git a/recipes-kernel/linux/linux-windriver_3.10.bb b/recipes-kernel/linux/linux-windriver_3.10.bb index bde432e..bf1c571 100644 --- a/recipes-kernel/linux/linux-windriver_3.10.bb +++ b/recipes-kernel/linux/linux-windriver_3.10.bb @@ -32,3 +32,7 @@ KSRC_linux_windriver_3_10 ?= "${THISDIR}/../../git/kernel-3.10.x.git" EXTRA_KERNEL_SRC_URI ?= "" SRC_URI = "git://${KSRC_linux_windriver_3_10};protocol=file;nocheckout=1;branch=${KBRANCH},meta;name=machine,meta ${EXTRA_KERNEL_SRC_URI}" +SRC_URI += "file://0001-mm-replace-vma_lock_anon_vma-with-anon_vma_lock_read.patch \ + file://0002-mm-larger-stack-guard-gap-between-vmas.patch \ + file://0003-mm-fix-new-crash-in-unmapped_area_topdown.patch \ + " -- 1.9.1