From a0dce7f0ac66cdd5b653a3b059eb1382c2bdf8a0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:30:39 -0800 Subject: [PATCH 01/72] drivers/memstick/core/ms_block.c: fix spelling of MSB_RP_RECIVE_STATUS_REG Cc: Maxim Levitsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/memstick/core/ms_block.c | 4 ++-- drivers/memstick/core/ms_block.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 9188ef5d677e..24f2f8473dee 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -401,10 +401,10 @@ again: sizeof(struct ms_status_register))) return 0; - msb->state = MSB_RP_RECIVE_STATUS_REG; + msb->state = MSB_RP_RECEIVE_STATUS_REG; return 0; - case MSB_RP_RECIVE_STATUS_REG: + case MSB_RP_RECEIVE_STATUS_REG: msb->regs.status = *(struct ms_status_register *)mrq->data; msb->state = MSB_RP_SEND_OOB_READ; /* fallthrough */ diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h index 96e637550988..c75198dbf139 100644 --- a/drivers/memstick/core/ms_block.h +++ b/drivers/memstick/core/ms_block.h @@ -223,7 +223,7 @@ enum msb_readpage_states { MSB_RP_RECEIVE_INT_REQ_RESULT, MSB_RP_SEND_READ_STATUS_REG, - MSB_RP_RECIVE_STATUS_REG, + MSB_RP_RECEIVE_STATUS_REG, MSB_RP_SEND_OOB_READ, MSB_RP_RECEIVE_OOB_READ, From b77d88d493b8fc7a4c2dadd3bb86d1dee2f53a56 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:40 -0800 Subject: [PATCH 02/72] mm: drop actor argument of do_generic_file_read() There's only one caller of do_generic_file_read() and the only actor is file_read_actor(). No reason to have a callback parameter. Signed-off-by: Kirill A. Shutemov Acked-by: Dave Hansen Reviewed-by: Wanpeng Li Cc: Matthew Wilcox Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index ae4846ff4849..b7749a92021c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1090,7 +1090,6 @@ static void shrink_readahead_size_eio(struct file *filp, * @filp: the file to read * @ppos: current file position * @desc: read_descriptor - * @actor: read method * * This is a generic file read routine, and uses the * mapping->a_ops->readpage() function for the actual low-level stuff. @@ -1099,7 +1098,7 @@ static void shrink_readahead_size_eio(struct file *filp, * of the logic when it comes to error handling etc. */ static void do_generic_file_read(struct file *filp, loff_t *ppos, - read_descriptor_t *desc, read_actor_t actor) + read_descriptor_t *desc) { struct address_space *mapping = filp->f_mapping; struct inode *inode = mapping->host; @@ -1200,13 +1199,14 @@ page_ok: * Ok, we have the page, and it's up-to-date, so * now we can copy it to user space... * - * The actor routine returns how many bytes were actually used.. + * The file_read_actor routine returns how many bytes were + * actually used.. * NOTE! This may not be the same as how much of a user buffer * we filled up (we may be padding etc), so we can only update * "pos" here (the actor routine has to update the user buffer * pointers and the remaining count). */ - ret = actor(desc, page, offset, nr); + ret = file_read_actor(desc, page, offset, nr); offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; @@ -1479,7 +1479,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, if (desc.count == 0) continue; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); + do_generic_file_read(filp, ppos, &desc); retval += desc.written; if (desc.error) { retval = retval ?: desc.error; From e9bb18c7b95d4dcf8c7f0e14f920ca6f03109e75 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:42 -0800 Subject: [PATCH 03/72] mm: avoid increase sizeof(struct page) due to split page table lock Alex Thorlton noticed that some massively threaded workloads work poorly, if THP enabled. This patchset fixes this by introducing split page table lock for PMD tables. hugetlbfs is not covered yet. This patchset is based on work by Naoya Horiguchi. : akpm result summary: : : THP off, v3.12-rc2: 18.059261877 seconds time elapsed : THP off, patched: 16.768027318 seconds time elapsed : : THP on, v3.12-rc2: 42.162306788 seconds time elapsed : THP on, patched: 8.397885779 seconds time elapsed : : HUGETLB, v3.12-rc2: 47.574936948 seconds time elapsed : HUGETLB, patched: 19.447481153 seconds time elapsed THP off, v3.12-rc2: ------------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 1037072.835207 task-clock # 57.426 CPUs utilized ( +- 3.59% ) 95,093 context-switches # 0.092 K/sec ( +- 3.93% ) 140 cpu-migrations # 0.000 K/sec ( +- 5.28% ) 10,000,550 page-faults # 0.010 M/sec ( +- 0.00% ) 2,455,210,400,261 cycles # 2.367 GHz ( +- 3.62% ) [83.33%] 2,429,281,882,056 stalled-cycles-frontend # 98.94% frontend cycles idle ( +- 3.67% ) [83.33%] 1,975,960,019,659 stalled-cycles-backend # 80.48% backend cycles idle ( +- 3.88% ) [66.68%] 46,503,296,013 instructions # 0.02 insns per cycle # 52.24 stalled cycles per insn ( +- 3.21% ) [83.34%] 9,278,997,542 branches # 8.947 M/sec ( +- 4.00% ) [83.34%] 89,881,640 branch-misses # 0.97% of all branches ( +- 1.17% ) [83.33%] 18.059261877 seconds time elapsed ( +- 2.65% ) THP on, v3.12-rc2: ------------------ Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 3114745.395974 task-clock # 73.875 CPUs utilized ( +- 1.84% ) 267,356 context-switches # 0.086 K/sec ( +- 1.84% ) 99 cpu-migrations # 0.000 K/sec ( +- 1.40% ) 58,313 page-faults # 0.019 K/sec ( +- 0.28% ) 7,416,635,817,510 cycles # 2.381 GHz ( +- 1.83% ) [83.33%] 7,342,619,196,993 stalled-cycles-frontend # 99.00% frontend cycles idle ( +- 1.88% ) [83.33%] 6,267,671,641,967 stalled-cycles-backend # 84.51% backend cycles idle ( +- 2.03% ) [66.67%] 117,819,935,165 instructions # 0.02 insns per cycle # 62.32 stalled cycles per insn ( +- 4.39% ) [83.34%] 28,899,314,777 branches # 9.278 M/sec ( +- 4.48% ) [83.34%] 71,787,032 branch-misses # 0.25% of all branches ( +- 1.03% ) [83.33%] 42.162306788 seconds time elapsed ( +- 1.73% ) HUGETLB, v3.12-rc2: ------------------- Performance counter stats for './thp_memscale_hugetlbfs -c 80 -b 512M' (5 runs): 2588052.787264 task-clock # 54.400 CPUs utilized ( +- 3.69% ) 246,831 context-switches # 0.095 K/sec ( +- 4.15% ) 138 cpu-migrations # 0.000 K/sec ( +- 5.30% ) 21,027 page-faults # 0.008 K/sec ( +- 0.01% ) 6,166,666,307,263 cycles # 2.383 GHz ( +- 3.68% ) [83.33%] 6,086,008,929,407 stalled-cycles-frontend # 98.69% frontend cycles idle ( +- 3.77% ) [83.33%] 5,087,874,435,481 stalled-cycles-backend # 82.51% backend cycles idle ( +- 4.41% ) [66.67%] 133,782,831,249 instructions # 0.02 insns per cycle # 45.49 stalled cycles per insn ( +- 4.30% ) [83.34%] 34,026,870,541 branches # 13.148 M/sec ( +- 4.24% ) [83.34%] 68,670,942 branch-misses # 0.20% of all branches ( +- 3.26% ) [83.33%] 47.574936948 seconds time elapsed ( +- 2.09% ) THP off, patched: ----------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 943301.957892 task-clock # 56.256 CPUs utilized ( +- 3.01% ) 86,218 context-switches # 0.091 K/sec ( +- 3.17% ) 121 cpu-migrations # 0.000 K/sec ( +- 6.64% ) 10,000,551 page-faults # 0.011 M/sec ( +- 0.00% ) 2,230,462,457,654 cycles # 2.365 GHz ( +- 3.04% ) [83.32%] 2,204,616,385,805 stalled-cycles-frontend # 98.84% frontend cycles idle ( +- 3.09% ) [83.32%] 1,778,640,046,926 stalled-cycles-backend # 79.74% backend cycles idle ( +- 3.47% ) [66.69%] 45,995,472,617 instructions # 0.02 insns per cycle # 47.93 stalled cycles per insn ( +- 2.51% ) [83.34%] 9,179,700,174 branches # 9.731 M/sec ( +- 3.04% ) [83.35%] 89,166,529 branch-misses # 0.97% of all branches ( +- 1.45% ) [83.33%] 16.768027318 seconds time elapsed ( +- 2.47% ) THP on, patched: ---------------- Performance counter stats for './thp_memscale -c 80 -b 512m' (5 runs): 458793.837905 task-clock # 54.632 CPUs utilized ( +- 0.79% ) 41,831 context-switches # 0.091 K/sec ( +- 0.97% ) 98 cpu-migrations # 0.000 K/sec ( +- 1.66% ) 57,829 page-faults # 0.126 K/sec ( +- 0.62% ) 1,077,543,336,716 cycles # 2.349 GHz ( +- 0.81% ) [83.33%] 1,067,403,802,964 stalled-cycles-frontend # 99.06% frontend cycles idle ( +- 0.87% ) [83.33%] 864,764,616,143 stalled-cycles-backend # 80.25% backend cycles idle ( +- 0.73% ) [66.68%] 16,129,177,440 instructions # 0.01 insns per cycle # 66.18 stalled cycles per insn ( +- 7.94% ) [83.35%] 3,618,938,569 branches # 7.888 M/sec ( +- 8.46% ) [83.36%] 33,242,032 branch-misses # 0.92% of all branches ( +- 2.02% ) [83.32%] 8.397885779 seconds time elapsed ( +- 0.18% ) HUGETLB, patched: ----------------- Performance counter stats for './thp_memscale_hugetlbfs -c 80 -b 512M' (5 runs): 395353.076837 task-clock # 20.329 CPUs utilized ( +- 8.16% ) 55,730 context-switches # 0.141 K/sec ( +- 5.31% ) 138 cpu-migrations # 0.000 K/sec ( +- 4.24% ) 21,027 page-faults # 0.053 K/sec ( +- 0.00% ) 930,219,717,244 cycles # 2.353 GHz ( +- 8.21% ) [83.32%] 914,295,694,103 stalled-cycles-frontend # 98.29% frontend cycles idle ( +- 8.35% ) [83.33%] 704,137,950,187 stalled-cycles-backend # 75.70% backend cycles idle ( +- 9.16% ) [66.69%] 30,541,538,385 instructions # 0.03 insns per cycle # 29.94 stalled cycles per insn ( +- 3.98% ) [83.35%] 8,415,376,631 branches # 21.286 M/sec ( +- 3.61% ) [83.36%] 32,645,478 branch-misses # 0.39% of all branches ( +- 3.41% ) [83.32%] 19.447481153 seconds time elapsed ( +- 2.00% ) This patch (of 11): CONFIG_GENERIC_LOCKBREAK increases sizeof(spinlock_t) to 8 bytes. It leads to increase sizeof(struct page) by 4 bytes on 32-bit system if split page table lock is in use, since page->ptl shares space in union with longs and pointers. Let's disable split page table lock on 32-bit systems with GENERIC_LOCKBREAK enabled. Signed-off-by: Kirill A. Shutemov Cc: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index 3f4ffda152bb..c28d247e524a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -219,6 +219,7 @@ config SPLIT_PTLOCK_CPUS default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC + default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" # From 57c1ffcefb5acb3c8b5f8436c325a6bdbd8e9c78 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:45 -0800 Subject: [PATCH 04/72] mm: rename USE_SPLIT_PTLOCKS to USE_SPLIT_PTE_PTLOCKS We're going to introduce split page table lock for PMD level. Let's rename existing split ptlock for PTE level to avoid confusion. Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/fault-armv.c | 6 +++--- arch/x86/xen/mmu.c | 6 +++--- include/linux/mm.h | 6 +++--- include/linux/mm_types.h | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c index 2a5907b5c8d2..ff379ac115df 100644 --- a/arch/arm/mm/fault-armv.c +++ b/arch/arm/mm/fault-armv.c @@ -65,7 +65,7 @@ static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address, return ret; } -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS /* * If we are using split PTE locks, then we need to take the page * lock here. Otherwise we are using shared mm->page_table_lock @@ -84,10 +84,10 @@ static inline void do_pte_unlock(spinlock_t *ptl) { spin_unlock(ptl); } -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */ static inline void do_pte_lock(spinlock_t *ptl) {} static inline void do_pte_unlock(spinlock_t *ptl) {} -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ static int adjust_pte(struct vm_area_struct *vma, unsigned long address, unsigned long pfn) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fdc3ba28ca38..455c873ce009 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -796,7 +796,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) { spinlock_t *ptl = NULL; -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS ptl = __pte_lockptr(page); spin_lock_nest_lock(ptl, &mm->page_table_lock); #endif @@ -1637,7 +1637,7 @@ static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, __set_pfn_prot(pfn, PAGE_KERNEL_RO); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); xen_mc_issue(PARAVIRT_LAZY_MMU); @@ -1671,7 +1671,7 @@ static inline void xen_release_ptpage(unsigned long pfn, unsigned level) if (!PageHighMem(page)) { xen_mc_batch(); - if (level == PT_PTE && USE_SPLIT_PTLOCKS) + if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS) __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); __set_pfn_prot(pfn, PAGE_KERNEL); diff --git a/include/linux/mm.h b/include/linux/mm.h index 42a35d94b82c..dc3333d6c986 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1316,7 +1316,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS /* * We tuck a spinlock to guard each pagetable page into its struct page, * at page->private, with BUILD_BUG_ON to make sure that this will not @@ -1329,14 +1329,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a } while (0) #define pte_lock_deinit(page) ((page)->mapping = NULL) #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) -#else /* !USE_SPLIT_PTLOCKS */ +#else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ #define pte_lock_init(page) do {} while (0) #define pte_lock_deinit(page) do {} while (0) #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ static inline void pgtable_page_ctor(struct page *page) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index a3198e5aaf4e..f1ff66dfa79e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,7 +23,7 @@ struct address_space; -#define USE_SPLIT_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) /* * Each physical page in the system has a struct page associated with @@ -141,7 +141,7 @@ struct page { * indicates order in the buddy * system if PG_buddy is set. */ -#if USE_SPLIT_PTLOCKS +#if USE_SPLIT_PTE_PTLOCKS spinlock_t ptl; #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ @@ -309,14 +309,14 @@ enum { NR_MM_COUNTERS }; -#if USE_SPLIT_PTLOCKS && defined(CONFIG_MMU) +#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU) #define SPLIT_RSS_COUNTING /* per-thread cached information, */ struct task_rss_stat { int events; /* for synchronization threshold */ int count[NR_MM_COUNTERS]; }; -#endif /* USE_SPLIT_PTLOCKS */ +#endif /* USE_SPLIT_PTE_PTLOCKS */ struct mm_rss_stat { atomic_long_t count[NR_MM_COUNTERS]; From e1f56c89b040134add93f686931cc266541d239a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:48 -0800 Subject: [PATCH 05/72] mm: convert mm->nr_ptes to atomic_long_t With split page table lock for PMD level we can't hold mm->page_table_lock while updating nr_ptes. Let's convert it to atomic_long_t to avoid races. Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mm_types.h | 2 +- kernel/fork.c | 2 +- mm/huge_memory.c | 10 +++++----- mm/memory.c | 4 ++-- mm/mmap.c | 3 ++- mm/oom_kill.c | 6 +++--- 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index abbe825d20ff..8faaebdc6b02 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -62,7 +62,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE*sizeof(pte_t)*mm->nr_ptes) >> 10, + (PTRS_PER_PTE * sizeof(pte_t) * + atomic_long_read(&mm->nr_ptes)) >> 10, swap << (PAGE_SHIFT-10)); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f1ff66dfa79e..566df579c51f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -339,6 +339,7 @@ struct mm_struct { pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ + atomic_long_t nr_ptes; /* Page table pages */ int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ @@ -360,7 +361,6 @@ struct mm_struct { unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE */ unsigned long stack_vm; /* VM_GROWSUP/DOWN */ unsigned long def_flags; - unsigned long nr_ptes; /* Page table pages */ unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; diff --git a/kernel/fork.c b/kernel/fork.c index f6d11fc67f72..e2520756e005 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -532,7 +532,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) mm->flags = (current->mm) ? (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; - mm->nr_ptes = 0; + atomic_long_set(&mm->nr_ptes, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); mm_init_aio(mm); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0556c6a44959..e5b2d316be2e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -738,7 +738,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); spin_unlock(&mm->page_table_lock); } @@ -771,7 +771,7 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, entry = pmd_mkhuge(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); return true; } @@ -896,7 +896,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd = pmd_mkold(pmd_wrprotect(pmd)); pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); set_pmd_at(dst_mm, addr, dst_pmd, pmd); - dst_mm->nr_ptes++; + atomic_long_inc(&dst_mm->nr_ptes); ret = 0; out_unlock: @@ -1392,7 +1392,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_remove_pmd_tlb_entry(tlb, pmd, addr); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(&tlb->mm->page_table_lock); put_huge_zero_page(); } else { @@ -1401,7 +1401,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, VM_BUG_ON(page_mapcount(page) < 0); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); VM_BUG_ON(!PageHead(page)); - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(&tlb->mm->page_table_lock); tlb_remove_page(tlb, page); } diff --git a/mm/memory.c b/mm/memory.c index bf8665849a5f..0b5a93a49f27 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -382,7 +382,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); - tlb->mm->nr_ptes--; + atomic_long_dec(&tlb->mm->nr_ptes); } static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -573,7 +573,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, spin_lock(&mm->page_table_lock); wait_split_huge_page = 0; if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ - mm->nr_ptes++; + atomic_long_inc(&mm->nr_ptes); pmd_populate(mm, pmd, new); new = NULL; } else if (unlikely(pmd_trans_splitting(*pmd))) diff --git a/mm/mmap.c b/mm/mmap.c index 5a6baddde15d..834b2d785f1e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2724,7 +2724,8 @@ void exit_mmap(struct mm_struct *mm) } vm_unacct_memory(nr_accounted); - WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + WARN_ON(atomic_long_read(&mm->nr_ptes) > + (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); } /* Insert vm structure into process list sorted by address diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6738c47f1f72..1e4a600a6163 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -161,7 +161,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ - points = get_mm_rss(p->mm) + p->mm->nr_ptes + + points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) + get_mm_counter(p->mm, MM_SWAPENTS); task_unlock(p); @@ -364,10 +364,10 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas continue; } - pr_info("[%5d] %5d %5d %8lu %8lu %7lu %8lu %5hd %s\n", + pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu %5hd %s\n", task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), - task->mm->nr_ptes, + atomic_long_read(&task->mm->nr_ptes), get_mm_counter(task->mm, MM_SWAPENTS), task->signal->oom_score_adj, task->comm); task_unlock(task); From 9a86cb7bdc4ccbe3f99a1ca275b90a322a90f9ce Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:51 -0800 Subject: [PATCH 06/72] mm: introduce api for split page table lock for PMD level Basic api, backed by mm->page_table_lock for now. Actual implementation will be added later. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index dc3333d6c986..4f4ca41fcf27 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1378,6 +1378,19 @@ static inline void pgtable_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ NULL: pte_offset_kernel(pmd, address)) +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} + + +static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) +{ + spinlock_t *ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); + return ptl; +} + extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); From bf929152e9f6c49b66fad4ebf08cc95b02ce48f5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:54 -0800 Subject: [PATCH 07/72] mm, thp: change pmd_trans_huge_lock() to return taken lock With split ptlock it's important to know which lock pmd_trans_huge_lock() took. This patch adds one more parameter to the function to return the lock. In most places migration to new api is trivial. Exception is move_huge_pmd(): we need to take two locks if pmd tables are different. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 13 +++++++------ include/linux/huge_mm.h | 14 +++++++------- mm/huge_memory.c | 40 +++++++++++++++++++++++++++------------- mm/memcontrol.c | 10 +++++----- 4 files changed, 46 insertions(+), 31 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8faaebdc6b02..42b5cf5d0326 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -506,9 +506,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); mss->anonymous_thp += HPAGE_PMD_SIZE; return 0; } @@ -999,13 +999,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, { struct vm_area_struct *vma; struct pagemapread *pm = walk->private; + spinlock_t *ptl; pte_t *pte; int err = 0; pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); /* find the first VMA at or above 'addr' */ vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { + if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { int pmd_flags2; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) @@ -1023,7 +1024,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (err) break; } - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return err; } @@ -1325,7 +1326,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, md = walk->private; - if (pmd_trans_huge_lock(pmd, md->vma) == 1) { + if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; @@ -1333,7 +1334,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); - spin_unlock(&walk->mm->page_table_lock); + spin_unlock(ptl); return 0; } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 3935428c57cf..4aca0d8da112 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -129,15 +129,15 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, long adjust_next); -extern int __pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma); +extern int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl); /* mmap_sem must be held on entry */ -static inline int pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma) +static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem)); if (pmd_trans_huge(*pmd)) - return __pmd_trans_huge_lock(pmd, vma); + return __pmd_trans_huge_lock(pmd, vma, ptl); else return 0; } @@ -215,8 +215,8 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, long adjust_next) { } -static inline int pmd_trans_huge_lock(pmd_t *pmd, - struct vm_area_struct *vma) +static inline int pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e5b2d316be2e..471eb04066ff 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1376,9 +1376,10 @@ out: int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr) { + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { struct page *page; pgtable_t pgtable; pmd_t orig_pmd; @@ -1393,7 +1394,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { atomic_long_dec(&tlb->mm->nr_ptes); - spin_unlock(&tlb->mm->page_table_lock); + spin_unlock(ptl); put_huge_zero_page(); } else { page = pmd_page(orig_pmd); @@ -1402,7 +1403,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); VM_BUG_ON(!PageHead(page)); atomic_long_dec(&tlb->mm->nr_ptes); - spin_unlock(&tlb->mm->page_table_lock); + spin_unlock(ptl); tlb_remove_page(tlb, page); } pte_free(tlb->mm, pgtable); @@ -1415,14 +1416,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec) { + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { /* * All logical pages in the range are present * if backed by a huge page. */ - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); memset(vec, 1, (end - addr) >> PAGE_SHIFT); ret = 1; } @@ -1435,6 +1437,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long old_end, pmd_t *old_pmd, pmd_t *new_pmd) { + spinlock_t *old_ptl, *new_ptl; int ret = 0; pmd_t pmd; @@ -1455,12 +1458,21 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, goto out; } - ret = __pmd_trans_huge_lock(old_pmd, vma); + /* + * We don't have to worry about the ordering of src and dst + * ptlocks because exclusive mmap_sem prevents deadlock. + */ + ret = __pmd_trans_huge_lock(old_pmd, vma, &old_ptl); if (ret == 1) { + new_ptl = pmd_lockptr(mm, new_pmd); + if (new_ptl != old_ptl) + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - spin_unlock(&mm->page_table_lock); + if (new_ptl != old_ptl) + spin_unlock(new_ptl); + spin_unlock(old_ptl); } out: return ret; @@ -1476,9 +1488,10 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; int ret = 0; - if (__pmd_trans_huge_lock(pmd, vma) == 1) { + if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pmd_t entry; ret = 1; if (!prot_numa) { @@ -1507,7 +1520,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (ret == HPAGE_PMD_NR) set_pmd_at(mm, addr, pmd, entry); - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); } return ret; @@ -1520,12 +1533,13 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, * Note that if it returns 1, this routine returns without unlocking page * table locks. So callers must unlock them. */ -int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) +int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma, + spinlock_t **ptl) { - spin_lock(&vma->vm_mm->page_table_lock); + *ptl = pmd_lock(vma->vm_mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(*ptl); wait_split_huge_page(vma->anon_vma, pmd); return -1; } else { @@ -1534,7 +1548,7 @@ int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) return 1; } } - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(*ptl); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e3cd40b2d5d9..f1a0ae6e11b8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6605,10 +6605,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, pte_t *pte; spinlock_t *ptl; - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE) mc.precharge += HPAGE_PMD_NR; - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } @@ -6797,9 +6797,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, * to be unlocked in __split_huge_page_splitting(), where the main * part of thp split is not executed yet. */ - if (pmd_trans_huge_lock(pmd, vma) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { if (mc.precharge < HPAGE_PMD_NR) { - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } target_type = get_mctgt_type_thp(vma, addr, *pmd, &target); @@ -6816,7 +6816,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, } put_page(page); } - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); return 0; } From 117b0791ac42f2ec447bc864e70ad622b5604059 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:56 -0800 Subject: [PATCH 08/72] mm, thp: move ptl taking inside page_check_address_pmd() With split page table lock we can't know which lock we need to take before we find the relevant pmd. Let's move lock taking inside the function. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 3 ++- mm/huge_memory.c | 43 ++++++++++++++++++++++++++--------------- mm/rmap.c | 13 +++++-------- 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 4aca0d8da112..91672e2deec3 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -54,7 +54,8 @@ enum page_check_address_pmd_flag { extern pmd_t *page_check_address_pmd(struct page *page, struct mm_struct *mm, unsigned long address, - enum page_check_address_pmd_flag flag); + enum page_check_address_pmd_flag flag, + spinlock_t **ptl); #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1<vm_mm; + spinlock_t *ptl; pmd_t *pmd; int ret = 0; /* For mmu_notifiers */ @@ -1600,9 +1612,8 @@ static int __split_huge_page_splitting(struct page *page, const unsigned long mmun_end = address + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG); + PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG, &ptl); if (pmd) { /* * We can't temporarily set the pmd to null in order @@ -1613,8 +1624,8 @@ static int __split_huge_page_splitting(struct page *page, */ pmdp_splitting_flush(vma, address, pmd); ret = 1; + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return ret; @@ -1745,14 +1756,14 @@ static int __split_huge_page_map(struct page *page, unsigned long address) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; pmd_t *pmd, _pmd; int ret = 0, i; pgtable_t pgtable; unsigned long haddr; - spin_lock(&mm->page_table_lock); pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); + PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG, &ptl); if (pmd) { pgtable = pgtable_trans_huge_withdraw(mm, pmd); pmd_populate(mm, &_pmd, pgtable); @@ -1807,8 +1818,8 @@ static int __split_huge_page_map(struct page *page, pmdp_invalidate(vma, address, pmd); pmd_populate(mm, pmd, pgtable); ret = 1; + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index fd3ee7a54a13..b59d741dcf65 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -665,25 +665,23 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, unsigned long *vm_flags) { struct mm_struct *mm = vma->vm_mm; + spinlock_t *ptl; int referenced = 0; if (unlikely(PageTransHuge(page))) { pmd_t *pmd; - spin_lock(&mm->page_table_lock); /* * rmap might return false positives; we must filter * these out using page_check_address_pmd(). */ pmd = page_check_address_pmd(page, mm, address, - PAGE_CHECK_ADDRESS_PMD_FLAG); - if (!pmd) { - spin_unlock(&mm->page_table_lock); + PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); + if (!pmd) goto out; - } if (vma->vm_flags & VM_LOCKED) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); *mapcount = 0; /* break early from loop */ *vm_flags |= VM_LOCKED; goto out; @@ -692,10 +690,9 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, /* go ahead even if the pmd is pmd_trans_splitting() */ if (pmdp_clear_flush_young_notify(vma, address, pmd)) referenced++; - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } else { pte_t *pte; - spinlock_t *ptl; /* * rmap might return false positives; we must filter From c389a250ab4cfa4a3775d9f2c45271618af6d5b2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:30:59 -0800 Subject: [PATCH 09/72] mm, thp: do not access mm->pmd_huge_pte directly Currently mm->pmd_huge_pte protected by page table lock. It will not work with split lock. We have to have per-pmd pmd_huge_pte for proper access serialization. For now, let's just introduce wrapper to access mm->pmd_huge_pte. Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/pgtable.c | 12 ++++++------ arch/sparc/mm/tlb.c | 12 ++++++------ include/linux/mm.h | 1 + mm/pgtable-generic.c | 12 ++++++------ 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0a2e5e086749..1ea18fcfa211 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1244,11 +1244,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(lh); else - list_add(lh, (struct list_head *) mm->pmd_huge_pte); - mm->pmd_huge_pte = pgtable; + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) @@ -1260,12 +1260,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); lh = (struct list_head *) pgtable; if (list_empty(lh)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = (pgtable_t) lh->next; + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; list_del(lh); } ptep = (pte_t *) pgtable; diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c index 7a91f288c708..656cc46a81f5 100644 --- a/arch/sparc/mm/tlb.c +++ b/arch/sparc/mm/tlb.c @@ -196,11 +196,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(lh); else - list_add(lh, (struct list_head *) mm->pmd_huge_pte); - mm->pmd_huge_pte = pgtable; + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; } pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) @@ -211,12 +211,12 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); lh = (struct list_head *) pgtable; if (list_empty(lh)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = (pgtable_t) lh->next; + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; list_del(lh); } pte_val(pgtable[0]) = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 4f4ca41fcf27..861cad53b744 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1383,6 +1383,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } +#define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) { diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 3929a40bd6c0..41fee3e5d570 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -154,11 +154,11 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, assert_spin_locked(&mm->page_table_lock); /* FIFO */ - if (!mm->pmd_huge_pte) + if (!pmd_huge_pte(mm, pmdp)) INIT_LIST_HEAD(&pgtable->lru); else - list_add(&pgtable->lru, &mm->pmd_huge_pte->lru); - mm->pmd_huge_pte = pgtable; + list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru); + pmd_huge_pte(mm, pmdp) = pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -173,11 +173,11 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) assert_spin_locked(&mm->page_table_lock); /* FIFO */ - pgtable = mm->pmd_huge_pte; + pgtable = pmd_huge_pte(mm, pmdp); if (list_empty(&pgtable->lru)) - mm->pmd_huge_pte = NULL; + pmd_huge_pte(mm, pmdp) = NULL; else { - mm->pmd_huge_pte = list_entry(pgtable->lru.next, + pmd_huge_pte(mm, pmdp) = list_entry(pgtable->lru.next, struct page, lru); list_del(&pgtable->lru); } From cb900f41215447433cbc456d1c4294e858a84d7c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:02 -0800 Subject: [PATCH 10/72] mm, hugetlb: convert hugetlbfs to use split pmd lock Hugetlb supports multiple page sizes. We use split lock only for PMD level, but not for PUD. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- include/linux/hugetlb.h | 26 ++++++++++ include/linux/swapops.h | 7 +-- mm/hugetlb.c | 110 ++++++++++++++++++++++++---------------- mm/mempolicy.c | 5 +- mm/migrate.c | 7 +-- mm/rmap.c | 2 +- 7 files changed, 105 insertions(+), 54 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index c805d5b69ba1..a77d2b299199 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -1,8 +1,8 @@ #include -#include #include #include #include +#include #include #include #include diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 0393270466c3..acd2010328f3 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -392,6 +392,15 @@ static inline int hugepage_migration_support(struct hstate *h) return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); } +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + if (huge_page_size(h) == PMD_SIZE) + return pmd_lockptr(mm, (pmd_t *) pte); + VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); + return &mm->page_table_lock; +} + #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page_node(h, nid) NULL @@ -401,6 +410,7 @@ struct hstate {}; #define hstate_sizelog(s) NULL #define hstate_vma(v) NULL #define hstate_inode(i) NULL +#define page_hstate(page) NULL #define huge_page_size(h) PAGE_SIZE #define huge_page_mask(h) PAGE_MASK #define vma_kernel_pagesize(v) PAGE_SIZE @@ -421,6 +431,22 @@ static inline pgoff_t basepage_index(struct page *page) #define dissolve_free_huge_pages(s, e) do {} while (0) #define pmd_huge_support() 0 #define hugepage_migration_support(h) 0 + +static inline spinlock_t *huge_pte_lockptr(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} #endif /* CONFIG_HUGETLB_PAGE */ +static inline spinlock_t *huge_pte_lock(struct hstate *h, + struct mm_struct *mm, pte_t *pte) +{ + spinlock_t *ptl; + + ptl = huge_pte_lockptr(h, mm, pte); + spin_lock(ptl); + return ptl; +} + #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 8d4fa82bfb91..c0f75261a728 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -139,7 +139,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -extern void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte); +extern void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte); #else #define make_migration_entry(page, write) swp_entry(0, 0) @@ -151,8 +152,8 @@ static inline int is_migration_entry(swp_entry_t swp) static inline void make_migration_entry_read(swp_entry_t *entryp) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } -static inline void migration_entry_wait_huge(struct mm_struct *mm, - pte_t *pte) { } +static inline void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte) { } static inline int is_write_migration_entry(swp_entry_t entry) { return 0; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0b7656e804d1..7d57af21f49e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2376,6 +2376,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) { + spinlock_t *src_ptl, *dst_ptl; src_pte = huge_pte_offset(src, addr); if (!src_pte) continue; @@ -2387,8 +2388,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, if (dst_pte == src_pte) continue; - spin_lock(&dst->page_table_lock); - spin_lock_nested(&src->page_table_lock, SINGLE_DEPTH_NESTING); + dst_ptl = huge_pte_lock(h, dst, dst_pte); + src_ptl = huge_pte_lockptr(h, src, src_pte); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); if (!huge_pte_none(huge_ptep_get(src_pte))) { if (cow) huge_ptep_set_wrprotect(src, addr, src_pte); @@ -2398,8 +2400,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, page_dup_rmap(ptepage); set_huge_pte_at(dst, addr, dst_pte, entry); } - spin_unlock(&src->page_table_lock); - spin_unlock(&dst->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); } return 0; @@ -2442,6 +2444,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long address; pte_t *ptep; pte_t pte; + spinlock_t *ptl; struct page *page; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); @@ -2455,25 +2458,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, tlb_start_vma(tlb, vma); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); again: - spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); if (!ptep) continue; + ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) - continue; + goto unlock; pte = huge_ptep_get(ptep); if (huge_pte_none(pte)) - continue; + goto unlock; /* * HWPoisoned hugepage is already unmapped and dropped reference */ if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { huge_pte_clear(mm, address, ptep); - continue; + goto unlock; } page = pte_page(pte); @@ -2484,7 +2487,7 @@ again: */ if (ref_page) { if (page != ref_page) - continue; + goto unlock; /* * Mark the VMA as having unmapped its page so that @@ -2501,13 +2504,18 @@ again: page_remove_rmap(page); force_flush = !__tlb_remove_page(tlb, page); - if (force_flush) + if (force_flush) { + spin_unlock(ptl); break; + } /* Bail out after unmapping reference page if supplied */ - if (ref_page) + if (ref_page) { + spin_unlock(ptl); break; + } +unlock: + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); /* * mmu_gather ran out of room to batch pages, we break out of * the PTE lock to avoid doing the potential expensive TLB invalidate @@ -2613,7 +2621,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma, */ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte, - struct page *pagecache_page) + struct page *pagecache_page, spinlock_t *ptl) { struct hstate *h = hstate_vma(vma); struct page *old_page, *new_page; @@ -2647,8 +2655,8 @@ retry_avoidcopy: page_cache_get(old_page); - /* Drop page_table_lock as buddy allocator may be called */ - spin_unlock(&mm->page_table_lock); + /* Drop page table lock as buddy allocator may be called */ + spin_unlock(ptl); new_page = alloc_huge_page(vma, address, outside_reserve); if (IS_ERR(new_page)) { @@ -2666,13 +2674,13 @@ retry_avoidcopy: BUG_ON(huge_pte_none(pte)); if (unmap_ref_private(mm, vma, old_page, address)) { BUG_ON(huge_pte_none(pte)); - spin_lock(&mm->page_table_lock); + spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); if (likely(pte_same(huge_ptep_get(ptep), pte))) goto retry_avoidcopy; /* - * race occurs while re-acquiring page_table_lock, and - * our job is done. + * race occurs while re-acquiring page table + * lock, and our job is done. */ return 0; } @@ -2680,7 +2688,7 @@ retry_avoidcopy: } /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (err == -ENOMEM) return VM_FAULT_OOM; else @@ -2695,7 +2703,7 @@ retry_avoidcopy: page_cache_release(new_page); page_cache_release(old_page); /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); return VM_FAULT_OOM; } @@ -2707,10 +2715,10 @@ retry_avoidcopy: mmun_end = mmun_start + huge_page_size(h); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); /* - * Retake the page_table_lock to check for racing updates + * Retake the page table lock to check for racing updates * before the page tables are altered */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); ptep = huge_pte_offset(mm, address & huge_page_mask(h)); if (likely(pte_same(huge_ptep_get(ptep), pte))) { ClearPagePrivate(new_page); @@ -2724,13 +2732,13 @@ retry_avoidcopy: /* Make the old page be freed below */ new_page = old_page; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); page_cache_release(new_page); page_cache_release(old_page); /* Caller expects lock to be held */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); return 0; } @@ -2778,6 +2786,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page; struct address_space *mapping; pte_t new_pte; + spinlock_t *ptl; /* * Currently, we are forced to kill the process in the event the @@ -2864,7 +2873,8 @@ retry: goto backout_unlocked; } - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(h, mm, ptep); + spin_lock(ptl); size = i_size_read(mapping->host) >> huge_page_shift(h); if (idx >= size) goto backout; @@ -2885,16 +2895,16 @@ retry: if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) { /* Optimization, do the COW without a second fault */ - ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page); + ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl); } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); unlock_page(page); out: return ret; backout: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); backout_unlocked: unlock_page(page); put_page(page); @@ -2906,6 +2916,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, { pte_t *ptep; pte_t entry; + spinlock_t *ptl; int ret; struct page *page = NULL; struct page *pagecache_page = NULL; @@ -2918,7 +2929,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (ptep) { entry = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait_huge(mm, ptep); + migration_entry_wait_huge(vma, mm, ptep); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | @@ -2974,17 +2985,18 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (page != pagecache_page) lock_page(page); - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(h, mm, ptep); + spin_lock(ptl); /* Check for a racing update before calling hugetlb_cow */ if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) - goto out_page_table_lock; + goto out_ptl; if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, - pagecache_page); - goto out_page_table_lock; + pagecache_page, ptl); + goto out_ptl; } entry = huge_pte_mkdirty(entry); } @@ -2993,8 +3005,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, flags & FAULT_FLAG_WRITE)) update_mmu_cache(vma, address, ptep); -out_page_table_lock: - spin_unlock(&mm->page_table_lock); +out_ptl: + spin_unlock(ptl); if (pagecache_page) { unlock_page(pagecache_page); @@ -3020,9 +3032,9 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long remainder = *nr_pages; struct hstate *h = hstate_vma(vma); - spin_lock(&mm->page_table_lock); while (vaddr < vma->vm_end && remainder) { pte_t *pte; + spinlock_t *ptl = NULL; int absent; struct page *page; @@ -3030,8 +3042,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, * Some archs (sparc64, sh*) have multiple pte_ts to * each hugepage. We have to make sure we get the * first, for the page indexing below to work. + * + * Note that page table lock is not held when pte is null. */ pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); + if (pte) + ptl = huge_pte_lock(h, mm, pte); absent = !pte || huge_pte_none(huge_ptep_get(pte)); /* @@ -3043,6 +3059,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, */ if (absent && (flags & FOLL_DUMP) && !hugetlbfs_pagecache_present(h, vma, vaddr)) { + if (pte) + spin_unlock(ptl); remainder = 0; break; } @@ -3062,10 +3080,10 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, !huge_pte_write(huge_ptep_get(pte)))) { int ret; - spin_unlock(&mm->page_table_lock); + if (pte) + spin_unlock(ptl); ret = hugetlb_fault(mm, vma, vaddr, (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0); - spin_lock(&mm->page_table_lock); if (!(ret & VM_FAULT_ERROR)) continue; @@ -3096,8 +3114,8 @@ same_page: */ goto same_page; } + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); *nr_pages = remainder; *position = vaddr; @@ -3118,13 +3136,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, flush_cache_range(vma, address, end); mutex_lock(&vma->vm_file->f_mapping->i_mmap_mutex); - spin_lock(&mm->page_table_lock); for (; address < end; address += huge_page_size(h)) { + spinlock_t *ptl; ptep = huge_pte_offset(mm, address); if (!ptep) continue; + ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { pages++; + spin_unlock(ptl); continue; } if (!huge_pte_none(huge_ptep_get(ptep))) { @@ -3134,8 +3154,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, set_huge_pte_at(mm, address, ptep, pte); pages++; } + spin_unlock(ptl); } - spin_unlock(&mm->page_table_lock); /* * Must flush TLB before releasing i_mmap_mutex: x86's huge_pmd_unshare * may have cleared our pud entry and done put_page on the page table: @@ -3298,6 +3318,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) unsigned long saddr; pte_t *spte = NULL; pte_t *pte; + spinlock_t *ptl; if (!vma_shareable(vma, addr)) return (pte_t *)pmd_alloc(mm, pud, addr); @@ -3320,13 +3341,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (!spte) goto out; - spin_lock(&mm->page_table_lock); + ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); + spin_lock(ptl); if (pud_none(*pud)) pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); else put_page(virt_to_page(spte)); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out: pte = (pte_t *)pmd_alloc(mm, pud, addr); mutex_unlock(&mapping->i_mmap_mutex); @@ -3340,7 +3362,7 @@ out: * indicated by page_count > 1, unmap is achieved by clearing pud and * decrementing the ref count. If count == 1, the pte page is not shared. * - * called with vma->vm_mm->page_table_lock held. + * called with page table lock held. * * returns: 1 successfully unmapped a shared pte page * 0 the underlying pte page is not shared, or it is the last user diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4cc19f6ab6c6..c4403cdf3433 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -525,8 +525,9 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, #ifdef CONFIG_HUGETLB_PAGE int nid; struct page *page; + spinlock_t *ptl; - spin_lock(&vma->vm_mm->page_table_lock); + ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd); page = pte_page(huge_ptep_get((pte_t *)pmd)); nid = page_to_nid(page); if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) @@ -536,7 +537,7 @@ static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) isolate_huge_page(page, private); unlock: - spin_unlock(&vma->vm_mm->page_table_lock); + spin_unlock(ptl); #else BUG(); #endif diff --git a/mm/migrate.c b/mm/migrate.c index dfc8300ecbb2..01f45cefa4cd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -130,7 +130,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, ptep = huge_pte_offset(mm, addr); if (!ptep) goto out; - ptl = &mm->page_table_lock; + ptl = huge_pte_lockptr(hstate_vma(vma), mm, ptep); } else { pmd = mm_find_pmd(mm, addr); if (!pmd) @@ -249,9 +249,10 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, __migration_entry_wait(mm, ptep, ptl); } -void migration_entry_wait_huge(struct mm_struct *mm, pte_t *pte) +void migration_entry_wait_huge(struct vm_area_struct *vma, + struct mm_struct *mm, pte_t *pte) { - spinlock_t *ptl = &(mm)->page_table_lock; + spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte); __migration_entry_wait(mm, pte, ptl); } diff --git a/mm/rmap.c b/mm/rmap.c index b59d741dcf65..55c8b8dc9ffb 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -601,7 +601,7 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, if (unlikely(PageHuge(page))) { pte = huge_pte_offset(mm, address); - ptl = &mm->page_table_lock; + ptl = huge_pte_lockptr(page_hstate(page), mm, pte); goto check; } From c4088ebdca64c9a2e34a38177d2249805ede1f4b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:04 -0800 Subject: [PATCH 11/72] mm: convert the rest to new page table lock api Only trivial cases left. Let's convert them altogether. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 108 ++++++++++++++++++++++++------------------- mm/memory.c | 13 +++--- mm/migrate.c | 7 +-- mm/pgtable-generic.c | 4 +- 4 files changed, 73 insertions(+), 59 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c2082ab4fc93..bccd5a628ea6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, struct page *page) { pgtable_t pgtable; + spinlock_t *ptl; VM_BUG_ON(!PageCompound(page)); pgtable = pte_alloc_one(mm, haddr); @@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, */ __SetPageUptodate(page); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_none(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mem_cgroup_uncharge_page(page); put_page(page); pte_free(mm, pgtable); @@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, set_pmd_at(mm, haddr, pmd, entry); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); atomic_long_inc(&mm->nr_ptes); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } return 0; @@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag, HPAGE_PMD_ORDER, vma, haddr, nd); } +/* Caller must hold page table lock. */ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct page *zero_page) @@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, return VM_FAULT_OOM; if (!(flags & FAULT_FLAG_WRITE) && transparent_hugepage_use_zero_page()) { + spinlock_t *ptl; pgtable_t pgtable; struct page *zero_page; bool set; @@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, zero_page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (!set) { pte_free(mm, pgtable); put_huge_zero_page(); @@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma) { + spinlock_t *dst_ptl, *src_ptl; struct page *src_page; pmd_t pmd; pgtable_t pgtable; @@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (unlikely(!pgtable)) goto out; - spin_lock(&dst_mm->page_table_lock); - spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING); + dst_ptl = pmd_lock(dst_mm, dst_pmd); + src_ptl = pmd_lockptr(src_mm, src_pmd); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ret = -EAGAIN; pmd = *src_pmd; @@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out_unlock; } /* - * mm->page_table_lock is enough to be sure that huge zero pmd is not + * When page table lock is held, the huge zero pmd should not be * under splitting since we don't split the page itself, only pmd to * a page table. */ @@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, } if (unlikely(pmd_trans_splitting(pmd))) { /* split huge page running from under us */ - spin_unlock(&src_mm->page_table_lock); - spin_unlock(&dst_mm->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); pte_free(dst_mm, pgtable); wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */ @@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = 0; out_unlock: - spin_unlock(&src_mm->page_table_lock); - spin_unlock(&dst_mm->page_table_lock); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); out: return ret; } @@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm, pmd_t *pmd, pmd_t orig_pmd, int dirty) { + spinlock_t *ptl; pmd_t entry; unsigned long haddr; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto unlock; @@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm, update_mmu_cache_pmd(vma, address, pmd); unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); } static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr) { + spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; struct page *page; @@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_page; @@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, } smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); put_huge_zero_page(); inc_mm_counter(mm, MM_ANONPAGES); @@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, out: return ret; out_free_page: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mem_cgroup_uncharge_page(page); put_page(page); @@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, struct page *page, unsigned long haddr) { + spinlock_t *ptl; pgtable_t pgtable; pmd_t _pmd; int ret = 0, i; @@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_pages; VM_BUG_ON(!PageHead(page)); @@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, smp_wmb(); /* make pte visible before pmd */ pmd_populate(mm, pmd, pgtable); page_remove_rmap(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); @@ -1092,7 +1100,7 @@ out: return ret; out_free_pages: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mem_cgroup_uncharge_start(); for (i = 0; i < HPAGE_PMD_NR; i++) { @@ -1107,17 +1115,19 @@ out_free_pages: int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, pmd_t orig_pmd) { + spinlock_t *ptl; int ret = 0; struct page *page = NULL, *new_page; unsigned long haddr; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ + ptl = pmd_lockptr(mm, pmd); VM_BUG_ON(!vma->anon_vma); haddr = address & HPAGE_PMD_MASK; if (is_huge_zero_pmd(orig_pmd)) goto alloc; - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_unlock; @@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; } get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); alloc: if (transparent_hugepage_enabled(vma) && !transparent_hugepage_debug_cow()) @@ -1180,11 +1190,11 @@ alloc: mmun_end = haddr + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (page) put_page(page); if (unlikely(!pmd_same(*pmd, orig_pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mem_cgroup_uncharge_page(new_page); put_page(new_page); goto out_mn; @@ -1206,13 +1216,13 @@ alloc: } ret |= VM_FAULT_WRITE; } - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out_mn: mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out: return ret; out_unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); return ret; } @@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; struct page *page = NULL; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmd)); if (flags & FOLL_WRITE && !pmd_write(*pmd)) goto out; @@ -1271,6 +1281,7 @@ out: int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp) { + spinlock_t *ptl; struct anon_vma *anon_vma = NULL; struct page *page; unsigned long haddr = addr & HPAGE_PMD_MASK; @@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, bool migrated = false; int flags = 0; - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmdp); if (unlikely(!pmd_same(pmd, *pmdp))) goto out_unlock; @@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * relock and check_same as the page may no longer be mapped. * As the fault is being retried, do not account for it. */ - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); wait_on_page_locked(page); page_nid = -1; goto out; @@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, /* Page is misplaced, serialise migrations and parallel THP splits */ get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (!page_locked) lock_page(page); anon_vma = page_lock_anon_vma_read(page); /* Confirm the PMD did not change while page_table_lock was released */ - spin_lock(&mm->page_table_lock); + spin_lock(ptl); if (unlikely(!pmd_same(pmd, *pmdp))) { unlock_page(page); put_page(page); @@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, * Migrate the THP to the requested node, returns with page unlocked * and pmd_numa cleared. */ - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); migrated = migrate_misplaced_transhuge_page(mm, vma, pmdp, pmd, addr, page, target_nid); if (migrated) { @@ -1361,7 +1372,7 @@ clear_pmdnuma: update_mmu_cache_pmd(vma, addr, pmdp); unlock_page(page); out_unlock: - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); out: if (anon_vma) @@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm, pte_t *pte; pgtable_t pgtable; struct page *new_page; - spinlock_t *ptl; + spinlock_t *pmd_ptl, *pte_ptl; int isolated; unsigned long hstart, hend; unsigned long mmun_start; /* For mmu_notifiers */ @@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm, anon_vma_lock_write(vma->anon_vma); pte = pte_offset_map(pmd, address); - ptl = pte_lockptr(mm, pmd); + pte_ptl = pte_lockptr(mm, pmd); mmun_start = address; mmun_end = address + HPAGE_PMD_SIZE; mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); /* probably unnecessary */ + pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */ /* * After this gup_fast can't run anymore. This also removes * any huge TLB entry from the CPU so we won't allow @@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm, * to avoid the risk of CPU bugs in that area. */ _pmd = pmdp_clear_flush(vma, address, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); - spin_lock(ptl); + spin_lock(pte_ptl); isolated = __collapse_huge_page_isolate(vma, address, pte); - spin_unlock(ptl); + spin_unlock(pte_ptl); if (unlikely(!isolated)) { pte_unmap(pte); - spin_lock(&mm->page_table_lock); + spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); /* * We can only use set_pmd_at when establishing @@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm, * points to regular pagetables. Use pmd_populate for that */ pmd_populate(mm, pmd, pmd_pgtable(_pmd)); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); anon_vma_unlock_write(vma->anon_vma); goto out; } @@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm, */ anon_vma_unlock_write(vma->anon_vma); - __collapse_huge_page_copy(pte, new_page, vma, address, ptl); + __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl); pte_unmap(pte); __SetPageUptodate(new_page); pgtable = pmd_pgtable(_pmd); @@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm, */ smp_wmb(); - spin_lock(&mm->page_table_lock); + spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); page_add_new_anon_rmap(new_page, vma, address); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, address, pmd, _pmd); update_mmu_cache_pmd(vma, address, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(pmd_ptl); *hpage = NULL; @@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd) { + spinlock_t *ptl; struct page *page; struct mm_struct *mm = vma->vm_mm; unsigned long haddr = address & HPAGE_PMD_MASK; @@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, mmun_end = haddr + HPAGE_PMD_SIZE; again: mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_trans_huge(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return; } if (is_huge_zero_pmd(*pmd)) { __split_huge_zero_page_pmd(vma, haddr, pmd); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); return; } page = pmd_page(*pmd); VM_BUG_ON(!page_count(page)); get_page(page); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); split_huge_page(page); diff --git a/mm/memory.c b/mm/memory.c index 0b5a93a49f27..d05c6b10e926 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -550,6 +550,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd, unsigned long address) { + spinlock_t *ptl; pgtable_t new = pte_alloc_one(mm, address); int wait_split_huge_page; if (!new) @@ -570,7 +571,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); wait_split_huge_page = 0; if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ atomic_long_inc(&mm->nr_ptes); @@ -578,7 +579,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, new = NULL; } else if (unlikely(pmd_trans_splitting(*pmd))) wait_split_huge_page = 1; - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); if (new) pte_free(mm, new); if (wait_split_huge_page) @@ -1516,20 +1517,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma, split_huge_page_pmd(vma, address, pmd); goto split_fallthrough; } - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (likely(pmd_trans_huge(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); wait_split_huge_page(vma->anon_vma, pmd); } else { page = follow_trans_huge_pmd(vma, address, pmd, flags); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); *page_mask = HPAGE_PMD_NR - 1; goto out; } } else - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); /* fall through */ } split_fallthrough: diff --git a/mm/migrate.c b/mm/migrate.c index 01f45cefa4cd..316e720a2023 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1667,6 +1667,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, unsigned long address, struct page *page, int node) { + spinlock_t *ptl; unsigned long haddr = address & HPAGE_PMD_MASK; pg_data_t *pgdat = NODE_DATA(node); int isolated = 0; @@ -1706,9 +1707,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, WARN_ON(PageLRU(new_page)); /* Recheck the target PMD */ - spin_lock(&mm->page_table_lock); + ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, entry))) { - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); /* Reverse changes made by migrate_page_copy() */ if (TestClearPageActive(new_page)) @@ -1753,7 +1754,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, * before it's fully transferred to the new page. */ mem_cgroup_end_migration(memcg, page, new_page, true); - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); unlock_page(new_page); unlock_page(page); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 41fee3e5d570..cbb38545d9d6 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -151,7 +151,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ if (!pmd_huge_pte(mm, pmdp)) @@ -170,7 +170,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) { pgtable_t pgtable; - assert_spin_locked(&mm->page_table_lock); + assert_spin_locked(pmd_lockptr(mm, pmdp)); /* FIFO */ pgtable = pmd_huge_pte(mm, pmdp); From e009bb30c8df8a52a9622b616b67436b6a03a0cd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:07 -0800 Subject: [PATCH 12/72] mm: implement split page table lock for PMD level The basic idea is the same as with PTE level: the lock is embedded into struct page of table's page. We can't use mm->pmd_huge_pte to store pgtables for THP, since we don't take mm->page_table_lock anymore. Let's reuse page->lru of table's page for that. pgtable_pmd_page_ctor() returns true, if initialization is successful and false otherwise. Current implementation never fails, but assumption that constructor can fail will help to port it to -rt where spinlock_t is rather huge and cannot be embedded into struct page -- dynamic allocation is required. Signed-off-by: Naoya Horiguchi Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Reviewed-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 7 ++++++- kernel/fork.c | 4 ++-- mm/Kconfig | 3 +++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 861cad53b744..255750d9b1be 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1378,13 +1378,45 @@ static inline void pgtable_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ NULL: pte_offset_kernel(pmd, address)) +#if USE_SPLIT_PMD_PTLOCKS + +static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &virt_to_page(pmd)->ptl; +} + +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ + spin_lock_init(&page->ptl); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + page->pmd_huge_pte = NULL; +#endif + return true; +} + +static inline void pgtable_pmd_page_dtor(struct page *page) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + VM_BUG_ON(page->pmd_huge_pte); +#endif +} + +#define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) + +#else + static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } +static inline void pgtable_pmd_page_dtor(struct page *page) {} + #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) +#endif + static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl = pmd_lockptr(mm, pmd); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 566df579c51f..934261099975 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -24,6 +24,8 @@ struct address_space; #define USE_SPLIT_PTE_PTLOCKS (NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS) +#define USE_SPLIT_PMD_PTLOCKS (USE_SPLIT_PTE_PTLOCKS && \ + IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) /* * Each physical page in the system has a struct page associated with @@ -63,6 +65,9 @@ struct page { * this page is only used to * free other pages. */ +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS + pgtable_t pmd_huge_pte; /* protected by page->ptl */ +#endif }; union { @@ -406,7 +411,7 @@ struct mm_struct { #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_mm *mmu_notifier_mm; #endif -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/kernel/fork.c b/kernel/fork.c index e2520756e005..728d5be9548c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -560,7 +560,7 @@ static void check_mm(struct mm_struct *mm) "mm:%p idx:%d val:%ld\n", mm, i, x); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON(mm->pmd_huge_pte); #endif } @@ -814,7 +814,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); mm_init_cpumask(mm); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif if (!mm_init(mm, tsk)) diff --git a/mm/Kconfig b/mm/Kconfig index c28d247e524a..7aa02def4666 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -222,6 +222,9 @@ config SPLIT_PTLOCK_CPUS default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + boolean + # # support for memory balloon compaction config BALLOON_COMPACTION From 9491846fca57e9326b6673716c386b76fc13ebca Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:10 -0800 Subject: [PATCH 13/72] x86, mm: enable split page table lock for PMD level Enable PMD split page table lock for X86_64 and PAE. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Kirill A. Shutemov Tested-by: Alex Thorlton Cc: Ingo Molnar Cc: Naoya Horiguchi Cc: "Eric W . Biederman" Cc: "Paul E . McKenney" Cc: Al Viro Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Dave Hansen Cc: Dave Jones Cc: David Howells Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Kees Cook Cc: Mel Gorman Cc: Michael Kerrisk Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Robin Holt Cc: Sedat Dilek Cc: Srikar Dronamraju Cc: Thomas Gleixner Cc: Hugh Dickins Reviewed-by: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 4 ++++ arch/x86/include/asm/pgalloc.h | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6e3e1cb3f6a0..af5513e5798a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1885,6 +1885,10 @@ config USE_PERCPU_NUMA_NODE_ID def_bool y depends on NUMA +config ARCH_ENABLE_SPLIT_PMD_PTLOCK + def_bool y + depends on X86_64 || X86_PAE + menu "Power management and ACPI options" config ARCH_HIBERNATION_HEADER diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index b4389a468fb6..c4412e972bbd 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -80,12 +80,21 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, #if PAGETABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); + struct page *page; + page = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); + if (!page) + return NULL; + if (!pgtable_pmd_page_ctor(page)) { + __free_pages(page, 0); + return NULL; + } + return (pmd_t *)page_address(page); } static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); + pgtable_pmd_page_dtor(virt_to_page(pmd)); free_page((unsigned long)pmd); } From 09ef4939850aa81e3822b5dfb9ba2ada5e565816 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:13 -0800 Subject: [PATCH 14/72] x86: add missed pgtable_pmd_page_ctor/dtor calls for preallocated pmds In split page table lock case, we embed spinlock_t into struct page. For obvious reason, we don't want to increase size of struct page if spinlock_t is too big, like with DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC or on -rt kernel. So we disable split page table lock, if spinlock_t is too big. This patchset allows to allocate the lock dynamically if spinlock_t is big. In this page->ptl is used to store pointer to spinlock instead of spinlock itself. It costs additional cache line for indirect access, but fix page fault scalability for multi-threaded applications. LOCK_STAT depends on DEBUG_SPINLOCK, so on current kernel enabling LOCK_STAT to analyse scalability issues breaks scalability. ;) The patchset mostly fixes this. Results for ./thp_memscale -c 80 -b 512M on 4-socket machine: baseline, no CONFIG_LOCK_STAT: 9.115460703 seconds time elapsed baseline, CONFIG_LOCK_STAT=y: 53.890567123 seconds time elapsed patched, no CONFIG_LOCK_STAT: 8.852250368 seconds time elapsed patched, CONFIG_LOCK_STAT=y: 11.069770759 seconds time elapsed Patch count is scary, but most of them trivial. Overview: Patches 1-4 Few bug fixes. No dependencies to other patches. Probably should applied as soon as possible. Patch 5 Changes signature of pgtable_page_ctor(). We will use it for dynamic lock allocation, so it can fail. Patches 6-8 Add missing constructor/destructor calls on few archs. It's fixes NR_PAGETABLE accounting and prepare to use split ptl. Patches 9-33 Add pgtable_page_ctor() fail handling to all archs. Patches 34 Finally adds support of dynamically-allocated page->pte. Also contains documentation for split page table lock. This patch (of 34): I've missed that we preallocate few pmds on pgd_alloc() if X86_PAE enabled. Let's add missed constructor/destructor calls. I haven't noticed it during testing since prep_new_page() clears page->mapping and therefore page->ptl. It's effectively equal to spin_lock_init(&page->ptl). Signed-off-by: Kirill A. Shutemov Acked-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: "James E.J. Bottomley" Cc: "Kirill A. Shutemov" Cc: Benjamin Herrenschmidt Cc: Catalin Marinas Cc: Chen Liqin Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: David Howells Cc: David S. Miller Cc: Fenghua Yu Cc: Geert Uytterhoeven Cc: Grant Likely Cc: Guan Xuetao Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Heiko Carstens Cc: Helge Deller Cc: Hirokazu Takata Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jeff Dike Cc: Jesper Nilsson Cc: Jonas Bonn Cc: Koichi Yasutake Cc: Lennox Wu Cc: Martin Schwidefsky Cc: Matt Turner Cc: Max Filippov Cc: Michal Simek Cc: Mikael Starvik Cc: Paul Mackerras Cc: Paul Mundt Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Richard Henderson Cc: Richard Kuo Cc: Richard Weinberger Cc: Rob Herring Cc: Russell King Cc: Thomas Gleixner Cc: Tony Luck Cc: Vineet Gupta Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index dfa537a03be1..1a7d21342e02 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -189,8 +189,10 @@ static void free_pmds(pmd_t *pmds[]) int i; for(i = 0; i < PREALLOCATED_PMDS; i++) - if (pmds[i]) + if (pmds[i]) { + pgtable_pmd_page_dtor(virt_to_page(pmds[i])); free_page((unsigned long)pmds[i]); + } } static int preallocate_pmds(pmd_t *pmds[]) @@ -200,8 +202,13 @@ static int preallocate_pmds(pmd_t *pmds[]) for(i = 0; i < PREALLOCATED_PMDS; i++) { pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); - if (pmd == NULL) + if (!pmd) failed = true; + if (pmd && !pgtable_pmd_page_ctor(virt_to_page(pmd))) { + free_page((unsigned long)pmds[i]); + pmd = NULL; + failed = true; + } pmds[i] = pmd; } From 26db39027c00744107acf184527010a1e05907e7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:17 -0800 Subject: [PATCH 15/72] cris: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. Shutemov Cc: Mikael Starvik Acked-by: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/include/asm/pgalloc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index 6da975db112f..d9504d38c276 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -32,6 +32,8 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres { struct page *pte; pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + if (!pte) + return NULL; pgtable_page_ctor(pte); return pte; } From fecf3743b824ce4eb275ed4a1d6aee9494f6a966 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:18 -0800 Subject: [PATCH 16/72] m32r: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. Shutemov Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/include/asm/pgalloc.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/m32r/include/asm/pgalloc.h b/arch/m32r/include/asm/pgalloc.h index 0fc736198979..ac4208bcc5ad 100644 --- a/arch/m32r/include/asm/pgalloc.h +++ b/arch/m32r/include/asm/pgalloc.h @@ -43,6 +43,8 @@ static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pte) + return NULL; pgtable_page_ctor(pte); return pte; } From f8c6d30b766fc8eb83f5b7983ff8a5a9b3189365 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:19 -0800 Subject: [PATCH 17/72] xtensa: fix potential NULL-pointer dereference Add missing check for memory allocation fail. Signed-off-by: Kirill A. Shutemov Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/include/asm/pgalloc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index cf914c8c249a..037671a655dc 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -51,9 +51,13 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) { + pte_t *pte; struct page *page; - page = virt_to_page(pte_alloc_one_kernel(mm, addr)); + pte = pte_alloc_one_kernel(mm, addr); + if (!pte) + return NULL; + page = virt_to_page(pte); pgtable_page_ctor(page); return page; } From 390f44e2aa2ab83f08231d7d05f066dc3494490e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:20 -0800 Subject: [PATCH 18/72] mm: allow pgtable_page_ctor() to fail Change pgtable_page_ctor() return type from void to bool. Returns true, if initialization is successful and false otherwise. Current implementation never fails, but it will change later. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 255750d9b1be..e855351c3361 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1338,10 +1338,11 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) #endif /* USE_SPLIT_PTE_PTLOCKS */ -static inline void pgtable_page_ctor(struct page *page) +static inline bool pgtable_page_ctor(struct page *page) { pte_lock_init(page); inc_zone_page_state(page, NR_PAGETABLE); + return true; } static inline void pgtable_page_dtor(struct page *page) From 8abe73465660f12dee03871f681175f4dae62e7f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:21 -0800 Subject: [PATCH 19/72] microblaze: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is going ever support split ptl. Signed-off-by: Kirill A. Shutemov Cc: Michal Simek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/pgalloc.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index ebd35792482c..7fdf7fabc7d7 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -122,8 +122,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, #endif ptepage = alloc_pages(flags, 0); - if (ptepage) - clear_highpage(ptepage); + if (!ptepage) + return NULL; + clear_highpage(ptepage); + if (!pgtable_page_ctor(ptepage)) { + __free_page(ptepage); + return NULL; + } return ptepage; } @@ -158,8 +163,9 @@ extern inline void pte_free_slow(struct page *ptepage) __free_page(ptepage); } -extern inline void pte_free(struct mm_struct *mm, struct page *ptepage) +static inline void pte_free(struct mm_struct *mm, struct page *ptepage) { + pgtable_page_dtor(ptepage); __free_page(ptepage); } From 0470d4aa29eb0a49554fe71e986e6d6c38dd85cb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:22 -0800 Subject: [PATCH 20/72] mn10300: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is going ever support split ptl. Signed-off-by: Kirill A. Shutemov Acked-by: David Howells Cc: Koichi Yasutake Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mn10300/include/asm/pgalloc.h | 1 + arch/mn10300/mm/pgtable.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/mn10300/include/asm/pgalloc.h b/arch/mn10300/include/asm/pgalloc.h index 146bacf193ea..0f25d5fa86f3 100644 --- a/arch/mn10300/include/asm/pgalloc.h +++ b/arch/mn10300/include/asm/pgalloc.h @@ -46,6 +46,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } diff --git a/arch/mn10300/mm/pgtable.c b/arch/mn10300/mm/pgtable.c index bd9ada693f95..e77a7c728081 100644 --- a/arch/mn10300/mm/pgtable.c +++ b/arch/mn10300/mm/pgtable.c @@ -78,8 +78,13 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) #else pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (pte) - clear_highpage(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From b3d59c6eb49476110b4a76bfbae63daae1cda268 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:23 -0800 Subject: [PATCH 21/72] openrisc: add missing pgtable_page_ctor/dtor calls It will fix NR_PAGETABLE accounting. It's also required if the arch is going ever support split ptl. Signed-off-by: Kirill A. Shutemov Cc: Jonas Bonn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/openrisc/include/asm/pgalloc.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index 05c39ecd2efd..21484e5b9e9a 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -78,8 +78,13 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, { struct page *pte; pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); - if (pte) - clear_page(page_address(pte)); + if (!pte) + return NULL; + clear_page(page_address(pte)); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } @@ -90,6 +95,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline void pte_free(struct mm_struct *mm, struct page *pte) { + pgtable_page_dtor(pte); __free_page(pte); } From 3fd681b68cd34eacb106b25fcb10bb202a3232c5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:24 -0800 Subject: [PATCH 22/72] alpha: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index bc2a0daf2d92..aab14a019c20 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -72,7 +72,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address) if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } From ca6ec3bbaaaf5941e970314a2eb3680b9e7e698a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:25 -0800 Subject: [PATCH 23/72] arc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: Vineet Gupta [for arch/arc bits] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arc/include/asm/pgalloc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h index 36a9f20c21a3..81208bfd9dcb 100644 --- a/arch/arc/include/asm/pgalloc.h +++ b/arch/arc/include/asm/pgalloc.h @@ -105,11 +105,16 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { pgtable_t pte_pg; + struct page *page; pte_pg = __get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte()); - if (pte_pg) { - memzero((void *)pte_pg, PTRS_PER_PTE * 4); - pgtable_page_ctor(virt_to_page(pte_pg)); + if (!pte_pg) + return 0; + memzero((void *)pte_pg, PTRS_PER_PTE * 4); + page = virt_to_page(pte_pg); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return 0; } return pte_pg; From affce5089a82a045274def17b44b13d10ab7c1d4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:26 -0800 Subject: [PATCH 24/72] arm: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgalloc.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 943504f53f57..78a779361682 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -102,12 +102,14 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) #else pte = alloc_pages(PGALLOC_GFP, 0); #endif - if (pte) { - if (!PageHighMem(pte)) - clean_pte_table(page_address(pte)); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!PageHighMem(pte)) + clean_pte_table(page_address(pte)); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } - return pte; } From d97a22913808b191c95fbfc51e6405c4504979e6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:27 -0800 Subject: [PATCH 25/72] arm64: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/pgalloc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index f214069ec5d5..9bea6e74a001 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -63,9 +63,12 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) struct page *pte; pte = alloc_pages(PGALLOC_GFP, 0); - if (pte) - pgtable_page_ctor(pte); - + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From 2cb6182bb6153f685c37773687e63094317b1eeb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:28 -0800 Subject: [PATCH 26/72] avr32: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Haavard Skinnemoen Acked-by: Hans-Christian Egtvedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/avr32/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index bc7e8ae479ee..1aba19d68c5e 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -68,7 +68,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(QUICK_PT, NULL, pg); + return NULL; + } return page; } From 0da5303bdb55e67963e8b53ebf3a2aa75051cab4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:29 -0800 Subject: [PATCH 27/72] cris: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Mikael Starvik Acked-by: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index d9504d38c276..235ece437ddd 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -34,7 +34,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addres pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); if (!pte) return NULL; - pgtable_page_ctor(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From 3b9cf77d1aad657dd44ab0f0368978e87b64ad23 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:30 -0800 Subject: [PATCH 28/72] frv: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/frv/mm/pgalloc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c index f6084bc524e8..41907d25ed38 100644 --- a/arch/frv/mm/pgalloc.c +++ b/arch/frv/mm/pgalloc.c @@ -37,11 +37,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) #else page = alloc_pages(GFP_KERNEL|__GFP_REPEAT, 0); #endif - if (page) { - clear_highpage(page); - pgtable_page_ctor(page); - flush_dcache_page(page); + if (!page) + return NULL; + + clear_highpage(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; } + flush_dcache_page(page); return page; } From 5de1423d8901bed12f1b96acef27ac133592eaeb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:31 -0800 Subject: [PATCH 29/72] hexagon: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Richard Kuo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/hexagon/include/asm/pgalloc.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index 679bf6d66487..4c9d382d7798 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -65,10 +65,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO); - - if (pte) - pgtable_page_ctor(pte); - + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From ca973d86d4c5b70c32e7b91ce08f3e8e061e2535 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:32 -0800 Subject: [PATCH 30/72] ia64: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 96a8d927db28..5767cdfc08db 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -91,7 +91,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr) if (!pg) return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(0, NULL, pg); + return NULL; + } return page; } From 7251ab6b86179f195b3f4b56d57ce9dc7a725409 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:33 -0800 Subject: [PATCH 31/72] m32r: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/m32r/include/asm/pgalloc.h b/arch/m32r/include/asm/pgalloc.h index ac4208bcc5ad..2d55a064ccac 100644 --- a/arch/m32r/include/asm/pgalloc.h +++ b/arch/m32r/include/asm/pgalloc.h @@ -45,7 +45,10 @@ static __inline__ pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; - pgtable_page_ctor(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From f84c914b986ed2ec4ffaa5672b423b1f6b65519d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:34 -0800 Subject: [PATCH 32/72] m68k: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/mcf_pgalloc.h | 4 ++++ arch/m68k/include/asm/motorola_pgalloc.h | 8 ++++++-- arch/m68k/include/asm/sun3_pgalloc.h | 5 ++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h index 313f3dd23cdc..f9924fbcfe42 100644 --- a/arch/m68k/include/asm/mcf_pgalloc.h +++ b/arch/m68k/include/asm/mcf_pgalloc.h @@ -56,6 +56,10 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, if (!page) return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } pte = kmap(page); if (pte) { diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index 2f02f264e694..24bcba496c75 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -29,18 +29,22 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { - struct page *page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); + struct page *page; pte_t *pte; + page = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); if(!page) return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } pte = kmap(page); __flush_page_to_ram(pte); flush_tlb_kernel_page(pte); nocache_page(pte); kunmap(page); - pgtable_page_ctor(page); return page; } diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 48d80d5a666f..f868506e3350 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -59,7 +59,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; clear_highpage(page); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } From 855a30531806388cd63940cb03a18d6546404805 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:35 -0800 Subject: [PATCH 33/72] metag: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: James Hogan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/metag/include/asm/pgalloc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h index 275d9285141c..3104df0a4822 100644 --- a/arch/metag/include/asm/pgalloc.h +++ b/arch/metag/include/asm/pgalloc.h @@ -52,8 +52,12 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, { struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From 3b5b51c1a7bd148e1d7721874849435d76728375 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:36 -0800 Subject: [PATCH 34/72] mips: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/include/asm/pgalloc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 881d18b4e298..b336037e8768 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -80,9 +80,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); - if (pte) { - clear_highpage(pte); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } return pte; } From bc16640dce9035177c99d8fb11d3b94abe9f36c8 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:37 -0800 Subject: [PATCH 35/72] parisc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: "James E.J. Bottomley" Cc: Helge Deller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/parisc/include/asm/pgalloc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h index fc987a1c12a8..f213f5b4c423 100644 --- a/arch/parisc/include/asm/pgalloc.h +++ b/arch/parisc/include/asm/pgalloc.h @@ -121,8 +121,12 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - if (page) - pgtable_page_ctor(page); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } From 4f804943f99454ac79e0f448428447f1a72d09fc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:38 -0800 Subject: [PATCH 36/72] powerpc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/include/asm/pgalloc-64.h | 5 ++++- arch/powerpc/mm/pgtable_32.c | 5 ++++- arch/powerpc/mm/pgtable_64.c | 7 ++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index f65e27b09bd3..16cb92d215d2 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -91,7 +91,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6c856fb8c15b..5b9601715289 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -121,7 +121,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) ptepage = alloc_pages(flags, 0); if (!ptepage) return NULL; - pgtable_page_ctor(ptepage); + if (!pgtable_page_ctor(ptepage)) { + __free_page(ptepage); + return NULL; + } return ptepage; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 536eec72c0f7..9d95786aa80f 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -378,6 +378,10 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) __GFP_REPEAT | __GFP_ZERO); if (!page) return NULL; + if (!kernel && !pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } ret = page_address(page); spin_lock(&mm->page_table_lock); @@ -392,9 +396,6 @@ static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) } spin_unlock(&mm->page_table_lock); - if (!kernel) - pgtable_page_ctor(page); - return (pte_t *)ret; } From e89cfa58a8358fdb4d4e79936c25222416ad415e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:39 -0800 Subject: [PATCH 37/72] s390: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/pgtable.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 1ea18fcfa211..e794c88f699a 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -772,7 +772,11 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, __free_page(page); return NULL; } - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + kfree(mp); + __free_page(page); + return NULL; + } mp->vmaddr = vmaddr & PMD_MASK; INIT_LIST_HEAD(&mp->mapper); page->index = (unsigned long) mp; @@ -902,7 +906,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE); From 96da3a62ea1fef2d9dfa8eff97706603918d5f4d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:40 -0800 Subject: [PATCH 38/72] score: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Chen Liqin Acked-by: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/score/include/asm/pgalloc.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/score/include/asm/pgalloc.h b/arch/score/include/asm/pgalloc.h index 716b3fd1d863..2e067657db98 100644 --- a/arch/score/include/asm/pgalloc.h +++ b/arch/score/include/asm/pgalloc.h @@ -54,9 +54,12 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm, struct page *pte; pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER); - if (pte) { - clear_highpage(pte); - pgtable_page_ctor(pte); + if (!pte) + return NULL; + clear_highpage(pte); + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; } return pte; } From 478cf8ca013f9e33be978dee4d0a15906e5a030a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:41 -0800 Subject: [PATCH 39/72] sh: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sh/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 8c00785c60d5..a33673b3687d 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -47,7 +47,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pg) return NULL; page = virt_to_page(pg); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + quicklist_free(QUICK_PT, NULL, pg); + return NULL; + } return page; } From 1ae9ae5f7df726c67736c643c1f03f7bdfe748eb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:42 -0800 Subject: [PATCH 40/72] sparc: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/mm/init_64.c | 11 ++++++----- arch/sparc/mm/srmmu.c | 5 ++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index ed82edad1a39..d6de9353ee11 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2519,12 +2519,13 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, return pte; page = __alloc_for_cache(mm); - if (page) { - pgtable_page_ctor(page); - pte = (pte_t *) page_address(page); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + free_hot_cold_page(page, 0); + return NULL; } - - return pte; + return (pte_t *) page_address(page); } void pte_free_kernel(struct mm_struct *mm, pte_t *pte) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 5d721df48a72..869023abe5a4 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -345,7 +345,10 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0) return NULL; page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } return page; } From 76b3aec332e74596c517e1198764a5aeb8ea8398 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:43 -0800 Subject: [PATCH 41/72] tile: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/mm/pgtable.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 4fd9ec0b58ed..5e86eac4bfae 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -241,6 +241,11 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, if (p == NULL) return NULL; + if (!pgtable_page_ctor(p)) { + __free_pages(p, L2_USER_PGTABLE_ORDER); + return NULL; + } + /* * Make every page have a page_count() of one, not just the first. * We don't use __GFP_COMP since it doesn't look like it works @@ -251,7 +256,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, inc_zone_page_state(p+i, NR_PAGETABLE); } - pgtable_page_ctor(p); return p; } From 647f884e67dd920e01046a1ae87aa2c2d315479a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:44 -0800 Subject: [PATCH 42/72] um: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Jeff Dike Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/mem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c index 7ddb64baf327..8636e905426f 100644 --- a/arch/um/kernel/mem.c +++ b/arch/um/kernel/mem.c @@ -279,8 +279,12 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; pte = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From e90bedbb8a847a0e6debe0e00b6f69b788fca92e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:46 -0800 Subject: [PATCH 43/72] unicore32: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/unicore32/include/asm/pgalloc.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h index 0213e373a895..2e02d1356fdf 100644 --- a/arch/unicore32/include/asm/pgalloc.h +++ b/arch/unicore32/include/asm/pgalloc.h @@ -51,12 +51,14 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr) struct page *pte; pte = alloc_pages(PGALLOC_GFP, 0); - if (pte) { - if (!PageHighMem(pte)) { - void *page = page_address(pte); - clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); - } - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!PageHighMem(pte)) { + void *page = page_address(pte); + clean_dcache_area(page, PTRS_PER_PTE * sizeof(pte_t)); + } + if (!pgtable_page_ctor(pte)) { + __free_page(pte); } return pte; From cecbd1b5afb01ed6ee14e14234b2e8c52e26c841 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:47 -0800 Subject: [PATCH 44/72] x86: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/pgtable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1a7d21342e02..a7cccb6d7fec 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -25,8 +25,12 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) struct page *pte; pte = alloc_pages(__userpte_alloc_gfp, 0); - if (pte) - pgtable_page_ctor(pte); + if (!pte) + return NULL; + if (!pgtable_page_ctor(pte)) { + __free_page(pte); + return NULL; + } return pte; } From 8f43123d170ad9a6760e424af5bde7569fdd8c12 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:48 -0800 Subject: [PATCH 45/72] xtensa: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/include/asm/pgalloc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index 037671a655dc..b8774f1e21e6 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -58,7 +58,10 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, if (!pte) return NULL; page = virt_to_page(pte); - pgtable_page_ctor(page); + if (!pgtable_page_ctor(page)) { + kmem_cache_free(pgtable_cache, pte); + return NULL; + } return page; } From 01058e70767de7b846588ef651c4d66e862f6823 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:49 -0800 Subject: [PATCH 46/72] iommu/arm-smmu: handle pgtable_page_ctor() fail Signed-off-by: Kirill A. Shutemov Acked-by: Will Deacon Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iommu/arm-smmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 181c9ba929cd..2349d6272aef 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1212,7 +1212,10 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, arm_smmu_flush_pgtable(smmu, page_address(table), ARM_SMMU_PTE_HWTABLE_SIZE); - pgtable_page_ctor(table); + if (!pgtable_page_ctor(table)) { + __free_page(table); + return -ENOMEM; + } pmd_populate(NULL, pmd, table); arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd)); } From f820e2805c7acb157a78438d07e47f4fc57fe679 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:50 -0800 Subject: [PATCH 47/72] xtensa: use buddy allocator for PTE table At the moment xtensa uses slab allocator for PTE table. It doesn't work with enabled split page table lock: slab uses page->slab_cache and page->first_page for its pages. These fields share stroage with page->ptl. Signed-off-by: Kirill A. Shutemov Cc: Chris Zankel Acked-by: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/include/asm/pgalloc.h | 20 ++++++++++++-------- arch/xtensa/include/asm/pgtable.h | 3 +-- arch/xtensa/mm/mmu.c | 20 -------------------- 3 files changed, 13 insertions(+), 30 deletions(-) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index b8774f1e21e6..d38eb9237e64 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -38,14 +38,18 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long)pgd); } -/* Use a slab cache for the pte pages (see also sparc64 implementation) */ - -extern struct kmem_cache *pgtable_cache; - static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(pgtable_cache, GFP_KERNEL|__GFP_REPEAT); + pte_t *ptep; + int i; + + ptep = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + if (!ptep) + return NULL; + for (i = 0; i < 1024; i++) + pte_clear(NULL, 0, ptep + i); + return ptep; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm, @@ -59,7 +63,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, return NULL; page = virt_to_page(pte); if (!pgtable_page_ctor(page)) { - kmem_cache_free(pgtable_cache, pte); + __free_page(page); return NULL; } return page; @@ -67,13 +71,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm, static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) { - kmem_cache_free(pgtable_cache, pte); + free_page((unsigned long)pte); } static inline void pte_free(struct mm_struct *mm, pgtable_t pte) { pgtable_page_dtor(pte); - kmem_cache_free(pgtable_cache, page_address(pte)); + __free_page(pte); } #define pmd_pgtable(pmd) pmd_page(pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 0fdf5d043f82..216446295ada 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -220,12 +220,11 @@ extern unsigned long empty_zero_page[1024]; #ifdef CONFIG_MMU extern pgd_t swapper_pg_dir[PAGE_SIZE/sizeof(pgd_t)]; extern void paging_init(void); -extern void pgtable_cache_init(void); #else # define swapper_pg_dir NULL static inline void paging_init(void) { } -static inline void pgtable_cache_init(void) { } #endif +static inline void pgtable_cache_init(void) { } /* * The pmd contains the kernel virtual address of the pte page. diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index a1077570e383..c43771c974be 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -50,23 +50,3 @@ void __init init_mmu(void) */ set_ptevaddr_register(PGTABLE_START); } - -struct kmem_cache *pgtable_cache __read_mostly; - -static void pgd_ctor(void *addr) -{ - pte_t *ptep = (pte_t *)addr; - int i; - - for (i = 0; i < 1024; i++, ptep++) - pte_clear(NULL, 0, ptep); - -} - -void __init pgtable_cache_init(void) -{ - pgtable_cache = kmem_cache_create("pgd", - PAGE_SIZE, PAGE_SIZE, - SLAB_HWCACHE_ALIGN, - pgd_ctor); -} From 49076ec2ccaf68610aa03d96bced9a6694b93ca1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:51 -0800 Subject: [PATCH 48/72] mm: dynamically allocate page->ptl if it cannot be embedded to struct page If split page table lock is in use, we embed the lock into struct page of table's page. We have to disable split lock, if spinlock_t is too big be to be embedded, like when DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC enabled. This patch add support for dynamic allocation of split page table lock if we can't embed it to struct page. page->ptl is unsigned long now and we use it as spinlock_t if sizeof(spinlock_t) <= sizeof(long), otherwise it's pointer to spinlock_t. The spinlock_t allocated in pgtable_page_ctor() for PTE table and in pgtable_pmd_page_ctor() for PMD table. All other helpers converted to support dynamically allocated page->ptl. Signed-off-by: Kirill A. Shutemov Reviewed-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/split_page_table_lock | 94 ++++++++++++++++++++++++++ arch/x86/xen/mmu.c | 2 +- include/linux/mm.h | 81 ++++++++++++++++------ include/linux/mm_types.h | 5 +- mm/Kconfig | 2 - mm/memory.c | 19 ++++++ 6 files changed, 179 insertions(+), 24 deletions(-) create mode 100644 Documentation/vm/split_page_table_lock diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock new file mode 100644 index 000000000000..7521d367f21d --- /dev/null +++ b/Documentation/vm/split_page_table_lock @@ -0,0 +1,94 @@ +Split page table lock +===================== + +Originally, mm->page_table_lock spinlock protected all page tables of the +mm_struct. But this approach leads to poor page fault scalability of +multi-threaded applications due high contention on the lock. To improve +scalability, split page table lock was introduced. + +With split page table lock we have separate per-table lock to serialize +access to the table. At the moment we use split lock for PTE and PMD +tables. Access to higher level tables protected by mm->page_table_lock. + +There are helpers to lock/unlock a table and other accessor functions: + - pte_offset_map_lock() + maps pte and takes PTE table lock, returns pointer to the taken + lock; + - pte_unmap_unlock() + unlocks and unmaps PTE table; + - pte_alloc_map_lock() + allocates PTE table if needed and take the lock, returns pointer + to taken lock or NULL if allocation failed; + - pte_lockptr() + returns pointer to PTE table lock; + - pmd_lock() + takes PMD table lock, returns pointer to taken lock; + - pmd_lockptr() + returns pointer to PMD table lock; + +Split page table lock for PTE tables is enabled compile-time if +CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS. +If split lock is disabled, all tables guaded by mm->page_table_lock. + +Split page table lock for PMD tables is enabled, if it's enabled for PTE +tables and the architecture supports it (see below). + +Hugetlb and split page table lock +--------------------------------- + +Hugetlb can support several page sizes. We use split lock only for PMD +level, but not for PUD. + +Hugetlb-specific helpers: + - huge_pte_lock() + takes pmd split lock for PMD_SIZE page, mm->page_table_lock + otherwise; + - huge_pte_lockptr() + returns pointer to table lock; + +Support of split page table lock by an architecture +--------------------------------------------------- + +There's no need in special enabling of PTE split page table lock: +everything required is done by pgtable_page_ctor() and pgtable_page_dtor(), +which must be called on PTE table allocation / freeing. + +Make sure the architecture doesn't use slab allocator for page table +allocation: slab uses page->slab_cache and page->first_page for its pages. +These fields share storage with page->ptl. + +PMD split lock only makes sense if you have more than two page table +levels. + +PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table +allocation and pgtable_pmd_page_dtor() on freeing. + +Allocation usually happens in pmd_alloc_one(), freeing in pmd_free(), but +make sure you cover all PMD table allocation / freeing paths: i.e X86_PAE +preallocate few PMDs on pgd_alloc(). + +With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK. + +NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must +be handled properly. + +page->ptl +--------- + +page->ptl is used to access split page table lock, where 'page' is struct +page of page containing the table. It shares storage with page->private +(and few other fields in union). + +To avoid increasing size of struct page and have best performance, we use a +trick: + - if spinlock_t fits into long, we use page->ptr as spinlock, so we + can avoid indirect access and save a cache line. + - if size of spinlock_t is bigger then size of long, we use page->ptl as + pointer to spinlock_t and allocate it dynamically. This allows to use + split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs + one more cache line for indirect access; + +The spinlock_t allocated in pgtable_page_ctor() for PTE table and in +pgtable_pmd_page_ctor() for PMD table. + +Please, never access page->ptl directly -- use appropriate helper. diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 455c873ce009..49c962fe7e62 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -797,7 +797,7 @@ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) spinlock_t *ptl = NULL; #if USE_SPLIT_PTE_PTLOCKS - ptl = __pte_lockptr(page); + ptl = ptlock_ptr(page); spin_lock_nest_lock(ptl, &mm->page_table_lock); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index e855351c3361..d0339741b6ce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1317,32 +1317,73 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ #if USE_SPLIT_PTE_PTLOCKS -/* - * We tuck a spinlock to guard each pagetable page into its struct page, - * at page->private, with BUILD_BUG_ON to make sure that this will not - * overflow into the next struct page (as it might with DEBUG_SPINLOCK). - * When freeing, reset page->mapping so free_pages_check won't complain. - */ -#define __pte_lockptr(page) &((page)->ptl) -#define pte_lock_init(_page) do { \ - spin_lock_init(__pte_lockptr(_page)); \ -} while (0) -#define pte_lock_deinit(page) ((page)->mapping = NULL) -#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) +bool __ptlock_alloc(struct page *page); +void __ptlock_free(struct page *page); +static inline bool ptlock_alloc(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + return __ptlock_alloc(page); + return true; +} +static inline void ptlock_free(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + __ptlock_free(page); +} + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + return (spinlock_t *) page->ptl; + else + return (spinlock_t *) &page->ptl; +} + +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return ptlock_ptr(pmd_page(*pmd)); +} + +static inline bool ptlock_init(struct page *page) +{ + /* + * prep_new_page() initialize page->private (and therefore page->ptl) + * with 0. Make sure nobody took it in use in between. + * + * It can happen if arch try to use slab for page table allocation: + * slab code uses page->slab_cache and page->first_page (for tail + * pages), which share storage with page->ptl. + */ + VM_BUG_ON(page->ptl); + if (!ptlock_alloc(page)) + return false; + spin_lock_init(ptlock_ptr(page)); + return true; +} + +/* Reset page->mapping so free_pages_check won't complain. */ +static inline void pte_lock_deinit(struct page *page) +{ + page->mapping = NULL; + ptlock_free(page); +} + #else /* !USE_SPLIT_PTE_PTLOCKS */ /* * We use mm->page_table_lock to guard all pagetable pages of the mm. */ -#define pte_lock_init(page) do {} while (0) -#define pte_lock_deinit(page) do {} while (0) -#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +{ + return &mm->page_table_lock; +} +static inline bool ptlock_init(struct page *page) { return true; } +static inline void pte_lock_deinit(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ static inline bool pgtable_page_ctor(struct page *page) { - pte_lock_init(page); inc_zone_page_state(page, NR_PAGETABLE); - return true; + return ptlock_init(page); } static inline void pgtable_page_dtor(struct page *page) @@ -1383,16 +1424,15 @@ static inline void pgtable_page_dtor(struct page *page) static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) { - return &virt_to_page(pmd)->ptl; + return ptlock_ptr(virt_to_page(pmd)); } static inline bool pgtable_pmd_page_ctor(struct page *page) { - spin_lock_init(&page->ptl); #ifdef CONFIG_TRANSPARENT_HUGEPAGE page->pmd_huge_pte = NULL; #endif - return true; + return ptlock_init(page); } static inline void pgtable_pmd_page_dtor(struct page *page) @@ -1400,6 +1440,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page) #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON(page->pmd_huge_pte); #endif + ptlock_free(page); } #define pmd_huge_pte(mm, pmd) (virt_to_page(pmd)->pmd_huge_pte) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 934261099975..423da79877e6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -147,7 +147,10 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTE_PTLOCKS - spinlock_t ptl; + unsigned long ptl; /* It's spinlock_t if it fits to long, + * otherwise it's pointer to dynamicaly + * allocated spinlock_t. + */ #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ diff --git a/mm/Kconfig b/mm/Kconfig index 7aa02def4666..de31af256207 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -218,8 +218,6 @@ config SPLIT_PTLOCK_CPUS int default "999999" if ARM && !CPU_CACHE_VIPT default "999999" if PARISC && !PA20 - default "999999" if DEBUG_SPINLOCK || DEBUG_LOCK_ALLOC - default "999999" if !64BIT && GENERIC_LOCKBREAK default "4" config ARCH_ENABLE_SPLIT_PMD_PTLOCK diff --git a/mm/memory.c b/mm/memory.c index d05c6b10e926..24ffae2a530e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4270,3 +4270,22 @@ void copy_user_huge_page(struct page *dst, struct page *src, } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ + +#if USE_SPLIT_PTE_PTLOCKS +bool __ptlock_alloc(struct page *page) +{ + spinlock_t *ptl; + + ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); + if (!ptl) + return false; + page->ptl = (unsigned long)ptl; + return true; +} + +void __ptlock_free(struct page *page) +{ + if (sizeof(spinlock_t) > sizeof(page->ptl)) + kfree((spinlock_t *)page->ptl); +} +#endif From 539edb5846c740d78a8b6c2e43a99ca4323df68f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 14:31:52 -0800 Subject: [PATCH 49/72] mm: properly separate the bloated ptl from the regular case Use kernel/bounds.c to convert build-time spinlock_t size check into a preprocessor symbol and apply that to properly separate the page::ptl situation. Signed-off-by: Peter Zijlstra Signed-off-by: Kirill A. Shutemov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++----------- include/linux/mm_types.h | 9 +++++---- kernel/bounds.c | 2 ++ mm/memory.c | 11 +++++------ 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index d0339741b6ce..1cedd000cf29 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1317,27 +1317,29 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ #if USE_SPLIT_PTE_PTLOCKS -bool __ptlock_alloc(struct page *page); -void __ptlock_free(struct page *page); +#if BLOATED_SPINLOCKS +extern bool ptlock_alloc(struct page *page); +extern void ptlock_free(struct page *page); + +static inline spinlock_t *ptlock_ptr(struct page *page) +{ + return page->ptl; +} +#else /* BLOATED_SPINLOCKS */ static inline bool ptlock_alloc(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - return __ptlock_alloc(page); return true; } + static inline void ptlock_free(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - __ptlock_free(page); } static inline spinlock_t *ptlock_ptr(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - return (spinlock_t *) page->ptl; - else - return (spinlock_t *) &page->ptl; + return &page->ptl; } +#endif /* BLOATED_SPINLOCKS */ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { @@ -1354,7 +1356,7 @@ static inline bool ptlock_init(struct page *page) * slab code uses page->slab_cache and page->first_page (for tail * pages), which share storage with page->ptl. */ - VM_BUG_ON(page->ptl); + VM_BUG_ON(*(unsigned long *)&page->ptl); if (!ptlock_alloc(page)) return false; spin_lock_init(ptlock_ptr(page)); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 423da79877e6..10f5a7272b80 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -147,10 +147,11 @@ struct page { * system if PG_buddy is set. */ #if USE_SPLIT_PTE_PTLOCKS - unsigned long ptl; /* It's spinlock_t if it fits to long, - * otherwise it's pointer to dynamicaly - * allocated spinlock_t. - */ +#if BLOATED_SPINLOCKS + spinlock_t *ptl; +#else + spinlock_t ptl; +#endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ struct page *first_page; /* Compound tail pages */ diff --git a/kernel/bounds.c b/kernel/bounds.c index e8ca97b5c386..578782ef6ae1 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -11,6 +11,7 @@ #include #include #include +#include void foo(void) { @@ -21,5 +22,6 @@ void foo(void) #ifdef CONFIG_SMP DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif + DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int)); /* End of constants */ } diff --git a/mm/memory.c b/mm/memory.c index 24ffae2a530e..5d9025f3b3e1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4271,21 +4271,20 @@ void copy_user_huge_page(struct page *dst, struct page *src, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ -#if USE_SPLIT_PTE_PTLOCKS -bool __ptlock_alloc(struct page *page) +#if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS +bool ptlock_alloc(struct page *page) { spinlock_t *ptl; ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); if (!ptl) return false; - page->ptl = (unsigned long)ptl; + page->ptl = ptl; return true; } -void __ptlock_free(struct page *page) +void ptlock_free(struct page *page) { - if (sizeof(spinlock_t) > sizeof(page->ptl)) - kfree((spinlock_t *)page->ptl); + kfree(page->ptl); } #endif From ea1e7ed33708c7a760419ff9ded0a6cb90586a50 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 14 Nov 2013 14:31:53 -0800 Subject: [PATCH 50/72] mm: create a separate slab for page->ptl allocation If DEBUG_SPINLOCK and DEBUG_LOCK_ALLOC are enabled spinlock_t on x86_64 is 72 bytes. For page->ptl they will be allocated from kmalloc-96 slab, so we loose 24 on each. An average system can easily allocate few tens thousands of page->ptl and overhead is significant. Let's create a separate slab for page->ptl allocation to solve this. Signed-off-by: Kirill A. Shutemov Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 +++++++++ init/main.c | 2 +- mm/memory.c | 7 +++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 1cedd000cf29..0548eb201e05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1318,6 +1318,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a #if USE_SPLIT_PTE_PTLOCKS #if BLOATED_SPINLOCKS +void __init ptlock_cache_init(void); extern bool ptlock_alloc(struct page *page); extern void ptlock_free(struct page *page); @@ -1326,6 +1327,7 @@ static inline spinlock_t *ptlock_ptr(struct page *page) return page->ptl; } #else /* BLOATED_SPINLOCKS */ +static inline void ptlock_cache_init(void) {} static inline bool ptlock_alloc(struct page *page) { return true; @@ -1378,10 +1380,17 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct page *page) { return true; } static inline void pte_lock_deinit(struct page *page) {} #endif /* USE_SPLIT_PTE_PTLOCKS */ +static inline void pgtable_init(void) +{ + ptlock_cache_init(); + pgtable_cache_init(); +} + static inline bool pgtable_page_ctor(struct page *page) { inc_zone_page_state(page, NR_PAGETABLE); diff --git a/init/main.c b/init/main.c index 6ad1a533a8c7..5f191133376f 100644 --- a/init/main.c +++ b/init/main.c @@ -473,7 +473,7 @@ static void __init mm_init(void) mem_init(); kmem_cache_init(); percpu_init_late(); - pgtable_cache_init(); + pgtable_init(); vmalloc_init(); } diff --git a/mm/memory.c b/mm/memory.c index 5d9025f3b3e1..0409e8f43fa0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4272,6 +4272,13 @@ void copy_user_huge_page(struct page *dst, struct page *src, #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #if USE_SPLIT_PTE_PTLOCKS && BLOATED_SPINLOCKS +static struct kmem_cache *page_ptl_cachep; +void __init ptlock_cache_init(void) +{ + page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, + SLAB_PANIC, NULL); +} + bool ptlock_alloc(struct page *page) { spinlock_t *ptl; From 57f4257eae33e036125973858934730250d464e3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Nov 2013 14:31:54 -0800 Subject: [PATCH 51/72] lockref: use BLOATED_SPINLOCKS to avoid explicit config dependencies Avoid the fragile Kconfig construct guestimating spinlock_t sizes; use a friendly compile-time test to determine this. [kirill.shutemov@linux.intel.com: drop CONFIG_CMPXCHG_LOCKREF] Signed-off-by: Peter Zijlstra Signed-off-by: Kirill A. Shutemov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 7 ++++++- lib/Kconfig | 7 ------- lib/lockref.c | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 13dfd36a3294..c8929c3832db 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -15,10 +15,15 @@ */ #include +#include + +#define USE_CMPXCHG_LOCKREF \ + (IS_ENABLED(CONFIG_ARCH_USE_CMPXCHG_LOCKREF) && \ + IS_ENABLED(CONFIG_SMP) && !BLOATED_SPINLOCKS) struct lockref { union { -#ifdef CONFIG_CMPXCHG_LOCKREF +#if USE_CMPXCHG_LOCKREF aligned_u64 lock_count; #endif struct { diff --git a/lib/Kconfig b/lib/Kconfig index 75485e163ca3..06dc74200a51 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -51,13 +51,6 @@ config PERCPU_RWSEM config ARCH_USE_CMPXCHG_LOCKREF bool -config CMPXCHG_LOCKREF - def_bool y if ARCH_USE_CMPXCHG_LOCKREF - depends on SMP - depends on !GENERIC_LOCKBREAK - depends on !DEBUG_SPINLOCK - depends on !DEBUG_LOCK_ALLOC - config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/lockref.c b/lib/lockref.c index af6e95d0bed6..d2b123f8456b 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -1,7 +1,7 @@ #include #include -#ifdef CONFIG_CMPXCHG_LOCKREF +#if USE_CMPXCHG_LOCKREF /* * Allow weakly-ordered memory architectures to provide barrier-less From 839cc2a94cc3665bafe32203c2f095f4dd470a80 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 14 Nov 2013 14:31:56 -0800 Subject: [PATCH 52/72] seq_file: introduce seq_setwidth() and seq_pad() There are several users who want to know bytes written by seq_*() for alignment purpose. Currently they are using %n format for knowing it because seq_*() returns 0 on success. This patch introduces seq_setwidth() and seq_pad() for allowing them to align without using %n format. Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Cc: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 15 +++++++++++++++ include/linux/seq_file.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/fs/seq_file.c b/fs/seq_file.c index a290157265ef..1cd2388ca5bd 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -766,6 +766,21 @@ int seq_write(struct seq_file *seq, const void *data, size_t len) } EXPORT_SYMBOL(seq_write); +/** + * seq_pad - write padding spaces to buffer + * @m: seq_file identifying the buffer to which data should be written + * @c: the byte to append after padding if non-zero + */ +void seq_pad(struct seq_file *m, char c) +{ + int size = m->pad_until - m->count; + if (size > 0) + seq_printf(m, "%*s", size, ""); + if (c) + seq_putc(m, c); +} +EXPORT_SYMBOL(seq_pad); + struct list_head *seq_list_start(struct list_head *head, loff_t pos) { struct list_head *lh; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 4e32edc8f506..52e0097f61f0 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -20,6 +20,7 @@ struct seq_file { size_t size; size_t from; size_t count; + size_t pad_until; loff_t index; loff_t read_pos; u64 version; @@ -79,6 +80,20 @@ static inline void seq_commit(struct seq_file *m, int num) } } +/** + * seq_setwidth - set padding width + * @m: the seq_file handle + * @size: the max number of bytes to pad. + * + * Call seq_setwidth() for setting max width, then call seq_printf() etc. and + * finally call seq_pad() to pad the remaining bytes. + */ +static inline void seq_setwidth(struct seq_file *m, size_t size) +{ + m->pad_until = m->count + size; +} +void seq_pad(struct seq_file *m, char c); + char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); From 652586df95e5d76b37d07a11839126dcfede1621 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 14 Nov 2013 14:31:57 -0800 Subject: [PATCH 53/72] seq_file: remove "%n" usage from seq_file users All seq_printf() users are using "%n" for calculating padding size, convert them to use seq_setwidth() / seq_pad() pair. Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Cc: Joe Perches Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/consoles.c | 10 ++++------ fs/proc/nommu.c | 12 +++++------- fs/proc/task_mmu.c | 20 ++++++-------------- fs/proc/task_nommu.c | 19 ++++++------------- net/ipv4/fib_trie.c | 13 +++++++------ net/ipv4/ping.c | 15 +++++++-------- net/ipv4/tcp_ipv4.c | 33 +++++++++++++++------------------ net/ipv4/udp.c | 15 +++++++-------- net/phonet/socket.c | 24 +++++++++++------------- net/sctp/objcnt.c | 9 +++++---- 10 files changed, 73 insertions(+), 97 deletions(-) diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index b701eaa482bf..51942d5abcec 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -29,7 +29,6 @@ static int show_console_dev(struct seq_file *m, void *v) char flags[ARRAY_SIZE(con_flags) + 1]; struct console *con = v; unsigned int a; - int len; dev_t dev = 0; if (con->device) { @@ -47,11 +46,10 @@ static int show_console_dev(struct seq_file *m, void *v) con_flags[a].name : ' '; flags[a] = 0; - seq_printf(m, "%s%d%n", con->name, con->index, &len); - len = 21 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-', + seq_setwidth(m, 21 - 1); + seq_printf(m, "%s%d", con->name, con->index); + seq_pad(m, ' '); + seq_printf(m, "%c%c%c (%s)", con->read ? 'R' : '-', con->write ? 'W' : '-', con->unblank ? 'U' : '-', flags); if (dev) diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index ccfd99bd1c5a..5f9bc8a746c9 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -39,7 +39,7 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; flags = region->vm_flags; file = region->vm_file; @@ -50,8 +50,9 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) ino = inode->i_ino; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", region->vm_start, region->vm_end, flags & VM_READ ? 'r' : '-', @@ -59,13 +60,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', ((loff_t)region->vm_pgoff) << PAGE_SHIFT, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - len = 25 + sizeof(void *) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 42b5cf5d0326..fb52b548080d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -84,14 +84,6 @@ unsigned long task_statm(struct mm_struct *mm, return mm->total_vm; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - #ifdef CONFIG_NUMA /* * These functions are for numa_maps but called in generic **maps seq_file @@ -269,7 +261,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) unsigned long long pgoff = 0; unsigned long start, end; dev_t dev = 0; - int len; const char *name = NULL; if (file) { @@ -287,7 +278,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) if (stack_guard_page_end(vma, end)) end -= PAGE_SIZE; - seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", start, end, flags & VM_READ ? 'r' : '-', @@ -295,14 +287,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); /* * Print the dentry name for named mappings, and a * special [heap] marker for the heap: */ if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, "\n"); goto done; } @@ -334,7 +326,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) name = "[stack]"; } else { /* Thread stack in /proc/PID/maps */ - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_printf(m, "[stack:%d]", tid); } } @@ -342,7 +334,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) done: if (name) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_puts(m, name); } seq_putc(m, '\n'); diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 56123a6f462e..678455d2d683 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -123,14 +123,6 @@ unsigned long task_statm(struct mm_struct *mm, return size; } -static void pad_len_spaces(struct seq_file *m, int len) -{ - len = 25 + sizeof(void*) * 6 - len; - if (len < 1) - len = 1; - seq_printf(m, "%*c", len, ' '); -} - /* * display a single VMA to a sequenced file */ @@ -142,7 +134,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, unsigned long ino = 0; struct file *file; dev_t dev = 0; - int flags, len; + int flags; unsigned long long pgoff = 0; flags = vma->vm_flags; @@ -155,8 +147,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT; } + seq_setwidth(m, 25 + sizeof(void *) * 6 - 1); seq_printf(m, - "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', @@ -164,16 +157,16 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', pgoff, - MAJOR(dev), MINOR(dev), ino, &len); + MAJOR(dev), MINOR(dev), ino); if (file) { - pad_len_spaces(m, len); + seq_pad(m, ' '); seq_path(m, &file->f_path, ""); } else if (mm) { pid_t tid = vm_is_stack(priv->task, vma, is_pid); if (tid != 0) { - pad_len_spaces(m, len); + seq_pad(m, ' '); /* * Thread stack in /proc/PID/task/TID/maps or * the main process stack. diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index ec9a9ef4ce50..5afeb5aa4c7c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2523,16 +2523,17 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) list_for_each_entry_rcu(fa, &li->falh, fa_list) { const struct fib_info *fi = fa->fa_info; unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi); - int len; if (fa->fa_type == RTN_BROADCAST || fa->fa_type == RTN_MULTICAST) continue; + seq_setwidth(seq, 127); + if (fi) seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u%n", + "%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, fi->fib_nh->nh_gw, flags, 0, 0, @@ -2541,15 +2542,15 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) (fi->fib_advmss ? fi->fib_advmss + 40 : 0), fi->fib_window, - fi->fib_rtt >> 3, &len); + fi->fib_rtt >> 3); else seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u%n", + "%d\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, - mask, 0, 0, 0, &len); + mask, 0, 0, 0); - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_pad(seq, '\n'); } } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9afbdb19f4a2..cbc85f660d54 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -1076,7 +1076,7 @@ void ping_seq_stop(struct seq_file *seq, void *v) EXPORT_SYMBOL_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, - int bucket, int *len) + int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; @@ -1085,7 +1085,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), @@ -1093,23 +1093,22 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops), len); + atomic_read(&sp->sk_drops)); } static int ping_v4_seq_show(struct seq_file *seq, void *v) { + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct ping_iter_state *state = seq->private; - int len; - ping_v4_format_sock(v, seq, state->bucket, &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + ping_v4_format_sock(v, seq, state->bucket); } + seq_pad(seq, '\n'); return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 14bba8a1c5a7..59a6f8b90cd9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2541,13 +2541,13 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, kuid_t uid, int *len) + struct seq_file *f, int i, kuid_t uid) { const struct inet_request_sock *ireq = inet_rsk(req); long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK", i, ireq->ir_loc_addr, ntohs(inet_sk(sk)->inet_sport), @@ -2562,11 +2562,10 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), - req, - len); + req); } -static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) +static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) { int timer_active; unsigned long timer_expires; @@ -2605,7 +2604,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0); seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " - "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d%n", + "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d", i, src, srcp, dest, destp, sk->sk_state, tp->write_seq - tp->snd_una, rx_queue, @@ -2622,12 +2621,11 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->snd_cwnd, sk->sk_state == TCP_LISTEN ? (fastopenq ? fastopenq->max_qlen : 0) : - (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh), - len); + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)); } static void get_timewait4_sock(const struct inet_timewait_sock *tw, - struct seq_file *f, int i, int *len) + struct seq_file *f, int i) { __be32 dest, src; __u16 destp, srcp; @@ -2639,10 +2637,10 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, srcp = ntohs(tw->tw_sport); seq_printf(f, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw, len); + atomic_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 @@ -2651,11 +2649,10 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; struct sock *sk = v; - int len; + seq_setwidth(seq, TMPSZ - 1); if (v == SEQ_START_TOKEN) { - seq_printf(seq, "%-*s\n", TMPSZ - 1, - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode"); goto out; @@ -2666,16 +2663,16 @@ static int tcp4_seq_show(struct seq_file *seq, void *v) case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_ESTABLISHED: if (sk->sk_state == TCP_TIME_WAIT) - get_timewait4_sock(v, seq, st->num, &len); + get_timewait4_sock(v, seq, st->num); else - get_tcp4_sock(v, seq, st->num, &len); + get_tcp4_sock(v, seq, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); + get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid); break; } - seq_printf(seq, "%*s\n", TMPSZ - 1 - len, ""); out: + seq_pad(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 89909dd730dd..de86e5bc4462 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2331,7 +2331,7 @@ EXPORT_SYMBOL(udp_proc_unregister); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, - int bucket, int *len) + int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; @@ -2340,7 +2340,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d%n", + " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), @@ -2348,23 +2348,22 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops), len); + atomic_read(&sp->sk_drops)); } int udp4_seq_show(struct seq_file *seq, void *v) { + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct udp_iter_state *state = seq->private; - int len; - udp4_format_sock(v, seq, state->bucket, &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + udp4_format_sock(v, seq, state->bucket); } + seq_pad(seq, '\n'); return 0; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 77e38f733496..008214a3d5eb 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -595,26 +595,25 @@ static void pn_sock_seq_stop(struct seq_file *seq, void *v) static int pn_sock_seq_show(struct seq_file *seq, void *v) { - int len; - + seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%s%n", "pt loc rem rs st tx_queue rx_queue " - " uid inode ref pointer drops", &len); + seq_puts(seq, "pt loc rem rs st tx_queue rx_queue " + " uid inode ref pointer drops"); else { struct sock *sk = v; struct pn_sock *pn = pn_sk(sk); seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " - "%d %pK %d%n", + "%d %pK %d", sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, - atomic_read(&sk->sk_drops), &len); + atomic_read(&sk->sk_drops)); } - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_pad(seq, '\n'); return 0; } @@ -785,20 +784,19 @@ static void pn_res_seq_stop(struct seq_file *seq, void *v) static int pn_res_seq_show(struct seq_file *seq, void *v) { - int len; - + seq_setwidth(seq, 63); if (v == SEQ_START_TOKEN) - seq_printf(seq, "%s%n", "rs uid inode", &len); + seq_puts(seq, "rs uid inode"); else { struct sock **psk = v; struct sock *sk = *psk; - seq_printf(seq, "%02X %5u %lu%n", + seq_printf(seq, "%02X %5u %lu", (int) (psk - pnres.sk), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), - sock_i_ino(sk), &len); + sock_i_ino(sk)); } - seq_printf(seq, "%*s\n", 63 - len, ""); + seq_pad(seq, '\n'); return 0; } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 5ea573b37648..647396baa56f 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -79,12 +79,13 @@ static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { */ static int sctp_objcnt_seq_show(struct seq_file *seq, void *v) { - int i, len; + int i; i = (int)*(loff_t *)v; - seq_printf(seq, "%s: %d%n", sctp_dbg_objcnt[i].label, - atomic_read(sctp_dbg_objcnt[i].counter), &len); - seq_printf(seq, "%*s\n", 127 - len, ""); + seq_setwidth(seq, 127); + seq_printf(seq, "%s: %d", sctp_dbg_objcnt[i].label, + atomic_read(sctp_dbg_objcnt[i].counter)); + seq_pad(seq, '\n'); return 0; } From 9196436ab2f713b823a2ba2024cb69f40b2f54a5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 14 Nov 2013 14:31:58 -0800 Subject: [PATCH 54/72] vsprintf: ignore %n again This ignores %n in printf again, as was originally documented. Implementing %n poses a greater security risk than utility, so it should stay ignored. To help anyone attempting to use %n, a warning will be emitted if it is encountered. Based on an earlier patch by Joe Perches. Because %n was designed to write to pointers on the stack, it has been frequently used as an attack vector when bugs are found that leak user-controlled strings into functions that ultimately process format strings. While this class of bug can still be turned into an information leak, removing %n eliminates the common method of elevating such a bug into an arbitrary kernel memory writing primitive, significantly reducing the danger of this class of bug. For seq_file users that need to know the length of a written string for padding, please see seq_setwidth() and seq_pad() instead. Signed-off-by: Kees Cook Cc: Joe Perches Cc: Tetsuo Handa Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 48586ac3a62e..10909c571494 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1712,18 +1712,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) break; case FORMAT_TYPE_NRCHARS: { - u8 qualifier = spec.qualifier; + /* + * Since %n poses a greater security risk than + * utility, ignore %n and skip its argument. + */ + void *skip_arg; - if (qualifier == 'l') { - long *ip = va_arg(args, long *); - *ip = (str - buf); - } else if (_tolower(qualifier) == 'z') { - size_t *ip = va_arg(args, size_t *); - *ip = (str - buf); - } else { - int *ip = va_arg(args, int *); - *ip = (str - buf); - } + WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", + old_fmt); + + skip_arg = va_arg(args, void *); break; } From d5ceede8dc86278d16dcad8f916ef323b5672bd8 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 14 Nov 2013 14:31:59 -0800 Subject: [PATCH 55/72] drivers/rtc/rtc-hid-sensor-time.c: use dev_get_platdata() Use the wrapper function for retrieving the platform data instead of accessing dev->platform_data directly. This is a cosmetic change to make the code simpler and enhance the readability. Signed-off-by: Jingoo Han Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-hid-sensor-time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 45560ffb038d..1ba369043b8d 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -236,7 +236,7 @@ static const struct rtc_class_ops hid_time_rtc_ops = { static int hid_time_probe(struct platform_device *pdev) { int ret = 0; - struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + struct hid_sensor_hub_device *hsdev = dev_get_platdata(&pdev->dev); struct hid_time_state *time_state = devm_kzalloc(&pdev->dev, sizeof(struct hid_time_state), GFP_KERNEL); @@ -303,7 +303,7 @@ err_open: static int hid_time_remove(struct platform_device *pdev) { - struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data; + struct hid_sensor_hub_device *hsdev = dev_get_platdata(&pdev->dev); sensor_hub_device_close(hsdev); sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME); From 406bf31893163cbe5b0b03a281685c7dc95c9380 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Thu, 14 Nov 2013 14:32:00 -0800 Subject: [PATCH 56/72] drivers/rtc/rtc-hid-sensor-time.c: enable HID input processing early Enable the processing of HID input records before the RTC will be registered, in order to allow the RTC register function to read clock. Without doing that the clock can only be read after the probe function has finished. Signed-off-by: Alexander Holler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-hid-sensor-time.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index 1ba369043b8d..a34e5cfd2ab5 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -281,11 +281,18 @@ static int hid_time_probe(struct platform_device *pdev) goto err_open; } + /* + * Enable HID input processing early in order to be able to read the + * clock already in devm_rtc_device_register(). + */ + hid_device_io_start(hsdev->hdev); + time_state->rtc = devm_rtc_device_register(&pdev->dev, "hid-sensor-time", &hid_time_rtc_ops, THIS_MODULE); if (IS_ERR_OR_NULL(time_state->rtc)) { + hid_device_io_stop(hsdev->hdev); ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV; time_state->rtc = NULL; dev_err(&pdev->dev, "rtc device register failed!\n"); From c32f74ab2872994bc8336ed367313da3139350ca Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:01 -0800 Subject: [PATCH 57/72] sched: replace INIT_COMPLETION with reinit_completion For the casual device driver writer, it is hard to remember when to use init_completion (to init a completion structure) or INIT_COMPLETION (to *reinit* a completion structure). Furthermore, while all other completion functions exepct a pointer as a parameter, INIT_COMPLETION does not. To make it easier to remember which function to use and to make code more readable, introduce a new inline function with the proper name and consistent argument type. Update the kernel-doc for init_completion while we are here. Signed-off-by: Wolfram Sang Acked-by: Linus Walleij (personally at LCE13) Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/completion.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/include/linux/completion.h b/include/linux/completion.h index 22c33e35bcb2..124e4b4334c1 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -19,8 +19,8 @@ * * See also: complete(), wait_for_completion() (and friends _timeout, * _interruptible, _interruptible_timeout, and _killable), init_completion(), - * and macros DECLARE_COMPLETION(), DECLARE_COMPLETION_ONSTACK(), and - * INIT_COMPLETION(). + * reinit_completion(), and macros DECLARE_COMPLETION(), + * DECLARE_COMPLETION_ONSTACK(). */ struct completion { unsigned int done; @@ -65,7 +65,7 @@ struct completion { /** * init_completion - Initialize a dynamically allocated completion - * @x: completion structure that is to be initialized + * @x: pointer to completion structure that is to be initialized * * This inline function will initialize a dynamically created completion * structure. @@ -76,6 +76,18 @@ static inline void init_completion(struct completion *x) init_waitqueue_head(&x->wait); } +/** + * reinit_completion - reinitialize a completion structure + * @x: pointer to completion structure that is to be reinitialized + * + * This inline function should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. + */ +static inline void reinit_completion(struct completion *x) +{ + x->done = 0; +} + extern void wait_for_completion(struct completion *); extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); From 16735d022f72b20ddbb2274b8e109f69575e9b2b Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:02 -0800 Subject: [PATCH 58/72] tree-wide: use reinit_completion instead of INIT_COMPLETION Use this new function to make code more comprehensible, since we are reinitialzing the completion, not initializing. [akpm@linux-foundation.org: linux-next resyncs] Signed-off-by: Wolfram Sang Acked-by: Linus Walleij (personally at LCE13) Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-tegra/apbio.c | 2 +- arch/powerpc/platforms/powermac/low_i2c.c | 6 +++--- arch/powerpc/platforms/pseries/suspend.c | 2 +- crypto/af_alg.c | 2 +- crypto/tcrypt.c | 4 ++-- crypto/testmgr.c | 12 ++++++------ drivers/ata/libata-eh.c | 4 ++-- drivers/base/power/main.c | 4 ++-- drivers/block/amiflop.c | 2 +- drivers/block/cciss.c | 4 ++-- drivers/char/hw_random/timeriomem-rng.c | 2 +- drivers/crypto/tegra-aes.c | 2 +- drivers/firewire/core-transaction.c | 2 +- drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c | 2 +- drivers/hid/hid-wiimote.h | 2 +- drivers/hwmon/jz4740-hwmon.c | 2 +- drivers/i2c/busses/i2c-at91.c | 2 +- drivers/i2c/busses/i2c-bcm2835.c | 2 +- drivers/i2c/busses/i2c-davinci.c | 2 +- drivers/i2c/busses/i2c-designware-core.c | 2 +- drivers/i2c/busses/i2c-ismt.c | 2 +- drivers/i2c/busses/i2c-mxs.c | 2 +- drivers/i2c/busses/i2c-omap.c | 2 +- drivers/i2c/busses/i2c-tegra.c | 2 +- drivers/i2c/busses/i2c-wmt.c | 4 ++-- drivers/iio/adc/ad_sigma_delta.c | 6 +++--- drivers/iio/adc/nau7802.c | 2 +- drivers/input/touchscreen/cyttsp_core.c | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/media/platform/blackfin/bfin_capture.c | 2 +- drivers/media/radio/radio-wl1273.c | 10 +++++----- .../media/radio/si470x/radio-si470x-common.c | 4 ++-- drivers/media/rc/iguanair.c | 2 +- drivers/memstick/core/memstick.c | 2 +- drivers/memstick/host/r592.c | 2 +- drivers/misc/mic/card/mic_virtio.c | 2 +- drivers/misc/mic/host/mic_boot.c | 2 +- drivers/misc/ti-st/st_kim.c | 12 ++++++------ drivers/mtd/nand/mxc_nand.c | 2 +- drivers/mtd/nand/r852.c | 2 +- drivers/mtd/onenand/omap2.c | 10 +++++----- .../ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 2 +- drivers/net/ieee802154/at86rf230.c | 2 +- drivers/net/ieee802154/mrf24j40.c | 2 +- drivers/net/wireless/ath/ath10k/htc.c | 4 ++-- drivers/net/wireless/ath/ath10k/mac.c | 18 +++++++++--------- drivers/net/wireless/ath/carl9170/usb.c | 2 +- drivers/net/wireless/ath/wil6210/main.c | 2 +- drivers/net/wireless/brcm80211/brcmfmac/p2p.c | 4 ++-- drivers/net/wireless/zd1211rw/zd_usb.c | 2 +- drivers/parport/parport_ip32.c | 4 ++-- drivers/platform/x86/apple-gmux.c | 2 +- drivers/power/ab8500_fg.c | 4 ++-- drivers/power/jz4740-battery.c | 2 +- drivers/rtc/rtc-hid-sensor-time.c | 2 +- drivers/spi/spi-bcm2835.c | 2 +- drivers/spi/spi-clps711x.c | 2 +- drivers/spi/spi-davinci.c | 2 +- drivers/spi/spi-fsl-espi.c | 2 +- drivers/spi/spi-fsl-spi.c | 2 +- drivers/spi/spi-mpc512x-psc.c | 2 +- drivers/spi/spi-mxs.c | 2 +- drivers/spi/spi-s3c64xx.c | 2 +- drivers/spi/spi-sh-msiof.c | 2 +- drivers/spi/spi-sirf.c | 4 ++-- drivers/spi/spi-tegra114.c | 6 +++--- drivers/spi/spi-tegra20-sflash.c | 2 +- drivers/spi/spi-tegra20-slink.c | 6 +++--- drivers/spi/spi-xilinx.c | 2 +- drivers/spi/spi.c | 2 +- drivers/staging/iio/adc/mxs-lradc.c | 2 +- drivers/staging/media/solo6x10/solo6x10-p2m.c | 2 +- drivers/staging/tidspbridge/core/sync.c | 4 ++-- .../tidspbridge/include/dspbridge/sync.h | 2 +- .../staging/tidspbridge/rmgr/drv_interface.c | 6 +++--- drivers/tty/metag_da.c | 2 +- drivers/usb/c67x00/c67x00-sched.c | 2 +- drivers/usb/gadget/f_fs.c | 2 +- drivers/usb/serial/mos7720.c | 2 +- drivers/video/exynos/exynos_mipi_dsi_common.c | 4 ++-- .../omap2/displays-new/encoder-tpd12s015.c | 2 +- fs/ecryptfs/crypto.c | 2 +- fs/nfs/nfs4state.c | 2 +- fs/ocfs2/dlmglue.c | 4 ++-- sound/firewire/dice.c | 2 +- sound/soc/samsung/ac97.c | 6 +++--- 86 files changed, 138 insertions(+), 138 deletions(-) diff --git a/arch/arm/mach-tegra/apbio.c b/arch/arm/mach-tegra/apbio.c index d7aa52ea6cfc..bc471973cf04 100644 --- a/arch/arm/mach-tegra/apbio.c +++ b/arch/arm/mach-tegra/apbio.c @@ -114,7 +114,7 @@ static int do_dma_transfer(unsigned long apb_add, dma_desc->callback = apb_dma_complete; dma_desc->callback_param = NULL; - INIT_COMPLETION(tegra_apb_wait); + reinit_completion(&tegra_apb_wait); dmaengine_submit(dma_desc); dma_async_issue_pending(tegra_apb_dma_chan); diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index fc536f2971c0..7553b6a77c64 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -452,7 +452,7 @@ static int kw_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, */ if (use_irq) { /* Clear completion */ - INIT_COMPLETION(host->complete); + reinit_completion(&host->complete); /* Ack stale interrupts */ kw_write_reg(reg_isr, kw_read_reg(reg_isr)); /* Arm timeout */ @@ -717,7 +717,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, return -EINVAL; } - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff; req->nbytes = sizeof(struct pmu_i2c_hdr) + 1; @@ -748,7 +748,7 @@ static int pmu_i2c_xfer(struct pmac_i2c_bus *bus, u8 addrdir, int subsize, hdr->bus = PMU_I2C_BUS_STATUS; - INIT_COMPLETION(comp); + reinit_completion(&comp); req->data[0] = PMU_I2C_CMD; req->reply[0] = 0xff; req->nbytes = 2; diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 5f997e79d570..16a255255d30 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -106,7 +106,7 @@ static int pseries_prepare_late(void) atomic_set(&suspend_data.done, 0); atomic_set(&suspend_data.error, 0); suspend_data.complete = &suspend_work; - INIT_COMPLETION(suspend_work); + reinit_completion(&suspend_work); return 0; } diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ac33d5f30778..966f893711b3 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -434,7 +434,7 @@ int af_alg_wait_for_completion(int err, struct af_alg_completion *completion) case -EINPROGRESS: case -EBUSY: wait_for_completion(&completion->completion); - INIT_COMPLETION(completion->completion); + reinit_completion(&completion->completion); err = completion->err; break; }; diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 25a5934f0e50..1ab8258fcf56 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -493,7 +493,7 @@ static inline int do_one_ahash_op(struct ahash_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -721,7 +721,7 @@ static inline int do_one_acipher_op(struct ablkcipher_request *req, int ret) ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index e091ef6e1791..432afc03e7c3 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -179,7 +179,7 @@ static int do_one_async_hash_op(struct ahash_request *req, ret = wait_for_completion_interruptible(&tr->completion); if (!ret) ret = tr->err; - INIT_COMPLETION(tr->completion); + reinit_completion(&tr->completion); } return ret; } @@ -336,7 +336,7 @@ static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template, ret = wait_for_completion_interruptible( &tresult.completion); if (!ret && !(ret = tresult.err)) { - INIT_COMPLETION(tresult.completion); + reinit_completion(&tresult.completion); break; } /* fall through */ @@ -543,7 +543,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -697,7 +697,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !(ret = result.err)) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } case -EBADMSG: @@ -983,7 +983,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ @@ -1086,7 +1086,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc, ret = wait_for_completion_interruptible( &result.completion); if (!ret && !((ret = result.err))) { - INIT_COMPLETION(result.completion); + reinit_completion(&result.completion); break; } /* fall through */ diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 77bbc8266883..92d7797223be 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3017,7 +3017,7 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap) * ourselves at the beginning of each pass over the loop. * * Additionally, all write accesses to &ap->park_req_pending - * through INIT_COMPLETION() (see below) or complete_all() + * through reinit_completion() (see below) or complete_all() * (see ata_scsi_park_store()) are protected by the host lock. * As a result we have that park_req_pending.done is zero on * exit from this function, i.e. when ATA_EH_PARK actions for @@ -3031,7 +3031,7 @@ static inline void ata_eh_pull_park_action(struct ata_port *ap) */ spin_lock_irqsave(ap->lock, flags); - INIT_COMPLETION(ap->park_req_pending); + reinit_completion(&ap->park_req_pending); ata_for_each_link(link, ap, EDGE) { ata_for_each_dev(dev, link, ALL) { struct ata_eh_info *ehi = &link->eh_info; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index ee039afe9078..c12e9b9556be 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -757,7 +757,7 @@ void dpm_resume(pm_message_t state) async_error = 0; list_for_each_entry(dev, &dpm_suspended_list, power.entry) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (is_async(dev)) { get_device(dev); async_schedule(async_resume, dev); @@ -1237,7 +1237,7 @@ static void async_suspend(void *data, async_cookie_t cookie) static int device_suspend(struct device *dev) { - INIT_COMPLETION(dev->power.completion); + reinit_completion(&dev->power.completion); if (pm_async_enabled && dev->power.async_suspend) { get_device(dev); diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 4ff85b8785ee..748dea4f34dc 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -343,7 +343,7 @@ static int fd_motor_on(int nr) unit[nr].motor = 1; fd_select(nr); - INIT_COMPLETION(motor_on_completion); + reinit_completion(&motor_on_completion); motor_on_timer.data = nr; mod_timer(&motor_on_timer, jiffies + HZ/2); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 0c004ac05811..b35fc4f5237c 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2808,7 +2808,7 @@ resend_cmd2: /* erase the old error information */ memset(c->err_info, 0, sizeof(ErrorInfo_struct)); return_status = IO_OK; - INIT_COMPLETION(wait); + reinit_completion(&wait); goto resend_cmd2; } @@ -3669,7 +3669,7 @@ static int add_to_scan_list(struct ctlr_info *h) } } if (!found && !h->busy_scanning) { - INIT_COMPLETION(h->scan_wait); + reinit_completion(&h->scan_wait); list_add_tail(&h->scan_list, &scan_q); ret = 1; } diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index d2120ba8f3f9..73ce739f8e19 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -79,7 +79,7 @@ static int timeriomem_rng_data_read(struct hwrng *rng, u32 *data) priv->expires = cur + delay; priv->present = 0; - INIT_COMPLETION(priv->completion); + reinit_completion(&priv->completion); mod_timer(&priv->timer, priv->expires); return 4; diff --git a/drivers/crypto/tegra-aes.c b/drivers/crypto/tegra-aes.c index 2d58da972ae2..fa05e3c329bd 100644 --- a/drivers/crypto/tegra-aes.c +++ b/drivers/crypto/tegra-aes.c @@ -268,7 +268,7 @@ static int aes_start_crypt(struct tegra_aes_dev *dd, u32 in_addr, u32 out_addr, aes_writel(dd, value, TEGRA_AES_SECURE_INPUT_SELECT); aes_writel(dd, out_addr, TEGRA_AES_SECURE_DEST_ADDR); - INIT_COMPLETION(dd->op_complete); + reinit_completion(&dd->op_complete); for (i = 0; i < AES_HW_MAX_ICQ_LENGTH - 1; i++) { do { diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index e5af0e3a26ec..0e799516a2ab 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -477,7 +477,7 @@ void fw_send_phy_config(struct fw_card *card, phy_config_packet.header[1] = data; phy_config_packet.header[2] = ~data; phy_config_packet.generation = generation; - INIT_COMPLETION(phy_config_done); + reinit_completion(&phy_config_done); card->driver->send_request(card, &phy_config_packet); wait_for_completion_timeout(&phy_config_done, timeout); diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c index 1eb86c79523e..e28107061148 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c @@ -99,7 +99,7 @@ static int xfer_read(struct i2c_adapter *adap, struct i2c_msg *pmsg) i2c_dev->status = I2C_STAT_INIT; i2c_dev->msg = pmsg; i2c_dev->buf_offset = 0; - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); /* Enable I2C transaction */ temp = ((pmsg->len) << 20) | HI2C_EDID_READ | HI2C_ENABLE_TRANSACTION; diff --git a/drivers/hid/hid-wiimote.h b/drivers/hid/hid-wiimote.h index 75db0c400037..cfa63b0825b0 100644 --- a/drivers/hid/hid-wiimote.h +++ b/drivers/hid/hid-wiimote.h @@ -327,7 +327,7 @@ static inline void wiimote_cmd_acquire_noint(struct wiimote_data *wdata) static inline void wiimote_cmd_set(struct wiimote_data *wdata, int cmd, __u32 opt) { - INIT_COMPLETION(wdata->state.ready); + reinit_completion(&wdata->state.ready); wdata->state.cmd = cmd; wdata->state.opt = opt; } diff --git a/drivers/hwmon/jz4740-hwmon.c b/drivers/hwmon/jz4740-hwmon.c index e0d66b9590ab..a183e488db78 100644 --- a/drivers/hwmon/jz4740-hwmon.c +++ b/drivers/hwmon/jz4740-hwmon.c @@ -66,7 +66,7 @@ static ssize_t jz4740_hwmon_read_adcin(struct device *dev, mutex_lock(&hwmon->lock); - INIT_COMPLETION(*completion); + reinit_completion(completion); enable_irq(hwmon->irq); hwmon->cell->enable(to_platform_device(dev)); diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index fd059308affa..8edba9de76df 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -371,7 +371,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) dev_dbg(dev->dev, "transfer: %s %d bytes.\n", (dev->msg->flags & I2C_M_RD) ? "read" : "write", dev->buf_len); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->transfer_status = 0; if (!dev->buf_len) { diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index ea4b08fc3353..d7e8600f31fb 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -151,7 +151,7 @@ static int bcm2835_i2c_xfer_msg(struct bcm2835_i2c_dev *i2c_dev, i2c_dev->msg_buf = msg->buf; i2c_dev->msg_buf_remaining = msg->len; - INIT_COMPLETION(i2c_dev->completion); + reinit_completion(&i2c_dev->completion); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, BCM2835_I2C_C_CLEAR); diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 132369fad4e0..960dec61c64e 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -323,7 +323,7 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) davinci_i2c_write_reg(dev, DAVINCI_I2C_CNT_REG, dev->buf_len); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->cmd_err = 0; /* Take I2C out of reset and configure it as master */ diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 5888feef1ac5..e89e3e2145e5 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -613,7 +613,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) mutex_lock(&dev->lock); pm_runtime_get_sync(dev->dev); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->msgs = msgs; dev->msgs_num = num; dev->cmd_err = 0; diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index 1672effbcebb..0043ede234c2 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -541,7 +541,7 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, desc->dptr_high = upper_32_bits(dma_addr); } - INIT_COMPLETION(priv->cmp); + reinit_completion(&priv->cmp); /* Add the descriptor */ ismt_submit_desc(priv); diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index b7c857774708..3aedd86a6468 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -505,7 +505,7 @@ static int mxs_i2c_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, return err; } } else { - INIT_COMPLETION(i2c->cmd_complete); + reinit_completion(&i2c->cmd_complete); ret = mxs_i2c_dma_setup_xfer(adap, msg, flags); if (ret) return ret; diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 9967a6f9c2ff..a6a891d7970d 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -543,7 +543,7 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap, w |= OMAP_I2C_BUF_RXFIF_CLR | OMAP_I2C_BUF_TXFIF_CLR; omap_i2c_write_reg(dev, OMAP_I2C_BUF_REG, w); - INIT_COMPLETION(dev->cmd_complete); + reinit_completion(&dev->cmd_complete); dev->cmd_err = 0; w = OMAP_I2C_CON_EN | OMAP_I2C_CON_MST | OMAP_I2C_CON_STT; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index c457cb447c66..e661edee4d0c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -544,7 +544,7 @@ static int tegra_i2c_xfer_msg(struct tegra_i2c_dev *i2c_dev, i2c_dev->msg_buf_remaining = msg->len; i2c_dev->msg_err = I2C_ERR_NONE; i2c_dev->msg_read = (msg->flags & I2C_M_RD); - INIT_COMPLETION(i2c_dev->msg_complete); + reinit_completion(&i2c_dev->msg_complete); packet_header = (0 << PACKET_HEADER0_HEADER_SIZE_SHIFT) | PACKET_HEADER0_PROTOCOL_I2C | diff --git a/drivers/i2c/busses/i2c-wmt.c b/drivers/i2c/busses/i2c-wmt.c index c65da3d913a0..31395fa8121d 100644 --- a/drivers/i2c/busses/i2c-wmt.c +++ b/drivers/i2c/busses/i2c-wmt.c @@ -158,7 +158,7 @@ static int wmt_i2c_write(struct i2c_adapter *adap, struct i2c_msg *pmsg, writew(val, i2c_dev->base + REG_CR); } - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); if (i2c_dev->mode == I2C_MODE_STANDARD) tcr_val = TCR_STANDARD_MODE; @@ -247,7 +247,7 @@ static int wmt_i2c_read(struct i2c_adapter *adap, struct i2c_msg *pmsg, writew(val, i2c_dev->base + REG_CR); } - INIT_COMPLETION(i2c_dev->complete); + reinit_completion(&i2c_dev->complete); if (i2c_dev->mode == I2C_MODE_STANDARD) tcr_val = TCR_STANDARD_MODE; diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index e6fbd3e70981..9a4e0e32a771 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -188,7 +188,7 @@ static int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); ret = ad_sigma_delta_set_mode(sigma_delta, mode); if (ret < 0) @@ -259,7 +259,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, spi_bus_lock(sigma_delta->spi->master); sigma_delta->bus_locked = true; - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_SINGLE); @@ -343,7 +343,7 @@ static int ad_sd_buffer_postdisable(struct iio_dev *indio_dev) { struct ad_sigma_delta *sigma_delta = iio_device_get_drvdata(indio_dev); - INIT_COMPLETION(sigma_delta->completion); + reinit_completion(&sigma_delta->completion); wait_for_completion_timeout(&sigma_delta->completion, HZ); if (!sigma_delta->irq_dis) { diff --git a/drivers/iio/adc/nau7802.c b/drivers/iio/adc/nau7802.c index 54c5babe6746..e525aa6475c4 100644 --- a/drivers/iio/adc/nau7802.c +++ b/drivers/iio/adc/nau7802.c @@ -190,7 +190,7 @@ static int nau7802_read_irq(struct iio_dev *indio_dev, struct nau7802_state *st = iio_priv(indio_dev); int ret; - INIT_COMPLETION(st->value_ok); + reinit_completion(&st->value_ok); enable_irq(st->client->irq); nau7802_sync(st); diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index d53e0b72a407..4204841cdc49 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -242,7 +242,7 @@ static int cyttsp_soft_reset(struct cyttsp *ts) int retval; /* wait for interrupt to set ready completion */ - INIT_COMPLETION(ts->bl_ready); + reinit_completion(&ts->bl_ready); ts->state = CY_BL_STATE; enable_irq(ts->irq); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 50ea7ed24dce..81b0fa660452 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -950,7 +950,7 @@ static int crypt_convert(struct crypt_config *cc, /* async */ case -EBUSY: wait_for_completion(&ctx->restart); - INIT_COMPLETION(ctx->restart); + reinit_completion(&ctx->restart); /* fall through*/ case -EINPROGRESS: this_cc->req = NULL; diff --git a/drivers/media/platform/blackfin/bfin_capture.c b/drivers/media/platform/blackfin/bfin_capture.c index 4c1105977090..281916591437 100644 --- a/drivers/media/platform/blackfin/bfin_capture.c +++ b/drivers/media/platform/blackfin/bfin_capture.c @@ -422,7 +422,7 @@ static int bcap_start_streaming(struct vb2_queue *vq, unsigned int count) return ret; } - INIT_COMPLETION(bcap_dev->comp); + reinit_completion(&bcap_dev->comp); bcap_dev->stop = false; return 0; } diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 97c2c18803ef..9cf6731fb816 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -375,7 +375,7 @@ static int wl1273_fm_set_tx_freq(struct wl1273_device *radio, unsigned int freq) if (r) return r; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); /* wait for the FR IRQ */ r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(2000)); @@ -389,7 +389,7 @@ static int wl1273_fm_set_tx_freq(struct wl1273_device *radio, unsigned int freq) if (r) return r; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); /* wait for the POWER_ENB IRQ */ r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(1000)); @@ -444,7 +444,7 @@ static int wl1273_fm_set_rx_freq(struct wl1273_device *radio, unsigned int freq) goto err; } - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); r = wait_for_completion_timeout(&radio->busy, msecs_to_jiffies(2000)); if (!r) { @@ -805,7 +805,7 @@ static int wl1273_fm_set_seek(struct wl1273_device *radio, if (level < SCHAR_MIN || level > SCHAR_MAX) return -EINVAL; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); dev_dbg(radio->dev, "%s: BUSY\n", __func__); r = core->write(core, WL1273_INT_MASK_SET, radio->irq_flags); @@ -847,7 +847,7 @@ static int wl1273_fm_set_seek(struct wl1273_device *radio, if (r) goto out; - INIT_COMPLETION(radio->busy); + reinit_completion(&radio->busy); dev_dbg(radio->dev, "%s: BUSY\n", __func__); r = core->write(core, WL1273_TUNER_MODE_SET, TUNER_MODE_AUTO_SEEK); diff --git a/drivers/media/radio/si470x/radio-si470x-common.c b/drivers/media/radio/si470x/radio-si470x-common.c index 5c57e5b0f949..0bd250068285 100644 --- a/drivers/media/radio/si470x/radio-si470x-common.c +++ b/drivers/media/radio/si470x/radio-si470x-common.c @@ -218,7 +218,7 @@ static int si470x_set_chan(struct si470x_device *radio, unsigned short chan) goto done; /* wait till tune operation has completed */ - INIT_COMPLETION(radio->completion); + reinit_completion(&radio->completion); retval = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(tune_timeout)); if (!retval) @@ -341,7 +341,7 @@ static int si470x_set_seek(struct si470x_device *radio, return retval; /* wait till tune operation has completed */ - INIT_COMPLETION(radio->completion); + reinit_completion(&radio->completion); retval = wait_for_completion_timeout(&radio->completion, msecs_to_jiffies(seek_timeout)); if (!retval) diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c index 19632b1c2190..b53626ba6f49 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -207,7 +207,7 @@ static int iguanair_send(struct iguanair *ir, unsigned size) { int rc; - INIT_COMPLETION(ir->completion); + reinit_completion(&ir->completion); ir->urb_out->transfer_buffer_length = size; rc = usb_submit_urb(ir->urb_out, GFP_KERNEL); diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index bbf4aea1627d..a0547dbf9806 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -253,7 +253,7 @@ void memstick_new_req(struct memstick_host *host) { if (host->card) { host->retries = cmd_retries; - INIT_COMPLETION(host->card->mrq_complete); + reinit_completion(&host->card->mrq_complete); host->request(host); } } diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index 1b6e91345222..31727bf285d0 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -290,7 +290,7 @@ static int r592_transfer_fifo_dma(struct r592_device *dev) dbg_verbose("doing dma transfer"); dev->dma_error = 0; - INIT_COMPLETION(dev->dma_done); + reinit_completion(&dev->dma_done); /* TODO: hidden assumption about nenth beeing always 1 */ sg_count = dma_map_sg(&dev->pci_dev->dev, &dev->req->sg, 1, is_write ? diff --git a/drivers/misc/mic/card/mic_virtio.c b/drivers/misc/mic/card/mic_virtio.c index 914cc9b2caad..8aa42e738acc 100644 --- a/drivers/misc/mic/card/mic_virtio.c +++ b/drivers/misc/mic/card/mic_virtio.c @@ -493,7 +493,7 @@ static int mic_remove_device(struct mic_device_desc __iomem *d, ioread8(&dc->config_change), ioread8(&d->type), mvdev); status = ioread8(&d->status); - INIT_COMPLETION(mvdev->reset_done); + reinit_completion(&mvdev->reset_done); unregister_virtio_device(&mvdev->vdev); mic_free_card_irq(mvdev->virtio_cookie, mvdev); if (status & VIRTIO_CONFIG_S_DRIVER_OK) diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index b079c65eed6d..7558d9186438 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -38,7 +38,7 @@ static void mic_reset(struct mic_device *mdev) #define MIC_RESET_TO (45) - INIT_COMPLETION(mdev->reset_wait); + reinit_completion(&mdev->reset_wait); mdev->ops->reset_fw_ready(mdev); mdev->ops->reset(mdev); diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 83907c720594..96853a09788a 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -218,7 +218,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_debug("%s", __func__); - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); if (4 != st_int_write(kim_gdata->core_data, read_ver_cmd, 4)) { pr_err("kim: couldn't write 4 bytes"); return -EIO; @@ -229,7 +229,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_err(" waiting for ver info- timed out "); return -ETIMEDOUT; } - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); /* the positions 12 & 13 in the response buffer provide with the * chip, major & minor numbers */ @@ -362,7 +362,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) /* reinit completion before sending for the * relevant wait */ - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); /* * Free space found in uart buffer, call st_int_write @@ -398,7 +398,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) release_firmware(kim_gdata->fw_entry); return -ETIMEDOUT; } - INIT_COMPLETION(kim_gdata->kim_rcvd); + reinit_completion(&kim_gdata->kim_rcvd); break; case ACTION_DELAY: /* sleep */ pr_info("sleep command in scr"); @@ -474,7 +474,7 @@ long st_kim_start(void *kim_data) gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); mdelay(100); /* re-initialize the completion */ - INIT_COMPLETION(kim_gdata->ldisc_installed); + reinit_completion(&kim_gdata->ldisc_installed); /* send notification to UIM */ kim_gdata->ldisc_install = 1; pr_info("ldisc_install = 1"); @@ -525,7 +525,7 @@ long st_kim_stop(void *kim_data) kim_gdata->kim_pdev->dev.platform_data; struct tty_struct *tty = kim_gdata->core_data->tty; - INIT_COMPLETION(kim_gdata->ldisc_installed); + reinit_completion(&kim_gdata->ldisc_installed); if (tty) { /* can be called before ldisc is installed */ /* Flush any pending characters in the driver and discipline. */ diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c index 4edea7f4462f..9dfdb06c508b 100644 --- a/drivers/mtd/nand/mxc_nand.c +++ b/drivers/mtd/nand/mxc_nand.c @@ -396,7 +396,7 @@ static void wait_op_done(struct mxc_nand_host *host, int useirq) if (useirq) { if (!host->devtype_data->check_int(host)) { - INIT_COMPLETION(host->op_completion); + reinit_completion(&host->op_completion); irq_control(host, 1); wait_for_completion(&host->op_completion); } diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index 9dcf02d22aa8..325930db3f04 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -181,7 +181,7 @@ static void r852_do_dma(struct r852_device *dev, uint8_t *buf, int do_read) /* Set dma direction */ dev->dma_dir = do_read; dev->dma_stage = 1; - INIT_COMPLETION(dev->dma_done); + reinit_completion(&dev->dma_done); dbg_verbose("doing dma %s ", do_read ? "read" : "write"); diff --git a/drivers/mtd/onenand/omap2.c b/drivers/mtd/onenand/omap2.c index 2362909d20c0..6547c84afc3a 100644 --- a/drivers/mtd/onenand/omap2.c +++ b/drivers/mtd/onenand/omap2.c @@ -159,7 +159,7 @@ static int omap2_onenand_wait(struct mtd_info *mtd, int state) syscfg = read_reg(c, ONENAND_REG_SYS_CFG1); } - INIT_COMPLETION(c->irq_done); + reinit_completion(&c->irq_done); if (c->gpio_irq) { result = gpio_get_value(c->gpio_irq); if (result == -1) { @@ -349,7 +349,7 @@ static int omap3_onenand_read_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); timeout = jiffies + msecs_to_jiffies(20); @@ -420,7 +420,7 @@ static int omap3_onenand_write_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); timeout = jiffies + msecs_to_jiffies(20); @@ -499,7 +499,7 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); wait_for_completion(&c->dma_done); @@ -544,7 +544,7 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, omap_set_dma_dest_params(c->dma_channel, 0, OMAP_DMA_AMODE_POST_INC, dma_dst, 0, 0); - INIT_COMPLETION(c->dma_done); + reinit_completion(&c->dma_done); omap_start_dma(c->dma_channel); wait_for_completion(&c->dma_done); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index 09810ddd11ec..a01a6a74ee3a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3537,7 +3537,7 @@ int qlcnic_83xx_resume(struct qlcnic_adapter *adapter) void qlcnic_83xx_reinit_mbx_work(struct qlcnic_mailbox *mbx) { - INIT_COMPLETION(mbx->completion); + reinit_completion(&mbx->completion); set_bit(QLC_83XX_MBX_READY, &mbx->status); } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 6f10b4964726..2cbe1c249996 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -561,7 +561,7 @@ at86rf230_xmit(struct ieee802154_dev *dev, struct sk_buff *skb) spin_lock_irqsave(&lp->lock, flags); lp->is_tx = 1; - INIT_COMPLETION(lp->tx_complete); + reinit_completion(&lp->tx_complete); spin_unlock_irqrestore(&lp->lock, flags); rc = at86rf230_write_fbuf(lp, skb->data, skb->len); diff --git a/drivers/net/ieee802154/mrf24j40.c b/drivers/net/ieee802154/mrf24j40.c index 0632d34905c7..c6e46d6e9f75 100644 --- a/drivers/net/ieee802154/mrf24j40.c +++ b/drivers/net/ieee802154/mrf24j40.c @@ -343,7 +343,7 @@ static int mrf24j40_tx(struct ieee802154_dev *dev, struct sk_buff *skb) if (ret) goto err; - INIT_COMPLETION(devrec->tx_complete); + reinit_completion(&devrec->tx_complete); /* Set TXNTRIG bit of TXNCON to send packet */ ret = read_short_reg(devrec, REG_TXNCON, &val); diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 3118d7506734..edae50b52806 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -534,7 +534,7 @@ int ath10k_htc_wait_target(struct ath10k_htc *htc) u16 credit_count; u16 credit_size; - INIT_COMPLETION(htc->ctl_resp); + reinit_completion(&htc->ctl_resp); status = ath10k_hif_start(htc->ar); if (status) { @@ -669,7 +669,7 @@ int ath10k_htc_connect_service(struct ath10k_htc *htc, req_msg->flags = __cpu_to_le16(flags); req_msg->service_id = __cpu_to_le16(conn_req->service_id); - INIT_COMPLETION(htc->ctl_resp); + reinit_completion(&htc->ctl_resp); status = ath10k_htc_send(htc, ATH10K_HTC_EP_0, skb); if (status) { diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0b1cc516e778..97ac8c87cba2 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -92,7 +92,7 @@ static int ath10k_install_key(struct ath10k_vif *arvif, lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->install_key_done); + reinit_completion(&ar->install_key_done); ret = ath10k_send_key(arvif, key, cmd, macaddr); if (ret) @@ -438,7 +438,7 @@ static int ath10k_vdev_start(struct ath10k_vif *arvif) lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->vdev_setup_done); + reinit_completion(&ar->vdev_setup_done); arg.vdev_id = arvif->vdev_id; arg.dtim_period = arvif->dtim_period; @@ -491,7 +491,7 @@ static int ath10k_vdev_stop(struct ath10k_vif *arvif) lockdep_assert_held(&ar->conf_mutex); - INIT_COMPLETION(ar->vdev_setup_done); + reinit_completion(&ar->vdev_setup_done); ret = ath10k_wmi_vdev_stop(ar, arvif->vdev_id); if (ret) { @@ -1666,7 +1666,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) } spin_lock_bh(&ar->data_lock); - INIT_COMPLETION(ar->offchan_tx_completed); + reinit_completion(&ar->offchan_tx_completed); ar->offchan_tx_skb = skb; spin_unlock_bh(&ar->data_lock); @@ -2476,8 +2476,8 @@ static int ath10k_hw_scan(struct ieee80211_hw *hw, goto exit; } - INIT_COMPLETION(ar->scan.started); - INIT_COMPLETION(ar->scan.completed); + reinit_completion(&ar->scan.started); + reinit_completion(&ar->scan.completed); ar->scan.in_progress = true; ar->scan.aborting = false; ar->scan.is_roc = false; @@ -2832,9 +2832,9 @@ static int ath10k_remain_on_channel(struct ieee80211_hw *hw, goto exit; } - INIT_COMPLETION(ar->scan.started); - INIT_COMPLETION(ar->scan.completed); - INIT_COMPLETION(ar->scan.on_channel); + reinit_completion(&ar->scan.started); + reinit_completion(&ar->scan.completed); + reinit_completion(&ar->scan.on_channel); ar->scan.in_progress = true; ar->scan.aborting = false; ar->scan.is_roc = true; diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 307bc0ddff99..ca115f33746f 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -773,7 +773,7 @@ void carl9170_usb_stop(struct ar9170 *ar) complete_all(&ar->cmd_wait); /* This is required to prevent an early completion on _start */ - INIT_COMPLETION(ar->cmd_wait); + reinit_completion(&ar->cmd_wait); /* * Note: diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 0a2844c48a60..fd30cddd5882 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -250,7 +250,7 @@ int wil_reset(struct wil6210_priv *wil) /* init after reset */ wil->pending_connect_cid = -1; - INIT_COMPLETION(wil->wmi_ready); + reinit_completion(&wil->wmi_ready); /* TODO: release MAC reset */ wil6210_enable_irq(wil); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c index d7a974532909..5b5b952d47b1 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c @@ -1148,7 +1148,7 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) pri_vif = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif; - INIT_COMPLETION(afx_hdl->act_frm_scan); + reinit_completion(&afx_hdl->act_frm_scan); set_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, &p2p->status); afx_hdl->is_active = true; afx_hdl->peer_chan = P2P_INVALID_CHANNEL; @@ -1501,7 +1501,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, brcmf_dbg(TRACE, "Enter\n"); - INIT_COMPLETION(p2p->send_af_done); + reinit_completion(&p2p->send_af_done); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 7ef0b4a181e1..84d94f572a46 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -1619,7 +1619,7 @@ static void prepare_read_regs_int(struct zd_usb *usb, atomic_set(&intr->read_regs_enabled, 1); intr->read_regs.req = req; intr->read_regs.req_count = count; - INIT_COMPLETION(intr->read_regs.completion); + reinit_completion(&intr->read_regs.completion); spin_unlock_irq(&intr->lock); } diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index d4716273651e..c864f82bd37d 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -1331,7 +1331,7 @@ static unsigned int parport_ip32_fwp_wait_interrupt(struct parport *p) break; /* Initialize mutex used to take interrupts into account */ - INIT_COMPLETION(priv->irq_complete); + reinit_completion(&priv->irq_complete); /* Enable serviceIntr */ parport_ip32_frob_econtrol(p, ECR_SERVINTR, 0); @@ -1446,7 +1446,7 @@ static size_t parport_ip32_fifo_write_block_dma(struct parport *p, priv->irq_mode = PARPORT_IP32_IRQ_HERE; parport_ip32_dma_start(DMA_TO_DEVICE, (void *)buf, len); - INIT_COMPLETION(priv->irq_complete); + reinit_completion(&priv->irq_complete); parport_ip32_frob_econtrol(p, ECR_DMAEN | ECR_SERVINTR, ECR_DMAEN); nfault_timeout = min((unsigned long)physport->cad->timeout, diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 8eea2efbbb6d..605a9be55129 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -289,7 +289,7 @@ static int gmux_switchto(enum vga_switcheroo_client_id id) static int gmux_set_discrete_state(struct apple_gmux_data *gmux_data, enum vga_switcheroo_state state) { - INIT_COMPLETION(gmux_data->powerchange_done); + reinit_completion(&gmux_data->powerchange_done); if (state == VGA_SWITCHEROO_ON) { gmux_write8(gmux_data, GMUX_PORT_DISCRETE_POWER, 1); diff --git a/drivers/power/ab8500_fg.c b/drivers/power/ab8500_fg.c index 754970717c31..3cb4178e397c 100644 --- a/drivers/power/ab8500_fg.c +++ b/drivers/power/ab8500_fg.c @@ -574,8 +574,8 @@ int ab8500_fg_inst_curr_start(struct ab8500_fg *di) } /* Return and WFI */ - INIT_COMPLETION(di->ab8500_fg_started); - INIT_COMPLETION(di->ab8500_fg_complete); + reinit_completion(&di->ab8500_fg_started); + reinit_completion(&di->ab8500_fg_complete); enable_irq(di->irq); /* Note: cc_lock is still locked */ diff --git a/drivers/power/jz4740-battery.c b/drivers/power/jz4740-battery.c index d9686aa9270a..6c8931d4ad62 100644 --- a/drivers/power/jz4740-battery.c +++ b/drivers/power/jz4740-battery.c @@ -73,7 +73,7 @@ static long jz_battery_read_voltage(struct jz_battery *battery) mutex_lock(&battery->lock); - INIT_COMPLETION(battery->read_completion); + reinit_completion(&battery->read_completion); enable_irq(battery->irq); battery->cell->enable(battery->pdev); diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c index a34e5cfd2ab5..965a9da70867 100644 --- a/drivers/rtc/rtc-hid-sensor-time.c +++ b/drivers/rtc/rtc-hid-sensor-time.c @@ -209,7 +209,7 @@ static int hid_rtc_read_time(struct device *dev, struct rtc_time *tm) platform_get_drvdata(to_platform_device(dev)); int ret; - INIT_COMPLETION(time_state->comp_last_time); + reinit_completion(&time_state->comp_last_time); /* get a report with all values through requesting one value */ sensor_hub_input_attr_get_raw_value(time_state->common_attributes.hsdev, HID_USAGE_SENSOR_TIME, hid_time_addresses[0], diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index 4c332143a310..3ed666fe840a 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -217,7 +217,7 @@ static int bcm2835_spi_start_transfer(struct spi_device *spi, cs |= spi->chip_select; } - INIT_COMPLETION(bs->done); + reinit_completion(&bs->done); bs->tx_buf = tfr->tx_buf; bs->rx_buf = tfr->rx_buf; bs->len = tfr->len; diff --git a/drivers/spi/spi-clps711x.c b/drivers/spi/spi-clps711x.c index e2a5a426b2ef..6f03d7e6435d 100644 --- a/drivers/spi/spi-clps711x.c +++ b/drivers/spi/spi-clps711x.c @@ -105,7 +105,7 @@ static int spi_clps711x_transfer_one_message(struct spi_master *master, gpio_set_value(cs, !!(msg->spi->mode & SPI_CS_HIGH)); - INIT_COMPLETION(hw->done); + reinit_completion(&hw->done); hw->count = 0; hw->len = xfer->len; diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index dd72445ba2ea..50b2d88c8190 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -554,7 +554,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t) clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK); set_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK); - INIT_COMPLETION(dspi->done); + reinit_completion(&dspi->done); if (spicfg->io_type == SPI_IO_TYPE_INTR) set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT); diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c index 32200d4f8780..80d8f40f7e05 100644 --- a/drivers/spi/spi-fsl-espi.c +++ b/drivers/spi/spi-fsl-espi.c @@ -232,7 +232,7 @@ static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t) mpc8xxx_spi->tx = t->tx_buf; mpc8xxx_spi->rx = t->rx_buf; - INIT_COMPLETION(mpc8xxx_spi->done); + reinit_completion(&mpc8xxx_spi->done); /* Set SPCOM[CS] and SPCOM[TRANLEN] field */ if ((t->len - 1) > SPCOM_TRANLEN_MAX) { diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 2129fcd1c31b..119f7af94537 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -339,7 +339,7 @@ static int fsl_spi_bufs(struct spi_device *spi, struct spi_transfer *t, mpc8xxx_spi->tx = t->tx_buf; mpc8xxx_spi->rx = t->rx_buf; - INIT_COMPLETION(mpc8xxx_spi->done); + reinit_completion(&mpc8xxx_spi->done); if (mpc8xxx_spi->flags & SPI_CPM_MODE) ret = fsl_spi_cpm_bufs(mpc8xxx_spi, t, is_dma_mapped); diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c index 58d5ee0e4443..9602bbd8d7ea 100644 --- a/drivers/spi/spi-mpc512x-psc.c +++ b/drivers/spi/spi-mpc512x-psc.c @@ -167,7 +167,7 @@ static int mpc512x_psc_spi_transfer_rxtx(struct spi_device *spi, } /* have the ISR trigger when the TX FIFO is empty */ - INIT_COMPLETION(mps->txisrdone); + reinit_completion(&mps->txisrdone); out_be32(&fifo->txisr, MPC512x_PSC_FIFO_EMPTY); out_be32(&fifo->tximr, MPC512x_PSC_FIFO_EMPTY); wait_for_completion(&mps->txisrdone); diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c index de333059a9a7..73afb56c08cc 100644 --- a/drivers/spi/spi-mxs.c +++ b/drivers/spi/spi-mxs.c @@ -202,7 +202,7 @@ static int mxs_spi_txrx_dma(struct mxs_spi *spi, if (!dma_xfer) return -ENOMEM; - INIT_COMPLETION(spi->c); + reinit_completion(&spi->c); /* Chip select was already programmed into CTRL0 */ ctrl0 = readl(ssp->base + HW_SSP_CTRL0); diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index 9e2020df9e0f..4c4b0a1219a7 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -890,7 +890,7 @@ static int s3c64xx_spi_transfer_one(struct spi_master *master, unsigned long flags; int use_dma; - INIT_COMPLETION(sdd->xfer_completion); + reinit_completion(&sdd->xfer_completion); /* Only BPW and Speed may change across transfers */ bpw = xfer->bits_per_word; diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index 2a95435a6a11..c74298cf70e2 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -465,7 +465,7 @@ static int sh_msiof_spi_txrx_once(struct sh_msiof_spi_priv *p, ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TXE); /* start by setting frame bit */ - INIT_COMPLETION(p->done); + reinit_completion(&p->done); ret = ret ? ret : sh_msiof_modify_ctr_wait(p, 0, CTR_TFSE); if (ret) { dev_err(&p->pdev->dev, "failed to start hardware\n"); diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c index 592b4aff651f..ed5e501c4652 100644 --- a/drivers/spi/spi-sirf.c +++ b/drivers/spi/spi-sirf.c @@ -305,8 +305,8 @@ static int spi_sirfsoc_transfer(struct spi_device *spi, struct spi_transfer *t) sspi->tx = t->tx_buf ? t->tx_buf : sspi->dummypage; sspi->rx = t->rx_buf ? t->rx_buf : sspi->dummypage; sspi->left_tx_word = sspi->left_rx_word = t->len / sspi->word_width; - INIT_COMPLETION(sspi->rx_done); - INIT_COMPLETION(sspi->tx_done); + reinit_completion(&sspi->rx_done); + reinit_completion(&sspi->tx_done); writel(SIRFSOC_SPI_INT_MASK_ALL, sspi->base + SIRFSOC_SPI_INT_STATUS); diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 9146bb3c2489..aaecfb3ebf58 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -451,7 +451,7 @@ static void tegra_spi_dma_complete(void *args) static int tegra_spi_start_tx_dma(struct tegra_spi_data *tspi, int len) { - INIT_COMPLETION(tspi->tx_dma_complete); + reinit_completion(&tspi->tx_dma_complete); tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan, tspi->tx_dma_phys, len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -470,7 +470,7 @@ static int tegra_spi_start_tx_dma(struct tegra_spi_data *tspi, int len) static int tegra_spi_start_rx_dma(struct tegra_spi_data *tspi, int len) { - INIT_COMPLETION(tspi->rx_dma_complete); + reinit_completion(&tspi->rx_dma_complete); tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan, tspi->rx_dma_phys, len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -844,7 +844,7 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, list_for_each_entry(xfer, &msg->transfers, transfer_list) { unsigned long cmd1; - INIT_COMPLETION(tspi->xfer_completion); + reinit_completion(&tspi->xfer_completion); cmd1 = tegra_spi_setup_transfer_one(spi, xfer, is_first_msg); diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c index 79be8ce6a9d1..4dc8e8129459 100644 --- a/drivers/spi/spi-tegra20-sflash.c +++ b/drivers/spi/spi-tegra20-sflash.c @@ -339,7 +339,7 @@ static int tegra_sflash_transfer_one_message(struct spi_master *master, msg->actual_length = 0; single_xfer = list_is_singular(&msg->transfers); list_for_each_entry(xfer, &msg->transfers, transfer_list) { - INIT_COMPLETION(tsd->xfer_completion); + reinit_completion(&tsd->xfer_completion); ret = tegra_sflash_start_transfer_one(spi, xfer, is_first_msg, single_xfer); if (ret < 0) { diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index af0a67886ae8..e66715ba37ed 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -462,7 +462,7 @@ static void tegra_slink_dma_complete(void *args) static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len) { - INIT_COMPLETION(tspi->tx_dma_complete); + reinit_completion(&tspi->tx_dma_complete); tspi->tx_dma_desc = dmaengine_prep_slave_single(tspi->tx_dma_chan, tspi->tx_dma_phys, len, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -481,7 +481,7 @@ static int tegra_slink_start_tx_dma(struct tegra_slink_data *tspi, int len) static int tegra_slink_start_rx_dma(struct tegra_slink_data *tspi, int len) { - INIT_COMPLETION(tspi->rx_dma_complete); + reinit_completion(&tspi->rx_dma_complete); tspi->rx_dma_desc = dmaengine_prep_slave_single(tspi->rx_dma_chan, tspi->rx_dma_phys, len, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -836,7 +836,7 @@ static int tegra_slink_transfer_one(struct spi_master *master, struct tegra_slink_data *tspi = spi_master_get_devdata(master); int ret; - INIT_COMPLETION(tspi->xfer_completion); + reinit_completion(&tspi->xfer_completion); ret = tegra_slink_start_transfer_one(spi, xfer); if (ret < 0) { dev_err(tspi->dev, diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index ec3a83f52ea2..6d4ce4615163 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -258,7 +258,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) xspi->tx_ptr = t->tx_buf; xspi->rx_ptr = t->rx_buf; xspi->remaining_bytes = t->len; - INIT_COMPLETION(xspi->done); + reinit_completion(&xspi->done); /* Enable the transmit empty interrupt, which we use to determine diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 927998aa5e71..8d85ddc46011 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -571,7 +571,7 @@ static int spi_transfer_one_message(struct spi_master *master, list_for_each_entry(xfer, &msg->transfers, transfer_list) { trace_spi_transfer_start(msg, xfer); - INIT_COMPLETION(master->xfer_completion); + reinit_completion(&master->xfer_completion); ret = master->transfer_one(master, msg->spi, xfer); if (ret < 0) { diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index aeae76b77be5..e2dd7830b320 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -783,7 +783,7 @@ static int mxs_lradc_read_raw(struct iio_dev *iio_dev, if (!ret) return -EBUSY; - INIT_COMPLETION(lradc->completion); + reinit_completion(&lradc->completion); /* * No buffered operation in progress, map the channel and trigger it. diff --git a/drivers/staging/media/solo6x10/solo6x10-p2m.c b/drivers/staging/media/solo6x10/solo6x10-p2m.c index 333594189b81..7f2f2472655b 100644 --- a/drivers/staging/media/solo6x10/solo6x10-p2m.c +++ b/drivers/staging/media/solo6x10/solo6x10-p2m.c @@ -87,7 +87,7 @@ int solo_p2m_dma_desc(struct solo_dev *solo_dev, if (mutex_lock_interruptible(&p2m_dev->mutex)) return -EINTR; - INIT_COMPLETION(p2m_dev->completion); + reinit_completion(&p2m_dev->completion); p2m_dev->error = 0; if (desc_cnt > 1 && solo_dev->type != SOLO_DEV_6110 && desc_mode) { diff --git a/drivers/staging/tidspbridge/core/sync.c b/drivers/staging/tidspbridge/core/sync.c index 7bb550acaf4a..743ff09d82d2 100644 --- a/drivers/staging/tidspbridge/core/sync.c +++ b/drivers/staging/tidspbridge/core/sync.c @@ -72,7 +72,7 @@ int sync_wait_on_multiple_events(struct sync_object **events, spin_lock_bh(&sync_lock); for (i = 0; i < count; i++) { if (completion_done(&events[i]->comp)) { - INIT_COMPLETION(events[i]->comp); + reinit_completion(&events[i]->comp); *index = i; spin_unlock_bh(&sync_lock); status = 0; @@ -92,7 +92,7 @@ int sync_wait_on_multiple_events(struct sync_object **events, spin_lock_bh(&sync_lock); for (i = 0; i < count; i++) { if (completion_done(&events[i]->comp)) { - INIT_COMPLETION(events[i]->comp); + reinit_completion(&events[i]->comp); *index = i; status = 0; } diff --git a/drivers/staging/tidspbridge/include/dspbridge/sync.h b/drivers/staging/tidspbridge/include/dspbridge/sync.h index 58a0d5c5543d..fc19b9707087 100644 --- a/drivers/staging/tidspbridge/include/dspbridge/sync.h +++ b/drivers/staging/tidspbridge/include/dspbridge/sync.h @@ -59,7 +59,7 @@ static inline void sync_init_event(struct sync_object *event) static inline void sync_reset_event(struct sync_object *event) { - INIT_COMPLETION(event->comp); + reinit_completion(&event->comp); event->multi_comp = NULL; } diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c index 6d04eb48bfbc..1aa4a3fd0f1b 100644 --- a/drivers/staging/tidspbridge/rmgr/drv_interface.c +++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c @@ -332,7 +332,7 @@ static void bridge_recover(struct work_struct *work) struct dev_object *dev; struct cfg_devnode *dev_node; if (atomic_read(&bridge_cref)) { - INIT_COMPLETION(bridge_comp); + reinit_completion(&bridge_comp); while (!wait_for_completion_timeout(&bridge_comp, msecs_to_jiffies(REC_TIMEOUT))) pr_info("%s:%d handle(s) still opened\n", @@ -348,7 +348,7 @@ static void bridge_recover(struct work_struct *work) void bridge_recover_schedule(void) { - INIT_COMPLETION(bridge_open_comp); + reinit_completion(&bridge_open_comp); recover = true; queue_work(bridge_rec_queue, &bridge_recovery_work); } @@ -389,7 +389,7 @@ static int omap3_bridge_startup(struct platform_device *pdev) #ifdef CONFIG_TIDSPBRIDGE_RECOVERY bridge_rec_queue = create_workqueue("bridge_rec_queue"); INIT_WORK(&bridge_recovery_work, bridge_recover); - INIT_COMPLETION(bridge_comp); + reinit_completion(&bridge_comp); #endif #ifdef CONFIG_PM diff --git a/drivers/tty/metag_da.c b/drivers/tty/metag_da.c index 0e888621f484..7332e2ca4615 100644 --- a/drivers/tty/metag_da.c +++ b/drivers/tty/metag_da.c @@ -495,7 +495,7 @@ static int dashtty_write(struct tty_struct *tty, const unsigned char *buf, count = dport->xmit_cnt; /* xmit buffer no longer empty? */ if (count) - INIT_COMPLETION(dport->xmit_empty); + reinit_completion(&dport->xmit_empty); mutex_unlock(&dport->xmit_lock); if (total) { diff --git a/drivers/usb/c67x00/c67x00-sched.c b/drivers/usb/c67x00/c67x00-sched.c index aa491627a45b..892cc96466eb 100644 --- a/drivers/usb/c67x00/c67x00-sched.c +++ b/drivers/usb/c67x00/c67x00-sched.c @@ -344,7 +344,7 @@ void c67x00_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *ep) /* it could happen that we reinitialize this completion, while * somebody was waiting for that completion. The timeout and * while loop handle such cases, but this might be improved */ - INIT_COMPLETION(c67x00->endpoint_disable); + reinit_completion(&c67x00->endpoint_disable); c67x00_sched_kick(c67x00); wait_for_completion_timeout(&c67x00->endpoint_disable, 1 * HZ); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index 44cf775a8627..774e8b89cdb5 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -373,7 +373,7 @@ static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len) if (req->buf == NULL) req->buf = (void *)0xDEADBABE; - INIT_COMPLETION(ffs->ep0req_completion); + reinit_completion(&ffs->ep0req_completion); ret = usb_ep_queue(ffs->gadget->ep0, req, GFP_ATOMIC); if (unlikely(ret < 0)) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 84657e07dc5d..439c951f261b 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -455,7 +455,7 @@ static int parport_prologue(struct parport *pp) return -1; } mos_parport->msg_pending = true; /* synch usb call pending */ - INIT_COMPLETION(mos_parport->syncmsg_compl); + reinit_completion(&mos_parport->syncmsg_compl); spin_unlock(&release_lock); mutex_lock(&mos_parport->serial->disc_mutex); diff --git a/drivers/video/exynos/exynos_mipi_dsi_common.c b/drivers/video/exynos/exynos_mipi_dsi_common.c index 7eed957b6014..85edabfdef5a 100644 --- a/drivers/video/exynos/exynos_mipi_dsi_common.c +++ b/drivers/video/exynos/exynos_mipi_dsi_common.c @@ -220,7 +220,7 @@ int exynos_mipi_dsi_wr_data(struct mipi_dsim_device *dsim, unsigned int data_id, case MIPI_DSI_DCS_LONG_WRITE: { unsigned int size, payload = 0; - INIT_COMPLETION(dsim_wr_comp); + reinit_completion(&dsim_wr_comp); size = data_size * 4; @@ -356,7 +356,7 @@ int exynos_mipi_dsi_rd_data(struct mipi_dsim_device *dsim, unsigned int data_id, msleep(20); mutex_lock(&dsim->lock); - INIT_COMPLETION(dsim_rd_comp); + reinit_completion(&dsim_rd_comp); exynos_mipi_dsi_rd_tx_header(dsim, MIPI_DSI_SET_MAXIMUM_RETURN_PACKET_SIZE, req_size); diff --git a/drivers/video/omap2/displays-new/encoder-tpd12s015.c b/drivers/video/omap2/displays-new/encoder-tpd12s015.c index 798ef200b055..d5c936cb217f 100644 --- a/drivers/video/omap2/displays-new/encoder-tpd12s015.c +++ b/drivers/video/omap2/displays-new/encoder-tpd12s015.c @@ -69,7 +69,7 @@ static int tpd_connect(struct omap_dss_device *dssdev, dst->src = dssdev; dssdev->dst = dst; - INIT_COMPLETION(ddata->hpd_completion); + reinit_completion(&ddata->hpd_completion); gpio_set_value_cansleep(ddata->ct_cp_hpd_gpio, 1); /* DC-DC converter needs at max 300us to get to 90% of 5V */ diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 000eae2782b6..2f6735dbf1a9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -392,7 +392,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, wait_for_completion(&ecr->completion); rc = ecr->rc; - INIT_COMPLETION(ecr->completion); + reinit_completion(&ecr->completion); } out: ablkcipher_request_free(req); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index c8e729deb4f7..74a7e12e10df 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -244,7 +244,7 @@ static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid != NFS4_NO_SLOT) { - INIT_COMPLETION(tbl->complete); + reinit_completion(&tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 3a44a648dae7..3407b2c62b21 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -1304,7 +1304,7 @@ static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw) { wait_for_completion(&mw->mw_complete); /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return mw->mw_status; } @@ -1355,7 +1355,7 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, else ret = mw->mw_status; /* Re-arm the completion in case we want to wait on it again */ - INIT_COMPLETION(mw->mw_complete); + reinit_completion(&mw->mw_complete); return ret; } diff --git a/sound/firewire/dice.c b/sound/firewire/dice.c index 6feee6614193..57bcd31fcc12 100644 --- a/sound/firewire/dice.c +++ b/sound/firewire/dice.c @@ -543,7 +543,7 @@ static int dice_change_rate(struct dice *dice, unsigned int clock_rate) __be32 value; int err; - INIT_COMPLETION(dice->clock_accepted); + reinit_completion(&dice->clock_accepted); value = cpu_to_be32(clock_rate | CLOCK_SOURCE_ARX1); err = snd_fw_transaction(dice->unit, TCODE_WRITE_QUADLET_REQUEST, diff --git a/sound/soc/samsung/ac97.c b/sound/soc/samsung/ac97.c index 2acf987844e8..350ba23a9893 100644 --- a/sound/soc/samsung/ac97.c +++ b/sound/soc/samsung/ac97.c @@ -74,7 +74,7 @@ static void s3c_ac97_activate(struct snd_ac97 *ac97) if (stat == S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE) return; /* Return if already active */ - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_glbctrl = readl(s3c_ac97.regs + S3C_AC97_GLBCTRL); ac_glbctrl = S3C_AC97_GLBCTRL_ACLINKON; @@ -103,7 +103,7 @@ static unsigned short s3c_ac97_read(struct snd_ac97 *ac97, s3c_ac97_activate(ac97); - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_codec_cmd = readl(s3c_ac97.regs + S3C_AC97_CODEC_CMD); ac_codec_cmd = S3C_AC97_CODEC_CMD_READ | AC_CMD_ADDR(reg); @@ -140,7 +140,7 @@ static void s3c_ac97_write(struct snd_ac97 *ac97, unsigned short reg, s3c_ac97_activate(ac97); - INIT_COMPLETION(s3c_ac97.done); + reinit_completion(&s3c_ac97.done); ac_codec_cmd = readl(s3c_ac97.regs + S3C_AC97_CODEC_CMD); ac_codec_cmd = AC_CMD_ADDR(reg) | AC_CMD_DATA(val); From 62026aedaacedbe1ffe94a3599ad4acd8ecdf587 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 14 Nov 2013 14:32:03 -0800 Subject: [PATCH 59/72] sched: remove INIT_COMPLETION All users are converted over to reinit_completion(). Remove the old macro now. Signed-off-by: Wolfram Sang Cc: Linus Walleij Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/completion.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/linux/completion.h b/include/linux/completion.h index 124e4b4334c1..5d5aaae3af43 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -106,14 +106,4 @@ extern bool completion_done(struct completion *x); extern void complete(struct completion *); extern void complete_all(struct completion *); -/** - * INIT_COMPLETION - reinitialize a completion structure - * @x: completion structure to be reinitialized - * - * This macro should be used to reinitialize a completion structure so it can - * be reused. This is especially important after complete_all() is used. - */ -#define INIT_COMPLETION(x) ((x).done = 0) - - #endif From c853b167e6ec1f25023cfc58ba2f43f9f6f5b49b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 14 Nov 2013 14:32:04 -0800 Subject: [PATCH 60/72] drivers/w1/masters/w1-gpio.c: use dev_get_platdata() Use the wrapper function for retrieving the platform data instead of accessing dev->platform_data directly. This is a cosmetic change to make the code simpler and enhance the readability. Signed-off-by: Jingoo Han Acked-by: Evgeniy Polyakov Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/masters/w1-gpio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index 264ad1c583ab..e36b18b2817b 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -56,7 +56,7 @@ MODULE_DEVICE_TABLE(of, w1_gpio_dt_ids); static int w1_gpio_probe_dt(struct platform_device *pdev) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device_node *np = pdev->dev.of_node; int gpio; @@ -92,7 +92,7 @@ static int w1_gpio_probe(struct platform_device *pdev) } } - pdata = pdev->dev.platform_data; + pdata = dev_get_platdata(&pdev->dev); if (!pdata) { dev_err(&pdev->dev, "No configuration data\n"); @@ -154,7 +154,7 @@ static int w1_gpio_probe(struct platform_device *pdev) static int w1_gpio_remove(struct platform_device *pdev) { struct w1_bus_master *master = platform_get_drvdata(pdev); - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(0); @@ -171,7 +171,7 @@ static int w1_gpio_remove(struct platform_device *pdev) static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(0); @@ -181,7 +181,7 @@ static int w1_gpio_suspend(struct platform_device *pdev, pm_message_t state) static int w1_gpio_resume(struct platform_device *pdev) { - struct w1_gpio_platform_data *pdata = pdev->dev.platform_data; + struct w1_gpio_platform_data *pdata = dev_get_platdata(&pdev->dev); if (pdata->enable_external_pullup) pdata->enable_external_pullup(1); From fc21c0cff2f425891b28ff6fb6b03b325c977428 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:06 -0800 Subject: [PATCH 61/72] revert "softirq: Add support for triggering softirq work on softirqs" This commit was incomplete in that code to remove items from the per-cpu lists was missing and never acquired a user in the 5 years it has been in the tree. We're going to implement what it seems to try to archive in a simpler way, and this code is in the way of doing so. Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 22 ------- kernel/softirq.c | 131 -------------------------------------- 2 files changed, 153 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c9e831dc80bc..db43b58a3355 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include #include @@ -392,15 +390,6 @@ extern void __raise_softirq_irqoff(unsigned int nr); extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); -/* This is the worklist that queues up per-cpu softirq work. - * - * send_remote_sendirq() adds work to these lists, and - * the softirq handler itself dequeues from them. The queues - * are protected by disabling local cpu interrupts and they must - * only be accessed by the local cpu that they are for. - */ -DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); - DECLARE_PER_CPU(struct task_struct *, ksoftirqd); static inline struct task_struct *this_cpu_ksoftirqd(void) @@ -408,17 +397,6 @@ static inline struct task_struct *this_cpu_ksoftirqd(void) return this_cpu_read(ksoftirqd); } -/* Try to send a softirq to a remote cpu. If this cannot be done, the - * work will be queued to the local cpu. - */ -extern void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq); - -/* Like send_remote_softirq(), but the caller must disable local cpu interrupts - * and compute the current cpu, passed in as 'this_cpu'. - */ -extern void __send_remote_softirq(struct call_single_data *cp, int cpu, - int this_cpu, int softirq); - /* Tasklets --- multithreaded analogue of BHs. Main feature differing them of generic softirqs: tasklet diff --git a/kernel/softirq.c b/kernel/softirq.c index b24988353458..11025ccc06dd 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -6,8 +6,6 @@ * Distribute under GPLv2. * * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) - * - * Remote softirq infrastructure is by Jens Axboe. */ #include @@ -627,146 +625,17 @@ void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, } EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); -/* - * Remote softirq bits - */ - -DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); -EXPORT_PER_CPU_SYMBOL(softirq_work_list); - -static void __local_trigger(struct call_single_data *cp, int softirq) -{ - struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]); - - list_add_tail(&cp->list, head); - - /* Trigger the softirq only if the list was previously empty. */ - if (head->next == &cp->list) - raise_softirq_irqoff(softirq); -} - -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS -static void remote_softirq_receive(void *data) -{ - struct call_single_data *cp = data; - unsigned long flags; - int softirq; - - softirq = *(int *)cp->info; - local_irq_save(flags); - __local_trigger(cp, softirq); - local_irq_restore(flags); -} - -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - if (cpu_online(cpu)) { - cp->func = remote_softirq_receive; - cp->info = &softirq; - cp->flags = 0; - - __smp_call_function_single(cpu, cp, 0); - return 0; - } - return 1; -} -#else /* CONFIG_USE_GENERIC_SMP_HELPERS */ -static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - return 1; -} -#endif - -/** - * __send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @this_cpu: the currently executing cpu - * @softirq: the softirq for the work - * - * Attempt to schedule softirq work on a remote cpu. If this cannot be - * done, the work is instead queued up on the local cpu. - * - * Interrupts must be disabled. - */ -void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq) -{ - if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq)) - __local_trigger(cp, softirq); -} -EXPORT_SYMBOL(__send_remote_softirq); - -/** - * send_remote_softirq - try to schedule softirq work on a remote cpu - * @cp: private SMP call function data area - * @cpu: the remote cpu - * @softirq: the softirq for the work - * - * Like __send_remote_softirq except that disabling interrupts and - * computing the current cpu is done for the caller. - */ -void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq) -{ - unsigned long flags; - int this_cpu; - - local_irq_save(flags); - this_cpu = smp_processor_id(); - __send_remote_softirq(cp, cpu, this_cpu, softirq); - local_irq_restore(flags); -} -EXPORT_SYMBOL(send_remote_softirq); - -static int remote_softirq_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - int i; - - local_irq_disable(); - for (i = 0; i < NR_SOFTIRQS; i++) { - struct list_head *head = &per_cpu(softirq_work_list[i], cpu); - struct list_head *local_head; - - if (list_empty(head)) - continue; - - local_head = &__get_cpu_var(softirq_work_list[i]); - list_splice_init(head, local_head); - raise_softirq_irqoff(i); - } - local_irq_enable(); - } - - return NOTIFY_OK; -} - -static struct notifier_block remote_softirq_cpu_notifier = { - .notifier_call = remote_softirq_cpu_notify, -}; - void __init softirq_init(void) { int cpu; for_each_possible_cpu(cpu) { - int i; - per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head; - for (i = 0; i < NR_SOFTIRQS; i++) - INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu)); } - register_hotcpu_notifier(&remote_softirq_cpu_notifier); - open_softirq(TASKLET_SOFTIRQ, tasklet_action); open_softirq(HI_SOFTIRQ, tasklet_hi_action); } From 0a06ff068f1255bcd7965ab07bc0f4adc3eb639a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:07 -0800 Subject: [PATCH 62/72] kernel: remove CONFIG_USE_GENERIC_SMP_HELPERS We've switched over every architecture that supports SMP to it, so remove the new useless config variable. Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 --- arch/alpha/Kconfig | 1 - arch/arc/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig | 1 - arch/blackfin/Kconfig | 1 - arch/hexagon/Kconfig | 1 - arch/ia64/Kconfig | 1 - arch/m32r/Kconfig | 1 - arch/metag/Kconfig | 1 - arch/mips/Kconfig | 1 - arch/mn10300/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/sparc/Kconfig | 1 - arch/tile/Kconfig | 1 - arch/x86/Kconfig | 1 - block/blk-mq.c | 4 ++-- block/blk-softirq.c | 4 ++-- block/blk-sysfs.c | 2 +- include/linux/smp.h | 4 ---- kernel/Kconfig.hz | 2 +- kernel/smp.c | 2 -- 25 files changed, 6 insertions(+), 33 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index ded747c7b74c..f1cf895c040f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -207,9 +207,6 @@ config HAVE_DMA_ATTRS config HAVE_DMA_CONTIGUOUS bool -config USE_GENERIC_SMP_HELPERS - bool - config GENERIC_SMP_IDLE_THREAD bool diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 35a300d4a9fb..8d2a4833acda 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -522,7 +522,6 @@ config ARCH_MAY_HAVE_PC_FDC config SMP bool "Symmetric multi-processing support" depends on ALPHA_SABLE || ALPHA_LYNX || ALPHA_RAWHIDE || ALPHA_DP264 || ALPHA_WILDFIRE || ALPHA_TITAN || ALPHA_GENERIC || ALPHA_SHARK || ALPHA_MARVEL - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5ede5460c806..2ee0c9bfd032 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -125,7 +125,6 @@ config ARC_PLAT_NEEDS_CPU_TO_DMA config SMP bool "Symmetric Multi-Processing (Incomplete)" default n - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 603d661b445d..00c1ff45a158 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1432,7 +1432,6 @@ config SMP depends on GENERIC_CLOCKEVENTS depends on HAVE_SMP depends on MMU || ARM_MPU - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bb0bf1bfc05d..9714fe0403b7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,7 +143,6 @@ config CPU_BIG_ENDIAN config SMP bool "Symmetric Multi-Processing" - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you say N here, the kernel will run on single and diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index e887b57c3176..9ceccef9c649 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -34,7 +34,6 @@ config BLACKFIN select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_ATOMIC64 select GENERIC_IRQ_PROBE - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_NMI_WATCHDOG if NMI_WATCHDOG select GENERIC_SMP_IDLE_THREAD select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 99041b07e610..09df2608f40a 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -4,7 +4,6 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y select HAVE_OPROFILE - select USE_GENERIC_SMP_HELPERS if SMP # Other pending projects/to-do items. # select HAVE_REGS_AND_STACK_ACCESS_API # select HAVE_HW_BREAKPOINT if PERF_EVENTS diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 7740ab10a171..dfe85e92ca2e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -343,7 +343,6 @@ config FORCE_MAX_ZONEORDER config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, say N. If you have a system with more diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 75661fbf4529..09ef94a8a7c3 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -275,7 +275,6 @@ source "kernel/Kconfig.preempt" config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index 36368eb07e13..e56abd2c1b4f 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -111,7 +111,6 @@ config METAG_META21 config SMP bool "Symmetric multi-processing support" depends on METAG_META21 && METAG_META21_MMU - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one thread running Linux. If you have a system with only one thread running Linux, diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 17cc7ff8458c..867d7db11581 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2125,7 +2125,6 @@ source "mm/Kconfig" config SMP bool "Multi-Processing support" depends on SYS_SUPPORTS_SMP - select USE_GENERIC_SMP_HELPERS help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 6aaa1607001a..8bde9237d13b 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -181,7 +181,6 @@ endmenu config SMP bool "Symmetric multi-processing support" default y - select USE_GENERIC_SMP_HELPERS depends on MN10300_PROC_MN2WS0038 || MN10300_PROC_MN2WS0050 ---help--- This enables support for systems with more than one CPU. If you have diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 7dcde539d61e..c03567a9a915 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -226,7 +226,6 @@ endchoice config SMP bool "Symmetric multi-processing support" - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 2f898d63eb96..4740b0a15fa8 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -106,7 +106,6 @@ config PPC select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE select HAVE_DEBUG_KMEMLEAK select GENERIC_ATOMIC64 if PPC32 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f75d7e517927..314fced4fc14 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -141,7 +141,6 @@ config S390 select OLD_SIGACTION select OLD_SIGSUSPEND3 select SYSCTL_EXCEPTION_TRACE - select USE_GENERIC_SMP_HELPERS if SMP select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 224f4bc9925e..e78561bc30ef 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -711,7 +711,6 @@ config CC_STACKPROTECTOR config SMP bool "Symmetric multi-processing support" depends on SYS_SUPPORTS_SMP - select USE_GENERIC_SMP_HELPERS ---help--- This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 78c4fdb91bc5..8591b201d9cc 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -28,7 +28,6 @@ config SPARC select HAVE_ARCH_JUMP_LABEL select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION - select USE_GENERIC_SMP_HELPERS if SMP select GENERIC_PCI_IOMAP select HAVE_NMI_WATCHDOG if SPARC64 select HAVE_BPF_JIT diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index d45a2c48f185..b3692ce78f90 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -8,7 +8,6 @@ config TILE select HAVE_KVM if !TILEGX select GENERIC_FIND_FIRST_BIT select SYSCTL_EXCEPTION_TRACE - select USE_GENERIC_SMP_HELPERS select CC_OPTIMIZE_FOR_SIZE select HAVE_DEBUG_KMEMLEAK select GENERIC_IRQ_PROBE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index af5513e5798a..83f521aa103f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -90,7 +90,6 @@ config X86 select GENERIC_IRQ_SHOW select GENERIC_CLOCKEVENTS_MIN_ADJUST select IRQ_FORCED_THREADING - select USE_GENERIC_SMP_HELPERS if SMP select HAVE_BPF_JIT if X86_64 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select CLKEVT_I8253 diff --git a/block/blk-mq.c b/block/blk-mq.c index 88d4e864d4c0..c661896e2465 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -319,7 +319,7 @@ void __blk_mq_end_io(struct request *rq, int error) blk_mq_complete_request(rq, error); } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#if defined(CONFIG_SMP) /* * Called with interrupts disabled. @@ -361,7 +361,7 @@ static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, return true; } -#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#else /* CONFIG_SMP */ static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu, struct request *rq, const int error) { diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ce4b8bfd3d27..57790c1a97eb 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -36,7 +36,7 @@ static void blk_done_softirq(struct softirq_action *h) } } -#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP static void trigger_softirq(void *data) { struct request *rq = data; @@ -71,7 +71,7 @@ static int raise_blk_irq(int cpu, struct request *rq) return 1; } -#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +#else /* CONFIG_SMP */ static int raise_blk_irq(int cpu, struct request *rq) { return 1; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 4f8c4d90ec73..97779522472f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -288,7 +288,7 @@ static ssize_t queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) { ssize_t ret = -EINVAL; -#if defined(CONFIG_USE_GENERIC_SMP_HELPERS) +#ifdef CONFIG_SMP unsigned long val; ret = queue_var_store(&val, page, count); diff --git a/include/linux/smp.h b/include/linux/smp.h index 731f5237d5f4..78851513030d 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -106,14 +106,10 @@ void kick_all_cpus_sync(void); /* * Generic and arch helpers */ -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS void __init call_function_init(void); void generic_smp_call_function_single_interrupt(void); #define generic_smp_call_function_interrupt \ generic_smp_call_function_single_interrupt -#else -static inline void call_function_init(void) { } -#endif /* * Mark the boot cpu "online" so that it can call console drivers in diff --git a/kernel/Kconfig.hz b/kernel/Kconfig.hz index 94fabd534b03..2a202a846757 100644 --- a/kernel/Kconfig.hz +++ b/kernel/Kconfig.hz @@ -55,4 +55,4 @@ config HZ default 1000 if HZ_1000 config SCHED_HRTICK - def_bool HIGH_RES_TIMERS && (!SMP || USE_GENERIC_SMP_HELPERS) + def_bool HIGH_RES_TIMERS diff --git a/kernel/smp.c b/kernel/smp.c index 46116100f0ee..1c194e20e943 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -15,7 +15,6 @@ #include "smpboot.h" -#ifdef CONFIG_USE_GENERIC_SMP_HELPERS enum { CSD_FLAG_LOCK = 0x01, CSD_FLAG_WAIT = 0x02, @@ -464,7 +463,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait) return 0; } EXPORT_SYMBOL(smp_call_function); -#endif /* USE_GENERIC_SMP_HELPERS */ /* Setup configured maximum number of CPUs to activate */ unsigned int setup_max_cpus = NR_CPUS; From 40c01e8bd5575e32633192513e09eac7155d6926 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:08 -0800 Subject: [PATCH 63/72] kernel: provide a __smp_call_function_single stub for !CONFIG_SMP Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/up.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/up.c b/kernel/up.c index 630d72bf7e41..509403e3fbc6 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,6 +22,17 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); +void __smp_call_function_single(int cpu, struct call_single_data *csd, + int wait) +{ + unsigned long flags; + + local_irq_save(flags); + csd->func(csd->info); + local_irq_restore(flags); +} +EXPORT_SYMBOL(__smp_call_function_single); + int on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; From 7cf64f861b7a43b395f0855994003254b06a7e5a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:32:09 -0800 Subject: [PATCH 64/72] kernel-provide-a-__smp_call_function_single-stub-for-config_smp-fix x86_64 allnoconfig: kernel/up.c:25: error: redefinition of '__smp_call_function_single' include/linux/smp.h:154: note: previous definition of '__smp_call_function_single' was here Cc: Christoph Hellwig Cc: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index 78851513030d..5da22ee42e16 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -49,6 +49,9 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); +void __smp_call_function_single(int cpuid, struct call_single_data *data, + int wait); + #ifdef CONFIG_SMP #include @@ -95,9 +98,6 @@ int smp_call_function(smp_call_func_t func, void *info, int wait); void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait); -void __smp_call_function_single(int cpuid, struct call_single_data *data, - int wait); - int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait); @@ -151,12 +151,6 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, static inline void kick_all_cpus_sync(void) { } -static inline void __smp_call_function_single(int cpuid, - struct call_single_data *data, int wait) -{ - on_each_cpu(data->func, data->info, wait); -} - #endif /* !SMP */ /* From ca5ecd64c2cdbcd316d789467147e732746f39fa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:10 -0800 Subject: [PATCH 65/72] kernel: fix generic_exec_single indentation Signed-off-by: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/smp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 1c194e20e943..bd9f94028838 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -139,8 +139,7 @@ static void csd_unlock(struct call_single_data *csd) * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static -void generic_exec_single(int cpu, struct call_single_data *csd, int wait) +static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) { struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); unsigned long flags; From b89241e8cdb8321c20546d47645a9b65b58113b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Nov 2013 14:32:11 -0800 Subject: [PATCH 66/72] llists: move llist_reverse_order from raid5 to llist.c Make this useful helper available for other users. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Cc: Jens Axboe Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 14 -------------- include/linux/llist.h | 2 ++ lib/llist.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8b906843926..7f0e17a27aeb 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -293,20 +293,6 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh) do_release_stripe(conf, sh); } -static struct llist_node *llist_reverse_order(struct llist_node *head) -{ - struct llist_node *new_head = NULL; - - while (head) { - struct llist_node *tmp = head; - head = head->next; - tmp->next = new_head; - new_head = tmp; - } - - return new_head; -} - /* should hold conf->device_lock already */ static int release_stripe_list(struct r5conf *conf) { diff --git a/include/linux/llist.h b/include/linux/llist.h index 8828a78dec9a..fbf10a0bc095 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -195,4 +195,6 @@ static inline struct llist_node *llist_del_all(struct llist_head *head) extern struct llist_node *llist_del_first(struct llist_head *head); +struct llist_node *llist_reverse_order(struct llist_node *head); + #endif /* LLIST_H */ diff --git a/lib/llist.c b/lib/llist.c index 4a70d120138c..ef48b87247ec 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head) return entry; } EXPORT_SYMBOL_GPL(llist_del_first); + +/** + * llist_reverse_order - reverse order of a llist chain + * @head: first item of the list to be reversed + * + * Reverse the oder of a chain of llist entries and return the + * new first entry. + */ +struct llist_node *llist_reverse_order(struct llist_node *head) +{ + struct llist_node *new_head = NULL; + + while (head) { + struct llist_node *tmp = head; + head = head->next; + tmp->next = new_head; + new_head = tmp; + } + + return new_head; +} +EXPORT_SYMBOL_GPL(llist_reverse_order); From 0791a6057cb60d12ec5e3182b99e6ffa8044ee3a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 14 Nov 2013 14:32:13 -0800 Subject: [PATCH 67/72] llists-move-llist_reverse_order-from-raid5-to-llistc-fix fix comment typo, per Jan Cc: Christoph Hellwig Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/llist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/llist.c b/lib/llist.c index ef48b87247ec..f76196d07409 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -86,7 +86,7 @@ EXPORT_SYMBOL_GPL(llist_del_first); * llist_reverse_order - reverse order of a llist chain * @head: first item of the list to be reversed * - * Reverse the oder of a chain of llist entries and return the + * Reverse the order of a chain of llist entries and return the * new first entry. */ struct llist_node *llist_reverse_order(struct llist_node *head) From 07968fe4acec6d49e39520ef407ea7f6874b7c2e Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 14 Nov 2013 14:32:15 -0800 Subject: [PATCH 68/72] sound/core/memalloc.c: use gen_pool_dma_alloc() to allocate iram buffer Since gen_pool_dma_alloc() is introduced, we implement it to simplify code. Signed-off-by: Nicolin Chen Acked-by: Takashi Iwai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- sound/core/memalloc.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 9d93f02c6285..5e1c7bc73b29 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -184,11 +184,7 @@ static void snd_malloc_dev_iram(struct snd_dma_buffer *dmab, size_t size) /* Assign the pool into private_data field */ dmab->private_data = pool; - dmab->area = (void *)gen_pool_alloc(pool, size); - if (!dmab->area) - return; - - dmab->addr = gen_pool_virt_to_phys(pool, (unsigned long)dmab->area); + dmab->area = gen_pool_dma_alloc(pool, size, &dmab->addr); } /** From a019e48cfbfb358786326db3dbc1c565b8f14a56 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Thu, 14 Nov 2013 14:32:16 -0800 Subject: [PATCH 69/72] kfifo: kfifo_copy_{to,from}_user: fix copied bytes calculation 'copied' and 'len' are in bytes, while 'ret' is in elements, so we need to multiply 'ret' with the size of one element to get the correct result. Signed-off-by: Lars-Peter Clausen Cc: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kfifo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/kfifo.c b/lib/kfifo.c index 7b7f83027b7b..d79b9d222065 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, * incrementing the fifo->in index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } @@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, * incrementing the fifo->out index counter */ smp_wmb(); - *copied = len - ret; + *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } From 498d319bb512992ef0784c278fa03679f2f5649d Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Thu, 14 Nov 2013 14:32:17 -0800 Subject: [PATCH 70/72] kfifo API type safety This patch enhances the type safety for the kfifo API. It is now safe to put const data into a non const FIFO and the API will now generate a compiler warning when reading from the fifo where the destination address is pointing to a const variable. As a side effect the kfifo_put() does now expect the value of an element instead a pointer to the element. This was suggested Russell King. It make the handling of the kfifo_put easier since there is no need to create a helper variable for getting the address of a pointer or to pass integers of different sizes. IMHO the API break is okay, since there are currently only six users of kfifo_put(). The code is also cleaner by kicking out the "if (0)" expressions. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Stefani Seibold Cc: Russell King Cc: Hauke Mehrtens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_flip_work.c | 2 +- drivers/iio/industrialio-event.c | 2 +- drivers/net/wireless/rt2x00/rt2800mmio.c | 2 +- drivers/net/wireless/rt2x00/rt2800usb.c | 2 +- drivers/pci/pcie/aer/aerdrv_core.c | 2 +- include/linux/kfifo.h | 47 +++++++----------------- mm/memory-failure.c | 2 +- samples/kfifo/bytestream-example.c | 4 +- samples/kfifo/dma-example.c | 2 +- samples/kfifo/inttype-example.c | 4 +- 10 files changed, 25 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c index e788882d9021..f9c7fa3d0012 100644 --- a/drivers/gpu/drm/drm_flip_work.c +++ b/drivers/gpu/drm/drm_flip_work.c @@ -34,7 +34,7 @@ */ void drm_flip_work_queue(struct drm_flip_work *work, void *val) { - if (kfifo_put(&work->fifo, (const void **)&val)) { + if (kfifo_put(&work->fifo, val)) { atomic_inc(&work->pending); } else { DRM_ERROR("%s fifo full!\n", work->name); diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c index dac15b9f9df8..c10eab64bc05 100644 --- a/drivers/iio/industrialio-event.c +++ b/drivers/iio/industrialio-event.c @@ -56,7 +56,7 @@ int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp) ev.id = ev_code; ev.timestamp = timestamp; - copied = kfifo_put(&ev_int->det_events, &ev); + copied = kfifo_put(&ev_int->det_events, ev); if (copied != 0) wake_up_locked_poll(&ev_int->wait, POLLIN); } diff --git a/drivers/net/wireless/rt2x00/rt2800mmio.c b/drivers/net/wireless/rt2x00/rt2800mmio.c index ae152280e071..a8cc736b5063 100644 --- a/drivers/net/wireless/rt2x00/rt2800mmio.c +++ b/drivers/net/wireless/rt2x00/rt2800mmio.c @@ -446,7 +446,7 @@ static void rt2800mmio_txstatus_interrupt(struct rt2x00_dev *rt2x00dev) if (!rt2x00_get_field32(status, TX_STA_FIFO_VALID)) break; - if (!kfifo_put(&rt2x00dev->txstatus_fifo, &status)) { + if (!kfifo_put(&rt2x00dev->txstatus_fifo, status)) { rt2x00_warn(rt2x00dev, "TX status FIFO overrun, drop tx status report\n"); break; } diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 997df03a0c2e..a81ceb61d746 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -164,7 +164,7 @@ static bool rt2800usb_tx_sta_fifo_read_completed(struct rt2x00_dev *rt2x00dev, valid = rt2x00_get_field32(tx_status, TX_STA_FIFO_VALID); if (valid) { - if (!kfifo_put(&rt2x00dev->txstatus_fifo, &tx_status)) + if (!kfifo_put(&rt2x00dev->txstatus_fifo, tx_status)) rt2x00_warn(rt2x00dev, "TX status FIFO overrun\n"); queue_work(rt2x00dev->workqueue, &rt2x00dev->txdone_work); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 85ca36f2136d..6b3a958e1be6 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -574,7 +574,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, }; spin_lock_irqsave(&aer_recover_ring_lock, flags); - if (kfifo_put(&aer_recover_ring, &entry)) + if (kfifo_put(&aer_recover_ring, entry)) schedule_work(&aer_recover_work); else pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 10308c6a3d1c..552d51efb429 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -1,7 +1,7 @@ /* * A generic kernel FIFO implementation * - * Copyright (C) 2009/2010 Stefani Seibold + * Copyright (C) 2013 Stefani Seibold * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,9 +67,10 @@ struct __kfifo { union { \ struct __kfifo kfifo; \ datatype *type; \ + const datatype *const_type; \ char (*rectype)[recsize]; \ ptrtype *ptr; \ - const ptrtype *ptr_const; \ + ptrtype const *ptr_const; \ } #define __STRUCT_KFIFO(type, size, recsize, ptrtype) \ @@ -386,16 +387,12 @@ __kfifo_int_must_check_helper( \ #define kfifo_put(fifo, val) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(*__tmp->const_type) __val = (val); \ unsigned int __ret; \ - const size_t __recsize = sizeof(*__tmp->rectype); \ + size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \ - __dummy = (typeof(__val))NULL; \ - } \ if (__recsize) \ - __ret = __kfifo_in_r(__kfifo, __val, sizeof(*__val), \ + __ret = __kfifo_in_r(__kfifo, &__val, sizeof(__val), \ __recsize); \ else { \ __ret = !kfifo_is_full(__tmp); \ @@ -404,7 +401,7 @@ __kfifo_int_must_check_helper( \ ((typeof(__tmp->type))__kfifo->data) : \ (__tmp->buf) \ )[__kfifo->in & __tmp->kfifo.mask] = \ - *(typeof(__tmp->type))__val; \ + (typeof(*__tmp->type))__val; \ smp_wmb(); \ __kfifo->in++; \ } \ @@ -415,7 +412,7 @@ __kfifo_int_must_check_helper( \ /** * kfifo_get - get data from the fifo * @fifo: address of the fifo to be used - * @val: the var where to store the data to be added + * @val: address where to store the data * * This macro reads the data from the fifo. * It returns 0 if the fifo was empty. Otherwise it returns the number @@ -428,12 +425,10 @@ __kfifo_int_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(__tmp->ptr) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) \ - __val = (typeof(__tmp->ptr))0; \ if (__recsize) \ __ret = __kfifo_out_r(__kfifo, __val, sizeof(*__val), \ __recsize); \ @@ -456,7 +451,7 @@ __kfifo_uint_must_check_helper( \ /** * kfifo_peek - get data from the fifo without removing * @fifo: address of the fifo to be used - * @val: the var where to store the data to be added + * @val: address where to store the data * * This reads the data from the fifo without removing it from the fifo. * It returns 0 if the fifo was empty. Otherwise it returns the number @@ -469,12 +464,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((val) + 1) __val = (val); \ + typeof(__tmp->ptr) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) \ - __val = (typeof(__tmp->ptr))NULL; \ if (__recsize) \ __ret = __kfifo_out_peek_r(__kfifo, __val, sizeof(*__val), \ __recsize); \ @@ -508,14 +501,10 @@ __kfifo_uint_must_check_helper( \ #define kfifo_in(fifo, buf, n) \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr_const) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr_const) __dummy __attribute__ ((unused)); \ - __dummy = (typeof(__buf))NULL; \ - } \ (__recsize) ?\ __kfifo_in_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_in(__kfifo, __buf, __n); \ @@ -561,14 +550,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr) __dummy = NULL; \ - __buf = __dummy; \ - } \ (__recsize) ?\ __kfifo_out_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_out(__kfifo, __buf, __n); \ @@ -773,14 +758,10 @@ __kfifo_uint_must_check_helper( \ __kfifo_uint_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ - typeof((buf) + 1) __buf = (buf); \ + typeof(__tmp->ptr) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ - if (0) { \ - typeof(__tmp->ptr) __dummy __attribute__ ((unused)) = NULL; \ - __buf = __dummy; \ - } \ (__recsize) ? \ __kfifo_out_peek_r(__kfifo, __buf, __n, __recsize) : \ __kfifo_out_peek(__kfifo, __buf, __n); \ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f9d78ec7831f..b7c171602ba1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1269,7 +1269,7 @@ void memory_failure_queue(unsigned long pfn, int trapno, int flags) mf_cpu = &get_cpu_var(memory_failure_cpu); spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, &entry)) + if (kfifo_put(&mf_cpu->fifo, entry)) schedule_work_on(smp_processor_id(), &mf_cpu->work); else pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index cfe40addda76..2fca916d9edf 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -64,7 +64,7 @@ static int __init testfunc(void) /* put values into the fifo */ for (i = 0; i != 10; i++) - kfifo_put(&test, &i); + kfifo_put(&test, i); /* show the number of used elements */ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); @@ -85,7 +85,7 @@ static int __init testfunc(void) kfifo_skip(&test); /* put values into the fifo until is full */ - for (i = 20; kfifo_put(&test, &i); i++) + for (i = 20; kfifo_put(&test, i); i++) ; printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 06473791c08a..aa243db93f01 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -39,7 +39,7 @@ static int __init example_init(void) kfifo_in(&fifo, "test", 4); for (i = 0; i != 9; i++) - kfifo_put(&fifo, &i); + kfifo_put(&fifo, i); /* kick away first byte */ kfifo_skip(&fifo); diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index 6f8e79e76c9e..8dc3c2e7105a 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -61,7 +61,7 @@ static int __init testfunc(void) /* put values into the fifo */ for (i = 0; i != 10; i++) - kfifo_put(&test, &i); + kfifo_put(&test, i); /* show the number of used elements */ printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); @@ -78,7 +78,7 @@ static int __init testfunc(void) kfifo_skip(&test); /* put values into the fifo until is full */ - for (i = 20; kfifo_put(&test, &i); i++) + for (i = 20; kfifo_put(&test, i); i++) ; printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); From a99b7069aab8fc3fb4f26d15795dc280b52e38b1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 14 Nov 2013 14:32:18 -0800 Subject: [PATCH 71/72] hfsplus: Fix undefined __divdi3 in hfsplus_init_header_node() ERROR: "__divdi3" [fs/hfsplus/hfsplus.ko] undefined! Introduced by commit 099e9245e04d ("hfsplus: implement attributes file's header node initialization code"). i_size_read() returns loff_t, which is long long, i.e. 64-bit. node_size is size_t, which is either 32-bit or 64-bit. Hence "i_size_read(attr_file) / node_size" is a 64-by-32 or 64-by-64 division, causing (some versions of) gcc to emit a call to __divdi3(). Fortunately node_size is actually 16-bit, as the sole caller of hfsplus_init_header_node() passes a u16. Hence change its type from size_t to u16, and use do_div() to perform a 64-by-32 division. Not seen in m68k/allmodconfig in -next, so it really depends on the verion of gcc. Signed-off-by: Geert Uytterhoeven Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/xattr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index efc85b1377cc..3c6136f98c73 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -129,7 +129,7 @@ static int can_set_xattr(struct inode *inode, const char *name, static void hfsplus_init_header_node(struct inode *attr_file, u32 clump_size, - char *buf, size_t node_size) + char *buf, u16 node_size) { struct hfs_bnode_desc *desc; struct hfs_btree_header_rec *head; @@ -139,8 +139,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, char *bmp; u32 used_nodes; u32 used_bmp_bytes; + loff_t tmp; - hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %zu\n", + hfs_dbg(ATTR_MOD, "init_hdr_attr_file: clump %u, node_size %u\n", clump_size, node_size); /* The end of the node contains list of record offsets */ @@ -154,7 +155,9 @@ static void hfsplus_init_header_node(struct inode *attr_file, head = (struct hfs_btree_header_rec *)(buf + offset); head->node_size = cpu_to_be16(node_size); - head->node_count = cpu_to_be32(i_size_read(attr_file) / node_size); + tmp = i_size_read(attr_file); + do_div(tmp, node_size); + head->node_count = cpu_to_be32(tmp); head->free_nodes = cpu_to_be32(be32_to_cpu(head->node_count) - 1); head->clump_size = cpu_to_be32(clump_size); head->attributes |= cpu_to_be32(HFS_TREE_BIGKEYS | HFS_TREE_VARIDXKEYS); From 8d3ef556aba2b5b7d8b7144f7be1814d75ea3cc6 Mon Sep 17 00:00:00 2001 From: Alexander Beregalov Date: Thu, 14 Nov 2013 14:32:19 -0800 Subject: [PATCH 72/72] cmdline-parser: fix build Fix following errors: include/linux/cmdline-parser.h:17:12: error: 'BDEVNAME_SIZE' undeclared here block/cmdline-parser.c:17:2: error: implicit declaration of function 'kzalloc' Signed-off-by: Alexander Beregalov Cc: CaiZhiyong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cmdline-parser.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h index 98e892ef6d5a..a0f9280421ec 100644 --- a/include/linux/cmdline-parser.h +++ b/include/linux/cmdline-parser.h @@ -8,6 +8,8 @@ #define CMDLINEPARSEH #include +#include +#include /* partition flags */ #define PF_RDONLY 0x01 /* Device is read only */