From 9babd5c8caa6e62c116efc3a64a09f65af4112b0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:17 +0100 Subject: [PATCH 01/28] resource: Add System RAM resource type The IORESOURCE_MEM I/O resource type is used for all types of memory-mapped ranges, ex. System RAM, System ROM, Video RAM, Persistent Memory, PCI Bus, PCI MMCONFIG, ACPI Tables, IOAPIC, reserved, and so on. This requires walk_system_ram_range(), walk_system_ram_res(), and region_intersects() to use strcmp() against string "System RAM" to search for System RAM ranges in the iomem table, which is inefficient. __ioremap_caller() and reserve_memtype() on x86, for instance, call walk_system_ram_range() for every request to check if a given range is in System RAM ranges. However, adding a new I/O resource type for System RAM is not a viable option, see [1]. There are approx. 3800 references to IORESOURCE_MEM in the kernel/drivers, which makes it very difficult to distinguish their usages between new type and IORESOURCE_MEM. The I/O resource types are also used by the PNP subsystem. Therefore, introduce an extended I/O resource type, IORESOURCE_SYSTEM_RAM, which consists of IORESOURCE_MEM and a new modifier flag IORESOURCE_SYSRAM, see [2]. To keep the code 'if (resource_type(r) == IORESOURCE_MEM)' still working for System RAM, resource_ext_type() is added for extracting extended type bits. Link[1]: http://lkml.kernel.org/r/1449168859.9855.54.camel@hpe.com Link[2]: http://lkml.kernel.org/r/CA+55aFy4WQrWexC4u2LxX9Mw2NVoznw7p3Yh=iF4Xtf7zKWnRw@mail.gmail.com Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hanjun Guo Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-2-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 24bea087e7af..4b65d944717f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -49,12 +49,19 @@ struct resource { #define IORESOURCE_WINDOW 0x00200000 /* forwarded by bridge */ #define IORESOURCE_MUXED 0x00400000 /* Resource is software muxed */ +#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */ +#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */ + #define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */ + #define IORESOURCE_DISABLED 0x10000000 #define IORESOURCE_UNSET 0x20000000 /* No address assigned yet */ #define IORESOURCE_AUTO 0x40000000 #define IORESOURCE_BUSY 0x80000000 /* Driver has marked this resource busy */ +/* I/O resource extended types */ +#define IORESOURCE_SYSTEM_RAM (IORESOURCE_MEM|IORESOURCE_SYSRAM) + /* PnP IRQ specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IRQ_HIGHEDGE (1<<0) #define IORESOURCE_IRQ_LOWEDGE (1<<1) @@ -170,6 +177,10 @@ static inline unsigned long resource_type(const struct resource *res) { return res->flags & IORESOURCE_TYPE_BITS; } +static inline unsigned long resource_ext_type(const struct resource *res) +{ + return res->flags & IORESOURCE_EXT_TYPE_BITS; +} /* True iff r1 completely contains r2 */ static inline bool resource_contains(struct resource *r1, struct resource *r2) { From a3650d53ba16ec412185abb98f231e9ba6bcdc65 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:18 +0100 Subject: [PATCH 02/28] resource: Handle resource flags properly I/O resource flags consist of I/O resource types and modifier bits. 
Therefore, checking an I/O resource type in 'flags' must be performed with a bitwise operation. Fix find_next_iomem_res() and region_intersects() that simply compare 'flags' against a given value. Also change __request_region() to set 'res->flags' from resource_type() and resource_ext_type() of the parent, so that children nodes will inherit the extended I/O resource type. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vinod Koul Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-3-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- kernel/resource.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 09c0597840b0..96afc8027487 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -358,7 +358,7 @@ static int find_next_iomem_res(struct resource *res, char *name, read_lock(&resource_lock); for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) { - if (p->flags != res->flags) + if ((p->flags & res->flags) != res->flags) continue; if (name && strcmp(p->name, name)) continue; @@ -519,7 +519,8 @@ int region_intersects(resource_size_t start, size_t size, const char *name) read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { - bool is_type = strcmp(p->name, name) == 0 && p->flags == flags; + bool is_type = strcmp(p->name, name) == 0 && + ((p->flags & flags) == flags); if (start >= p->start && start <= p->end) is_type ? 
type++ : other++; @@ -1071,7 +1072,7 @@ struct resource * __request_region(struct resource *parent, res->name = name; res->start = start; res->end = start + n - 1; - res->flags = resource_type(parent); + res->flags = resource_type(parent) | resource_ext_type(parent); res->flags |= IORESOURCE_BUSY | flags; write_lock(&resource_lock); From 43ee493bde78da00deaf5737925365c691a036ad Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:19 +0100 Subject: [PATCH 03/28] resource: Add I/O resource descriptor walk_iomem_res() and region_intersects() still need to use strcmp() for searching a resource entry by @name in the iomem table. This patch introduces I/O resource descriptor 'desc' in struct resource for the iomem search interfaces. Drivers can assign their unique descriptor to a range when they support the search interfaces. Otherwise, 'desc' is set to IORES_DESC_NONE (0). This avoids changing most of the drivers as they typically allocate resource entries statically, or by calling alloc_resource(), kzalloc(), or alloc_bootmem_low(), which set the field to zero by default. A later patch will address some drivers that use kmalloc() without zero'ing the field. Also change release_mem_region_adjustable() to set 'desc' when its resource entry gets separated. Other resource interfaces are also changed to initialize 'desc' explicitly although alloc_resource() sets it to 0. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 18 ++++++++++++++++++ kernel/resource.c | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 4b65d944717f..983bea05d69c 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -20,6 +20,7 @@ struct resource { resource_size_t end; const char *name; unsigned long flags; + unsigned long desc; struct resource *parent, *sibling, *child; }; @@ -112,6 +113,22 @@ struct resource { /* PCI control bits. Shares IORESOURCE_BITS with above PCI ROM. */ #define IORESOURCE_PCI_FIXED (1<<4) /* Do not move resource */ +/* + * I/O Resource Descriptors + * + * Descriptors are used by walk_iomem_res_desc() and region_intersects() + * for searching a specific resource range in the iomem table. Assign + * a new descriptor when a resource range supports the search interfaces. + * Otherwise, resource.desc must be set to IORES_DESC_NONE (0). 
+ */ +enum { + IORES_DESC_NONE = 0, + IORES_DESC_CRASH_KERNEL = 1, + IORES_DESC_ACPI_TABLES = 2, + IORES_DESC_ACPI_NV_STORAGE = 3, + IORES_DESC_PERSISTENT_MEMORY = 4, + IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, +}; /* helpers to define resources */ #define DEFINE_RES_NAMED(_start, _size, _name, _flags) \ @@ -120,6 +137,7 @@ struct resource { .end = (_start) + (_size) - 1, \ .name = (_name), \ .flags = (_flags), \ + .desc = IORES_DESC_NONE, \ } #define DEFINE_RES_IO_NAMED(_start, _size, _name) \ diff --git a/kernel/resource.c b/kernel/resource.c index 96afc8027487..61512e972ece 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -949,6 +949,7 @@ static void __init __reserve_region_with_split(struct resource *root, res->start = start; res->end = end; res->flags = IORESOURCE_BUSY; + res->desc = IORES_DESC_NONE; while (1) { @@ -983,6 +984,7 @@ static void __init __reserve_region_with_split(struct resource *root, next_res->start = conflict->end + 1; next_res->end = end; next_res->flags = IORESOURCE_BUSY; + next_res->desc = IORES_DESC_NONE; } } else { res->start = conflict->end + 1; @@ -1074,6 +1076,7 @@ struct resource * __request_region(struct resource *parent, res->end = start + n - 1; res->flags = resource_type(parent) | resource_ext_type(parent); res->flags |= IORESOURCE_BUSY | flags; + res->desc = IORES_DESC_NONE; write_lock(&resource_lock); @@ -1238,6 +1241,7 @@ int release_mem_region_adjustable(struct resource *parent, new_res->start = end + 1; new_res->end = res->end; new_res->flags = res->flags; + new_res->desc = res->desc; new_res->parent = res->parent; new_res->sibling = res->sibling; new_res->child = NULL; @@ -1413,6 +1417,7 @@ static int __init reserve_setup(char *str) res->start = io_start; res->end = io_start + io_num - 1; res->flags = IORESOURCE_BUSY; + res->desc = IORES_DESC_NONE; res->child = NULL; if (request_resource(res->start >= 0x10000 ? 
&iomem_resource : &ioport_resource, res) == 0) reserved = x+1; From f33b14a4b96b185634848046f54fb0d5028566a9 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:20 +0100 Subject: [PATCH 04/28] x86/e820: Set System RAM type and descriptor Change e820_reserve_resources() to set 'flags' and 'desc' from e820 types. Set E820_RESERVED_KERN and E820_RAM's (System RAM) io resource type to IORESOURCE_SYSTEM_RAM. Do the same for "Kernel data", "Kernel code", and "Kernel bss", which are child nodes of System RAM. I/O resource descriptor is set to 'desc' for entries that are (and will be) target ranges of walk_iomem_res() and region_intersects(). Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Young Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Kosina Cc: Joerg Roedel Cc: Juergen Gross Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mark Salter Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Toshi Kani Cc: WANG Chao Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-5-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 38 +++++++++++++++++++++++++++++++++++++- arch/x86/kernel/setup.c | 6 +++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 569c1e4f96fe..837365f10912 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -925,6 +925,41 @@ static const char *e820_type_to_string(int e820_type) } } +static unsigned long e820_type_to_iomem_type(int e820_type) +{ + switch (e820_type) { + case E820_RESERVED_KERN: + case E820_RAM: + return IORESOURCE_SYSTEM_RAM; + case E820_ACPI: + case E820_NVS: + case E820_UNUSABLE: + case E820_PRAM: + case E820_PMEM: + default: + return IORESOURCE_MEM; + } +} + +static unsigned long e820_type_to_iores_desc(int e820_type) +{ + 
switch (e820_type) { + case E820_ACPI: + return IORES_DESC_ACPI_TABLES; + case E820_NVS: + return IORES_DESC_ACPI_NV_STORAGE; + case E820_PMEM: + return IORES_DESC_PERSISTENT_MEMORY; + case E820_PRAM: + return IORES_DESC_PERSISTENT_MEMORY_LEGACY; + case E820_RESERVED_KERN: + case E820_RAM: + case E820_UNUSABLE: + default: + return IORES_DESC_NONE; + } +} + static bool do_mark_busy(u32 type, struct resource *res) { /* this is the legacy bios/dos rom-shadow + mmio region */ @@ -967,7 +1002,8 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM; + res->flags = e820_type_to_iomem_type(e820.map[i].type); + res->desc = e820_type_to_iores_desc(e820.map[i].type); /* * don't register the region that could be conflicted with diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3d80e6d42a2..aa52c1009475 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -152,21 +152,21 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; From 03cb525eb25018cf5f3da01d0f1391fc8b37805a Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:21 +0100 Subject: [PATCH 05/28] ia64: Set System RAM type and descriptor Change efi_initialize_iomem_resources() to set 'flags' and 'desc' for EFI memory types. IORESOURCE_SYSRAM, a modifier bit, is set for System RAM as IORESOURCE_MEM is already set. IORESOURCE_SYSTEM_RAM is defined as (IORESOURCE_MEM|IORESOURCE_SYSRAM). 
I/O resource descriptor is set for "ACPI Non-volatile Storage" and "Persistent Memory". Also set IORESOURCE_SYSTEM_RAM for "Kernel code", "Kernel data", and "Kernel bss". Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Tony Luck Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Matt Fleming Cc: Peter Zijlstra Cc: Rusty Russell Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-efi Cc: linux-ia64@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-6-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/ia64/kernel/efi.c | 13 ++++++++++--- arch/ia64/kernel/setup.c | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index caae3f4e4341..300dac3702f1 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -1178,7 +1178,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, efi_memory_desc_t *md; u64 efi_desc_size; char *name; - unsigned long flags; + unsigned long flags, desc; efi_map_start = __va(ia64_boot_param->efi_memmap); efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; @@ -1193,6 +1193,8 @@ efi_initialize_iomem_resources(struct resource *code_resource, continue; flags = IORESOURCE_MEM | IORESOURCE_BUSY; + desc = IORES_DESC_NONE; + switch (md->type) { case EFI_MEMORY_MAPPED_IO: @@ -1207,14 +1209,17 @@ efi_initialize_iomem_resources(struct resource *code_resource, if (md->attribute & EFI_MEMORY_WP) { name = "System ROM"; flags |= IORESOURCE_READONLY; - } else if (md->attribute == EFI_MEMORY_UC) + } else if (md->attribute == EFI_MEMORY_UC) { name = "Uncached RAM"; - else + } else { name = "System RAM"; + flags |= IORESOURCE_SYSRAM; + } break; case EFI_ACPI_MEMORY_NVS: name = "ACPI Non-volatile Storage"; + desc = IORES_DESC_ACPI_NV_STORAGE; break; case 
EFI_UNUSABLE_MEMORY: @@ -1224,6 +1229,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, case EFI_PERSISTENT_MEMORY: name = "Persistent Memory"; + desc = IORES_DESC_PERSISTENT_MEMORY; break; case EFI_RESERVED_TYPE: @@ -1246,6 +1252,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, res->start = md->phys_addr; res->end = md->phys_addr + efi_md_size(md) - 1; res->flags = flags; + res->desc = desc; if (insert_resource(&iomem_resource, res) < 0) kfree(res); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 4f118b0d3091..2029a38a72ae 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -80,17 +80,17 @@ unsigned long vga_console_membase; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long ia64_max_cacheline_size; From 35d98e93fe6a7ab612f6b389ce42c1dc135d6eef Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:22 +0100 Subject: [PATCH 06/28] arch: Set IORESOURCE_SYSTEM_RAM flag for System RAM Set IORESOURCE_SYSTEM_RAM in flags of resource ranges with "System RAM", "Kernel code", "Kernel data", and "Kernel bss". Note that: - IORESOURCE_SYSRAM (i.e. modifier bit) is set in flags when IORESOURCE_MEM is already set. IORESOURCE_SYSTEM_RAM is defined as (IORESOURCE_MEM|IORESOURCE_SYSRAM). - Some archs do not set 'flags' for children nodes, such as "Kernel code". This patch does not change 'flags' in this case. 
Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mips@linux-mips.org Cc: linux-mm Cc: linux-parisc@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1453841853-11383-7-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/arm/kernel/setup.c | 6 +++--- arch/arm64/kernel/setup.c | 6 +++--- arch/avr32/kernel/setup.c | 6 +++--- arch/m32r/kernel/setup.c | 4 ++-- arch/mips/kernel/setup.c | 10 ++++++---- arch/parisc/mm/init.c | 6 +++--- arch/powerpc/mm/mem.c | 2 +- arch/s390/kernel/setup.c | 8 ++++---- arch/score/kernel/setup.c | 2 +- arch/sh/kernel/setup.c | 8 ++++---- arch/sparc/mm/init_64.c | 8 ++++---- arch/tile/kernel/setup.c | 11 ++++++++--- arch/unicore32/kernel/setup.c | 6 +++--- 13 files changed, 45 insertions(+), 38 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7d0cba6f1cc5..139791ed473d 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -176,13 +176,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -851,7 +851,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 
request_resource(&iomem_resource, res); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 8119479147db..450987d99b9b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -73,13 +73,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -210,7 +210,7 @@ static void __init request_standard_resources(void) res->name = "System RAM"; res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c index 209ae5ad3495..e6928896da2a 100644 --- a/arch/avr32/kernel/setup.c +++ b/arch/avr32/kernel/setup.c @@ -49,13 +49,13 @@ static struct resource __initdata kernel_data = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata kernel_code = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM, + .flags = IORESOURCE_SYSTEM_RAM, .sibling = &kernel_data, }; @@ -134,7 +134,7 @@ add_physical_memory(resource_size_t start, resource_size_t end) new->start = start; new->end = end; new->name = "System RAM"; - new->flags = IORESOURCE_MEM; + new->flags = IORESOURCE_SYSTEM_RAM; *pprev = new; } diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index a5ecef7188ba..136c69f1fb8a 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -70,14 +70,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | 
IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; unsigned long memory_start; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 569a7d5242dd..c745f0ea2577 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -732,21 +732,23 @@ static void __init resource_init(void) end = HIGHMEM_START - 1; res = alloc_bootmem(sizeof(struct resource)); + + res->start = start; + res->end = end; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + switch (boot_mem_map.map[i].type) { case BOOT_MEM_RAM: case BOOT_MEM_INIT_RAM: case BOOT_MEM_ROM_DATA: res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; break; case BOOT_MEM_RESERVED: default: res->name = "reserved"; } - res->start = start; - res->end = end; - - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); /* diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1b366c477687..3c07d6b96877 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -55,12 +55,12 @@ signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource pdcdata_resource = { @@ -201,7 +201,7 @@ static void __init setup_bootmem(void) res->name = "System RAM"; res->start = pmem_ranges[i].start_pfn << PAGE_SHIFT; res->end = res->start + (pmem_ranges[i].pages << PAGE_SHIFT)-1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); } diff --git a/arch/powerpc/mm/mem.c 
b/arch/powerpc/mm/mem.c index d0f0a514b04e..f078a1f94fc2 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -541,7 +541,7 @@ static int __init add_system_ram_resources(void) res->name = "System RAM"; res->start = base; res->end = base + size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; WARN_ON(request_resource(&iomem_resource, res) < 0); } } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9220db5c996a..cedb0198675f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -374,17 +374,17 @@ static void __init setup_lowcore(void) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource __initdata *standard_resources[] = { @@ -408,7 +408,7 @@ static void __init setup_resources(void) for_each_memblock(memory, reg) { res = alloc_bootmem_low(sizeof(*res)); - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->name = "System RAM"; res->start = reg->base; diff --git a/arch/score/kernel/setup.c b/arch/score/kernel/setup.c index b48459afefdd..f3a0649ab521 100644 --- a/arch/score/kernel/setup.c +++ b/arch/score/kernel/setup.c @@ -101,7 +101,7 @@ static void __init resource_init(void) res->name = "System RAM"; res->start = MEMORY_START; res->end = MEMORY_START + MEMORY_SIZE - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); request_resource(res, &code_resource); 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index de19cfa768f2..3f1c18b28e8a 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -78,17 +78,17 @@ static char __initdata command_line[COMMAND_LINE_SIZE] = { 0, }; static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM, + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, }; unsigned long memory_start; @@ -202,7 +202,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, res->name = "System RAM"; res->start = start; res->end = end - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res)) { pr_err("unable to request memory_resource 0x%lx 0x%lx\n", diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 6f216853f272..1cfe6aab7a11 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -2863,17 +2863,17 @@ void hugetlb_setup(struct pt_regs *regs) static struct resource code_resource = { .name = "Kernel code", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource data_resource = { .name = "Kernel data", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource bss_resource = { .name = "Kernel bss", - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static inline resource_size_t compute_kern_paddr(void *addr) @@ -2909,7 +2909,7 @@ static int __init report_memory(void) res->name = "System RAM"; 
res->start = pavail[i].phys_addr; res->end = pavail[i].phys_addr + pavail[i].reg_size - 1; - res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; if (insert_resource(&iomem_resource, res) < 0) { pr_warn("Resource insertion failed.\n"); diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index bbb855de6569..a992238e9b58 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -1632,14 +1632,14 @@ static struct resource data_resource = { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; static struct resource code_resource = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM }; /* @@ -1673,10 +1673,15 @@ insert_ram_resource(u64 start_pfn, u64 end_pfn, bool reserved) kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) return NULL; - res->name = reserved ? 
"Reserved" : "System RAM"; res->start = start_pfn << PAGE_SHIFT; res->end = (end_pfn << PAGE_SHIFT) - 1; res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + if (reserved) { + res->name = "Reserved"; + } else { + res->name = "System RAM"; + res->flags |= IORESOURCE_SYSRAM; + } if (insert_resource(&iomem_resource, res)) { kfree(res); return NULL; diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c index 3fa317f96122..c2bffa5614a4 100644 --- a/arch/unicore32/kernel/setup.c +++ b/arch/unicore32/kernel/setup.c @@ -72,13 +72,13 @@ static struct resource mem_res[] = { .name = "Kernel code", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM }, { .name = "Kernel data", .start = 0, .end = 0, - .flags = IORESOURCE_MEM + .flags = IORESOURCE_SYSTEM_RAM } }; @@ -211,7 +211,7 @@ request_standard_resources(struct meminfo *mi) res->name = "System RAM"; res->start = mi->bank[i].start; res->end = mi->bank[i].start + mi->bank[i].size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); From 1a085d0727afaedb9506f04798516298b1676e11 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:23 +0100 Subject: [PATCH 07/28] kexec: Set IORESOURCE_SYSTEM_RAM for System RAM Set proper ioresource flags and types for crash kernel reservation areas. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Dave Young Cc: Andrew Morton Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: HATAYAMA Daisuke Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Minfei Huang Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vivek Goyal Cc: kexec@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-8-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- kernel/kexec_core.c | 8 +++++--- kernel/kexec_file.c | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8dc659144869..8d34308ea449 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -66,13 +66,15 @@ struct resource crashk_res = { .name = "Crash kernel", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL }; struct resource crashk_low_res = { .name = "Crash kernel", .start = 0, .end = 0, - .flags = IORESOURCE_BUSY | IORESOURCE_MEM + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, + .desc = IORES_DESC_CRASH_KERNEL }; int kexec_should_crash(struct task_struct *p) @@ -959,7 +961,7 @@ int crash_shrink_memory(unsigned long new_size) ram_res->start = end; ram_res->end = crashk_res.end; - ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM; + ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; ram_res->name = "System RAM"; crashk_res.end = end - 1; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 007b791f676d..2bfcdc064116 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -525,7 +525,7 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, /* Walk the RAM ranges and allocate a suitable range for the buffer */ if (image->type == KEXEC_TYPE_CRASH) ret = walk_iomem_res("Crash kernel", - IORESOURCE_MEM | IORESOURCE_BUSY, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, crashk_res.start, crashk_res.end, kbuf, locate_mem_hole_callback); else From 782b86641e5d471e9eb1cf0072c012d2f758e568 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:24 +0100 Subject: [PATCH 
08/28] xen, mm: Set IORESOURCE_SYSTEM_RAM to System RAM Set IORESOURCE_SYSTEM_RAM in struct resource.flags of "System RAM" entries. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: David Vrabel # xen Cc: Andrew Banman Cc: Andrew Morton Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: David Rientjes Cc: Denys Vlasenko Cc: Gu Zheng Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Mel Gorman Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Tang Chen Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-mm Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1453841853-11383-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/xen/balloon.c | 2 +- mm/memory_hotplug.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 12eab503efd1..dc4305b407bf 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -257,7 +257,7 @@ static struct resource *additional_memory_resource(phys_addr_t size) return NULL; res->name = "System RAM"; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; ret = allocate_resource(&iomem_resource, res, size, 0, -1, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4af58a3a8ffa..979b18cbd343 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -138,7 +138,7 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->name = "System RAM"; res->start = start; res->end = start + size - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; if (request_resource(&iomem_resource, res) < 0) { pr_debug("System RAM resource %pR cannot be added\n", res); kfree(res); From 9a975bee4b3945b271bcff18a520d4863c210f8b Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:25 +0100 
Subject: [PATCH 09/28] drivers: Initialize resource entry to zero I/O resource descriptor, 'desc' in struct resource, needs to be initialized to zero by default. Some drivers call kmalloc() to allocate a resource entry, but do not initialize it to zero by memset(). Change these drivers to call kzalloc(), instead. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Alexandre Bounine Acked-by: Helge Deller Acked-by: Rafael J. Wysocki Acked-by: Simon Horman Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-acpi@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-mm Cc: linux-parisc@vger.kernel.org Cc: linux-renesas-soc@vger.kernel.org Cc: linux-sh@vger.kernel.org Link: http://lkml.kernel.org/r/1453841853-11383-10-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/acpi/acpi_platform.c | 2 +- drivers/parisc/eisa_enumerator.c | 4 ++-- drivers/rapidio/rio.c | 8 ++++---- drivers/sh/superhyway/superhyway.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 296b7a14893a..b6f7fa3a1d40 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -62,7 +62,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) if (count < 0) { return NULL; } else if (count > 0) { - resources = kmalloc(count * sizeof(struct resource), + resources = kzalloc(count * sizeof(struct resource), GFP_KERNEL); if (!resources) { dev_err(&adev->dev, "No memory for resources\n"); diff --git a/drivers/parisc/eisa_enumerator.c b/drivers/parisc/eisa_enumerator.c index a656d9e83343..21905fef2cbf 100644 --- a/drivers/parisc/eisa_enumerator.c +++ b/drivers/parisc/eisa_enumerator.c @@ -91,7 +91,7 @@ static int configure_memory(const unsigned char *buf, for (i=0;i<HPEE_MEMORY_MAX_ENT;i++) { c = get_8(buf+len); - if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) { + if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) { int result; res->name = name; @@ -183,7
+183,7 @@ static int configure_port(const unsigned char *buf, struct resource *io_parent, for (i=0;i<HPEE_PORT_MAX_ENT;i++) { c = get_8(buf+len); - if (NULL != (res = kmalloc(sizeof(struct resource), GFP_KERNEL))) { + if (NULL != (res = kzalloc(sizeof(struct resource), GFP_KERNEL))) { res->name = board; res->start = get_16(buf+len+1); res->end = get_16(buf+len+1)+(c&HPEE_PORT_SIZE_MASK)+1; diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index d7b87c64b7cd..e220edc85c68 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -117,7 +117,7 @@ int rio_request_inb_mbox(struct rio_mport *mport, if (mport->ops->open_inb_mbox == NULL) goto out; - res = kmalloc(sizeof(struct resource), GFP_KERNEL); + res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (res) { rio_init_mbox_res(res, mbox, mbox); @@ -185,7 +185,7 @@ int rio_request_outb_mbox(struct rio_mport *mport, if (mport->ops->open_outb_mbox == NULL) goto out; - res = kmalloc(sizeof(struct resource), GFP_KERNEL); + res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (res) { rio_init_mbox_res(res, mbox, mbox); @@ -285,7 +285,7 @@ int rio_request_inb_dbell(struct rio_mport *mport, { int rc = 0; - struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL); + struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (res) { rio_init_dbell_res(res, start, end); @@ -360,7 +360,7 @@ int rio_release_inb_dbell(struct rio_mport *mport, u16 start, u16 end) struct resource *rio_request_outb_dbell(struct rio_dev *rdev, u16 start, u16 end) { - struct resource *res = kmalloc(sizeof(struct resource), GFP_KERNEL); + struct resource *res = kzalloc(sizeof(struct resource), GFP_KERNEL); if (res) { rio_init_dbell_res(res, start, end); diff --git a/drivers/sh/superhyway/superhyway.c b/drivers/sh/superhyway/superhyway.c index 2d9e7f3d5611..bb1fb7712134 100644 --- a/drivers/sh/superhyway/superhyway.c +++ b/drivers/sh/superhyway/superhyway.c @@ -66,7 +66,7 @@ int superhyway_add_device(unsigned long base, struct superhyway_device *sdev, superhyway_read_vcr(dev, base, &dev->vcr); if (!dev->resource) { - dev->resource = kmalloc(sizeof(struct resource), GFP_KERNEL); + dev->resource =
kzalloc(sizeof(struct resource), GFP_KERNEL); if (!dev->resource) { kfree(dev); return -ENOMEM; From bd7e6cb30ced147292d854a54d4a1f5c5a05d927 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:26 +0100 Subject: [PATCH 10/28] resource: Change walk_system_ram() to use System RAM type Now that all System RAM resource entries have been initialized to IORESOURCE_SYSTEM_RAM type, change walk_system_ram_res() and walk_system_ram_range() to call find_next_iomem_res() by setting @res.flags to IORESOURCE_SYSTEM_RAM and @name to NULL. With this change, they walk through the iomem table to find System RAM ranges without the need to do strcmp() on the resource names. No functional change is made to the interfaces. Signed-off-by: Toshi Kani [ Boris: fixup comments. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-11-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- kernel/resource.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 61512e972ece..994f1e41269b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -415,11 +415,11 @@ int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, } /* - * This function calls callback against all memory range of "System RAM" - * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. - * Now, this function is only for "System RAM". This function deals with - * full ranges and not pfn. If resources are not pfn aligned, dealing - * with pfn can truncate ranges. 
+ * This function calls the @func callback against all memory ranges of type + * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. + * Now, this function is only for System RAM, it deals with full ranges and + * not PFNs. If resources are not PFN-aligned, dealing with PFNs can truncate + * ranges. */ int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)) @@ -430,10 +430,10 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, res.start = start; res.end = end; - res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (!find_next_iomem_res(&res, "System RAM", true))) { + (!find_next_iomem_res(&res, NULL, true))) { ret = (*func)(res.start, res.end, arg); if (ret) break; @@ -446,9 +446,9 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* - * This function calls callback against all memory range of "System RAM" - * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY. - * Now, this function is only for "System RAM". + * This function calls the @func callback against all memory ranges of type + * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. + * It is to be used only for System RAM. 
*/ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) @@ -460,10 +460,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.start = (u64) start_pfn << PAGE_SHIFT; res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; - res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_iomem_res(&res, "System RAM", true) >= 0)) { + (find_next_iomem_res(&res, NULL, true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) @@ -484,7 +484,7 @@ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) } /* * This generic page_is_ram() returns true if specified address is - * registered as "System RAM" in iomem_resource list. + * registered as System RAM in iomem_resource list. */ int __weak page_is_ram(unsigned long pfn) { From 05fee7cfab7fa9d57e71f00bdd8fcff0cf5044a0 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:27 +0100 Subject: [PATCH 11/28] arm/samsung: Change s3c_pm_run_res() to use System RAM type Change s3c_pm_run_res() to check with IORESOURCE_SYSTEM_RAM, instead of strcmp() with "System RAM", to walk through System RAM ranges in the iomem table. No functional change is made to the interface. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Krzysztof Kozlowski Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kukjin Kim Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mm Cc: linux-samsung-soc@vger.kernel.org Link: http://lkml.kernel.org/r/1453841853-11383-12-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/arm/plat-samsung/pm-check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/plat-samsung/pm-check.c b/arch/arm/plat-samsung/pm-check.c index 04aff2c31b46..70f2f699bed3 100644 --- a/arch/arm/plat-samsung/pm-check.c +++ b/arch/arm/plat-samsung/pm-check.c @@ -53,8 +53,8 @@ static void s3c_pm_run_res(struct resource *ptr, run_fn_t fn, u32 *arg) if (ptr->child != NULL) s3c_pm_run_res(ptr->child, fn, arg); - if ((ptr->flags & IORESOURCE_MEM) && - strcmp(ptr->name, "System RAM") == 0) { + if ((ptr->flags & IORESOURCE_SYSTEM_RAM) + == IORESOURCE_SYSTEM_RAM) { S3C_PMDBG("Found system RAM at %08lx..%08lx\n", (unsigned long)ptr->start, (unsigned long)ptr->end); From 1c29f25bf5d6c557017f619b638c619cbbf798c4 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:28 +0100 Subject: [PATCH 12/28] memremap: Change region_intersects() to take @flags and @desc Change region_intersects() to identify a target with @flags and @desc, instead of @name with strcmp(). Change the callers of region_intersects(), memremap() and devm_memremap(), to set IORESOURCE_SYSTEM_RAM in @flags and IORES_DESC_NONE in @desc when searching System RAM. Also, export region_intersects() so that the ACPI EINJ error injection driver can call this function in a later patch. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Dan Williams Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jakub Sitnicki Cc: Jan Kara Cc: Jiang Liu Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vlastimil Babka Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 ++- kernel/memremap.c | 13 +++++++------ kernel/resource.c | 26 +++++++++++++++----------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f1cd22f2df1a..cd5a300d3397 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -385,7 +385,8 @@ enum { REGION_MIXED, }; -int region_intersects(resource_size_t offset, size_t size, const char *type); +int region_intersects(resource_size_t offset, size_t size, unsigned long flags, + unsigned long desc); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); diff --git a/kernel/memremap.c b/kernel/memremap.c index e517a16cb426..293309cac061 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -47,7 +47,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * being mapped does not have i/o side effects and the __iomem * annotation is not applicable. * - * MEMREMAP_WB - matches the default mapping for "System RAM" on + * MEMREMAP_WB - matches the default mapping for System RAM on * the architecture. This is usually a read-allocate write-back cache. * Morever, if MEMREMAP_WB is specified and the requested remap region is RAM * memremap() will bypass establishing a new mapping and instead return @@ -56,11 +56,12 @@ static void *try_ram_remap(resource_size_t offset, size_t size) * MEMREMAP_WT - establish a mapping whereby writes either bypass the * cache or are written through to memory and never exist in a * cache-dirty state with respect to program visibility. Attempts to - * map "System RAM" with this mapping type will fail. + * map System RAM with this mapping type will fail. 
*/ void *memremap(resource_size_t offset, size_t size, unsigned long flags) { - int is_ram = region_intersects(offset, size, "System RAM"); + int is_ram = region_intersects(offset, size, + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); void *addr = NULL; if (is_ram == REGION_MIXED) { @@ -76,7 +77,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) * MEMREMAP_WB is special in that it can be satisifed * from the direct map. Some archs depend on the * capability of memremap() to autodetect cases where - * the requested range is potentially in "System RAM" + * the requested range is potentially in System RAM. */ if (is_ram == REGION_INTERSECTS) addr = try_ram_remap(offset, size); @@ -88,7 +89,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags) * If we don't have a mapping yet and more request flags are * pending then we will be attempting to establish a new virtual * address mapping. Enforce that this mapping is not aliasing - * "System RAM" + * System RAM. 
*/ if (!addr && is_ram == REGION_INTERSECTS && flags) { WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n", @@ -266,7 +267,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res, struct percpu_ref *ref, struct vmem_altmap *altmap) { int is_ram = region_intersects(res->start, resource_size(res), - "System RAM"); + IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE); resource_size_t key, align_start, align_size; struct dev_pagemap *pgmap; struct page_map *page_map; diff --git a/kernel/resource.c b/kernel/resource.c index 994f1e41269b..0041cedc47d6 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -496,31 +496,34 @@ EXPORT_SYMBOL_GPL(page_is_ram); * region_intersects() - determine intersection of region with known resources * @start: region start address * @size: size of region - * @name: name of resource (in iomem_resource) + * @flags: flags of resource (in iomem_resource) + * @desc: descriptor of resource (in iomem_resource) or IORES_DESC_NONE * * Check if the specified region partially overlaps or fully eclipses a - * resource identified by @name. Return REGION_DISJOINT if the region - * does not overlap @name, return REGION_MIXED if the region overlaps - * @type and another resource, and return REGION_INTERSECTS if the - * region overlaps @type and no other defined resource. Note, that - * REGION_INTERSECTS is also returned in the case when the specified - * region overlaps RAM and undefined memory holes. + * resource identified by @flags and @desc (optional with IORES_DESC_NONE). + * Return REGION_DISJOINT if the region does not overlap @flags/@desc, + * return REGION_MIXED if the region overlaps @flags/@desc and another + * resource, and return REGION_INTERSECTS if the region overlaps @flags/@desc + * and no other defined resource. Note that REGION_INTERSECTS is also + * returned in the case when the specified region overlaps RAM and undefined + * memory holes. 
* * region_intersect() is used by memory remapping functions to ensure * the user is not remapping RAM and is a vast speed up over walking * through the resource table page by page. */ -int region_intersects(resource_size_t start, size_t size, const char *name) +int region_intersects(resource_size_t start, size_t size, unsigned long flags, + unsigned long desc) { - unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; resource_size_t end = start + size - 1; int type = 0; int other = 0; struct resource *p; read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { - bool is_type = strcmp(p->name, name) == 0 && - ((p->flags & flags) == flags); + bool is_type = (((p->flags & flags) == flags) && + ((desc == IORES_DESC_NONE) || + (desc == p->desc))); if (start >= p->start && start <= p->end) is_type ? type++ : other++; @@ -539,6 +542,7 @@ int region_intersects(resource_size_t start, size_t size, const char *name) return REGION_DISJOINT; } +EXPORT_SYMBOL_GPL(region_intersects); void __weak arch_remove_reservations(struct resource *avail) { From 3f33647c41962401272bb60dce67e6094d14dbf2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:29 +0100 Subject: [PATCH 13/28] resource: Add walk_iomem_res_desc() Add a new interface, walk_iomem_res_desc(), which walks through the iomem table by identifying a target with @flags and @desc. This interface provides the same functionality as walk_iomem_res(), but does not use strcmp() to @name for better efficiency. walk_iomem_res() is deprecated and will be removed in a later patch. Requested-by: Borislav Petkov Signed-off-by: Toshi Kani [ Fixup comments. ] Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hanjun Guo Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Toshi Kani Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-14-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 3 ++ kernel/resource.c | 66 +++++++++++++++++++++++++++++++++++------- 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 983bea05d69c..2a4a5e839965 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -268,6 +268,9 @@ extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); extern int +walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, + void *arg, int (*func)(u64, u64, void *)); +extern int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); diff --git a/kernel/resource.c b/kernel/resource.c index 0041cedc47d6..37ed2fcb8246 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -333,14 +333,15 @@ int release_resource(struct resource *old) EXPORT_SYMBOL(release_resource); /* - * Finds the lowest iomem reosurce exists with-in [res->start.res->end) - * the caller must specify res->start, res->end, res->flags and "name". - * If found, returns 0, res is overwritten, if not found, returns -1. - * This walks through whole tree and not just first level children - * until and unless first_level_children_only is true. + * Finds the lowest iomem resource existing within [res->start.res->end). + * The caller must specify res->start, res->end, res->flags, and optionally + * desc and "name". If found, returns 0, res is overwritten, if not found, + * returns -1. + * This function walks the whole tree and not just first level children until + * and unless first_level_children_only is true. 
+ */ -static int find_next_iomem_res(struct resource *res, char *name, - bool first_level_children_only) +static int find_next_iomem_res(struct resource *res, unsigned long desc, + char *name, bool first_level_children_only) { resource_size_t start, end; struct resource *p; @@ -360,6 +361,8 @@ static int find_next_iomem_res(struct resource *res, char *name, for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) { if ((p->flags & res->flags) != res->flags) continue; + if ((desc != IORES_DESC_NONE) && (desc != p->desc)) + continue; if (name && strcmp(p->name, name)) continue; if (p->start > end) { @@ -385,12 +388,55 @@ static int find_next_iomem_res(struct resource *res, char *name, * Walks through iomem resources and calls func() with matching resource * ranges. This walks through whole tree and not just first level children. * All the memory ranges which overlap start,end and also match flags and + * desc are valid candidates. + * + * @desc: I/O resource descriptor. Use IORES_DESC_NONE to skip @desc check. + * @flags: I/O resource flags + * @start: start addr + * @end: end addr + * + * NOTE: For a new descriptor search, define a new IORES_DESC in + * <linux/ioport.h> and set it in 'desc' of a target resource entry. + */ +int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, + u64 end, void *arg, int (*func)(u64, u64, void *)) +{ + struct resource res; + u64 orig_end; + int ret = -1; + + res.start = start; + res.end = end; + res.flags = flags; + orig_end = res.end; + + while ((res.start < res.end) && + (!find_next_iomem_res(&res, desc, NULL, false))) { + + ret = (*func)(res.start, res.end, arg); + if (ret) + break; + + res.start = res.end + 1; + res.end = orig_end; + } + + return ret; +} + +/* + * Walks through iomem resources and calls @func with matching resource + * ranges. This walks the whole tree and not just first level children. + * All the memory ranges which overlap start,end and also match flags and * name are valid candidates.
* * @name: name of resource * @flags: resource flags * @start: start addr * @end: end addr + * + * NOTE: This function is deprecated and should not be used in new code. + * Use walk_iomem_res_desc(), instead. */ int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)) @@ -404,7 +450,7 @@ int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, res.flags = flags; orig_end = res.end; while ((res.start < res.end) && - (!find_next_iomem_res(&res, name, false))) { + (!find_next_iomem_res(&res, IORES_DESC_NONE, name, false))) { ret = (*func)(res.start, res.end, arg); if (ret) break; @@ -433,7 +479,7 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (!find_next_iomem_res(&res, NULL, true))) { + (!find_next_iomem_res(&res, IORES_DESC_NONE, NULL, true))) { ret = (*func)(res.start, res.end, arg); if (ret) break; @@ -463,7 +509,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_iomem_res(&res, NULL, true) >= 0)) { + (find_next_iomem_res(&res, IORES_DESC_NONE, NULL, true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) From f0f4711aa16b82016c0b6e59871934bbd71258da Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:30 +0100 Subject: [PATCH 14/28] x86, kexec, nvdimm: Use walk_iomem_res_desc() for iomem search Change the callers of walk_iomem_res() scanning for the following resources by name to use walk_iomem_res_desc() instead. "ACPI Tables" "ACPI Non-volatile Storage" "Persistent Memory (legacy)" "Crash kernel" Note, the caller of walk_iomem_res() with "GART" will be removed in a later patch. 
Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Dave Young Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Chun-Yi Cc: Dan Williams Cc: Denys Vlasenko Cc: Don Zickus Cc: H. Peter Anvin Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Minfei Huang Cc: Peter Zijlstra (Intel) Cc: Ross Zwisler Cc: Stephen Rothwell Cc: Takao Indoh Cc: Thomas Gleixner Cc: Toshi Kani Cc: kexec@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: linux-mm Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/1453841853-11383-15-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 4 ++-- arch/x86/kernel/pmem.c | 4 ++-- drivers/nvdimm/e820.c | 2 +- kernel/kexec_file.c | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 58f34319b29a..35e152eeb6e0 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -599,12 +599,12 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params) /* Add ACPI tables */ cmd.type = E820_ACPI; flags = IORESOURCE_MEM | IORESOURCE_BUSY; - walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd, memmap_entry_callback); /* Add ACPI Non-volatile Storage */ cmd.type = E820_NVS; - walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd, + walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd, memmap_entry_callback); /* Add crashk_low_res region */ diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 14415aff1813..92f70147a9a6 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -13,11 +13,11 @@ static int found(u64 start, u64 end, void *data) static __init int register_e820_pmem(void) { - char *pmem = "Persistent Memory (legacy)"; struct platform_device *pdev; int rc; - rc = walk_iomem_res(pmem, IORESOURCE_MEM, 0, -1, NULL, 
found); + rc = walk_iomem_res_desc(IORES_DESC_PERSISTENT_MEMORY_LEGACY, + IORESOURCE_MEM, 0, -1, NULL, found); if (rc <= 0) return 0; diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index b0045a505dc8..95825b38559a 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -55,7 +55,7 @@ static int e820_pmem_probe(struct platform_device *pdev) for (p = iomem_resource.child; p ; p = p->sibling) { struct nd_region_desc ndr_desc; - if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0) + if (p->desc != IORES_DESC_PERSISTENT_MEMORY_LEGACY) continue; memset(&ndr_desc, 0, sizeof(ndr_desc)); diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 2bfcdc064116..56b18eb1f001 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -524,10 +524,10 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz, /* Walk the RAM ranges and allocate a suitable range for the buffer */ if (image->type == KEXEC_TYPE_CRASH) - ret = walk_iomem_res("Crash kernel", - IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, - crashk_res.start, crashk_res.end, kbuf, - locate_mem_hole_callback); + ret = walk_iomem_res_desc(crashk_res.desc, + IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, + crashk_res.start, crashk_res.end, kbuf, + locate_mem_hole_callback); else ret = walk_system_ram_res(0, -1, kbuf, locate_mem_hole_callback); From f296f2634920d205b93d878b48d87bb7e0a4c256 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:31 +0100 Subject: [PATCH 15/28] x86/kexec: Remove walk_iomem_res() call with GART type There is no longer any driver inserting a "GART" region in the kernel since 707d4eefbdb3 ("Revert "[PATCH] Insert GART region into resource map""). Remove the call to walk_iomem_res() with "GART" type, its callback function, and GART-specific variables set by the callback. 
Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Dave Young Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Chun-Yi Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Minfei Huang Cc: Peter Zijlstra (Intel) Cc: Stephen Rothwell Cc: Takao Indoh Cc: Thomas Gleixner Cc: Toshi Kani Cc: Viresh Kumar Cc: kexec@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-16-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/crash.c | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 35e152eeb6e0..9ef978d69c22 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -57,10 +57,9 @@ struct crash_elf_data { struct kimage *image; /* * Total number of ram ranges we have after various adjustments for - * GART, crash reserved region etc. + * crash reserved region, etc. */ unsigned int max_nr_ranges; - unsigned long gart_start, gart_end; /* Pointer to elf header */ void *ehdr; @@ -201,17 +200,6 @@ static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) return 0; } -static int get_gart_ranges_callback(u64 start, u64 end, void *arg) -{ - struct crash_elf_data *ced = arg; - - ced->gart_start = start; - ced->gart_end = end; - - /* Not expecting more than 1 gart aperture */ - return 1; -} - /* Gather all the required information to prepare elf headers for ram regions */ static void fill_up_crash_elf_data(struct crash_elf_data *ced, @@ -226,22 +214,6 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->max_nr_ranges = nr_ranges; - /* - * We don't create ELF headers for GART aperture as an attempt - * to dump this memory in second kernel leads to hang/crash. 
- * If gart aperture is present, one needs to exclude that region - * and that could lead to need of extra phdr. - */ - walk_iomem_res("GART", IORESOURCE_MEM, 0, -1, - ced, get_gart_ranges_callback); - - /* - * If we have gart region, excluding that could potentially split - * a memory range, resulting in extra header. Account for that. - */ - if (ced->gart_end) - ced->max_nr_ranges++; - /* Exclusion of crash region could split memory ranges */ ced->max_nr_ranges++; @@ -350,13 +322,6 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } - /* Exclude GART region */ - if (ced->gart_end) { - ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end); - if (ret) - return ret; - } - return ret; } From a8fc42530ddd19d7580fe8c9f2ea86220a97e94c Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:32 +0100 Subject: [PATCH 16/28] resource: Kill walk_iomem_res() walk_iomem_res_desc() replaced walk_iomem_res() and there is no caller to walk_iomem_res() any more. Kill it. Also remove @name from find_next_iomem_res() as it is no longer used. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Acked-by: Dave Young Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Hanjun Guo Cc: Jakub Sitnicki Cc: Jiang Liu Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vinod Koul Cc: linux-arch@vger.kernel.org Cc: linux-mm Link: http://lkml.kernel.org/r/1453841853-11383-17-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/ioport.h | 3 --- kernel/resource.c | 49 +++++------------------------------------- 2 files changed, 5 insertions(+), 47 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 2a4a5e839965..afb45597fb5f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -270,9 +270,6 @@ walk_system_ram_res(u64 start, u64 end, void *arg, extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)); -extern int -walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/kernel/resource.c b/kernel/resource.c index 37ed2fcb8246..49834309043c 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -335,13 +335,12 @@ EXPORT_SYMBOL(release_resource); /* * Finds the lowest iomem resource existing within [res->start.res->end). * The caller must specify res->start, res->end, res->flags, and optionally - * desc and "name". If found, returns 0, res is overwritten, if not found, - * returns -1. + * desc. If found, returns 0, res is overwritten, if not found, returns -1. * This function walks the whole tree and not just first level children until * and unless first_level_children_only is true. 
*/ static int find_next_iomem_res(struct resource *res, unsigned long desc, - char *name, bool first_level_children_only) + bool first_level_children_only) { resource_size_t start, end; struct resource *p; @@ -363,8 +362,6 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc, continue; if ((desc != IORES_DESC_NONE) && (desc != p->desc)) continue; - if (name && strcmp(p->name, name)) - continue; if (p->start > end) { p = NULL; break; @@ -411,7 +408,7 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, orig_end = res.end; while ((res.start < res.end) && - (!find_next_iomem_res(&res, desc, NULL, false))) { + (!find_next_iomem_res(&res, desc, false))) { ret = (*func)(res.start, res.end, arg); if (ret) @@ -424,42 +421,6 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, return ret; } -/* - * Walks through iomem resources and calls @func with matching resource - * ranges. This walks the whole tree and not just first level children. - * All the memory ranges which overlap start,end and also match flags and - * name are valid candidates. - * - * @name: name of resource - * @flags: resource flags - * @start: start addr - * @end: end addr - * - * NOTE: This function is deprecated and should not be used in new code. - * Use walk_iomem_res_desc(), instead. - */ -int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, - void *arg, int (*func)(u64, u64, void *)) -{ - struct resource res; - u64 orig_end; - int ret = -1; - - res.start = start; - res.end = end; - res.flags = flags; - orig_end = res.end; - while ((res.start < res.end) && - (!find_next_iomem_res(&res, IORES_DESC_NONE, name, false))) { - ret = (*func)(res.start, res.end, arg); - if (ret) - break; - res.start = res.end + 1; - res.end = orig_end; - } - return ret; -} - /* * This function calls the @func callback against all memory ranges of type * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. 
@@ -479,7 +440,7 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (!find_next_iomem_res(&res, IORES_DESC_NONE, NULL, true))) { + (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) { ret = (*func)(res.start, res.end, arg); if (ret) break; @@ -509,7 +470,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; orig_end = res.end; while ((res.start < res.end) && - (find_next_iomem_res(&res, IORES_DESC_NONE, NULL, true) >= 0)) { + (find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) From 4650bac1fc45d64aef62ab99aa4db93d41dedbd9 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 26 Jan 2016 21:57:33 +0100 Subject: [PATCH 17/28] ACPI/EINJ: Allow memory error injection to NVDIMM In the case of memory error injection, einj_error_inject() checks if a target address is System RAM. Change this check to allow injecting a memory error into NVDIMM memory by calling region_intersects() with IORES_DESC_PERSISTENT_MEMORY. This enables memory error testing on both System RAM and NVDIMM. In addition, page_is_ram() is replaced with region_intersects() with IORESOURCE_SYSTEM_RAM, so that it can verify a target address range with the requested size. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Reviewed-by: Dan Williams Acked-by: Tony Luck Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jarkko Nikula Cc: Len Brown Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Rafael J. 
Wysocki Cc: Thomas Gleixner Cc: Toshi Kani Cc: Vishal Verma Cc: linux-acpi@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-mm Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/1453841853-11383-18-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- drivers/acpi/apei/einj.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 0431883653be..559c1173de1c 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -519,7 +519,7 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, u64 param3, u64 param4) { int rc; - unsigned long pfn; + u64 base_addr, size; /* If user manually set "flags", make sure it is legal */ if (flags && (flags & @@ -545,10 +545,17 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, /* * Disallow crazy address masks that give BIOS leeway to pick * injection address almost anywhere. Insist on page or - * better granularity and that target address is normal RAM. + * better granularity and that target address is normal RAM or + * NVDIMM. */ - pfn = PFN_DOWN(param1 & param2); - if (!page_is_ram(pfn) || ((param2 & PAGE_MASK) != PAGE_MASK)) + base_addr = param1 & param2; + size = ~param2 + 1; + + if (((param2 & PAGE_MASK) != PAGE_MASK) || + ((region_intersects(base_addr, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) + != REGION_INTERSECTS) && + (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY) + != REGION_INTERSECTS))) return -EINVAL; inject: From d9cbe09d39aa13f6924dc5fb88325de7ef01a72e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:14:36 -0800 Subject: [PATCH 18/28] libnvdimm, pmem: fix 'pfn' support for section-misaligned namespaces The altmap for a section-misaligned namespace needs to arrange for the base_pfn to be section-aligned. 
As a result the 'reserve' region (pfns from base that do not have a struct page) must be increased. Otherwise we trip the altmap validation check in __add_pages: if (altmap->base_pfn != phys_start_pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); return -EINVAL; } Signed-off-by: Dan Williams --- drivers/nvdimm/pfn.h | 13 +++++++++++++ drivers/nvdimm/pmem.c | 24 ++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index cc243754acef..6ee707e5b279 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -15,6 +15,7 @@ #define __NVDIMM_PFN_H #include +#include #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" @@ -32,4 +33,16 @@ struct nd_pfn_sb { u8 padding[4012]; __le64 checksum; }; + +#ifdef CONFIG_SPARSEMEM +#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x) +#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x) +#else +/* + * In this case ZONE_DEVICE=n and we will disable 'pfn' device support, + * but we still want pmem to compile. + */ +#define PFN_SECTION_ALIGN_DOWN(x) (x) +#define PFN_SECTION_ALIGN_UP(x) (x) +#endif #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..59d568ab7556 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -356,6 +356,26 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) return 0; } +/* + * We hotplug memory at section granularity, pad the reserved area from + * the previous section base to the namespace base address. 
+ */ +static unsigned long init_altmap_base(resource_size_t base) +{ + unsigned long base_pfn = __phys_to_pfn(base); + + return PFN_SECTION_ALIGN_DOWN(base_pfn); +} + +static unsigned long init_altmap_reserve(resource_size_t base) +{ + unsigned long reserve = __phys_to_pfn(SZ_8K); + unsigned long base_pfn = __phys_to_pfn(base); + + reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); + return reserve; +} + static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); @@ -369,8 +389,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) phys_addr_t offset; int rc; struct vmem_altmap __altmap = { - .base_pfn = __phys_to_pfn(nsio->res.start), - .reserve = __phys_to_pfn(SZ_8K), + .base_pfn = init_altmap_base(nsio->res.start), + .reserve = init_altmap_reserve(nsio->res.start), }; if (!nd_pfn->uuid || !nd_pfn->ndns) From cfe30b872058f211630eda7f65fb19d83beaaa3c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:38:00 -0800 Subject: [PATCH 19/28] libnvdimm, pmem: adjust for section collisions with 'System RAM' On a platform where 'Persistent Memory' and 'System RAM' are mixed within a given sparsemem section, trim the namespace and notify about the sub-optimal alignment. 
Cc: Toshi Kani Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 7 ++ drivers/nvdimm/pfn.h | 10 ++- drivers/nvdimm/pfn_devs.c | 5 ++ drivers/nvdimm/pmem.c | 125 +++++++++++++++++++++++--------- 4 files changed, 111 insertions(+), 36 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9edf7eb7d17c..f5cb88601359 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -133,6 +133,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) return false; @@ -143,6 +144,12 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + nsio = to_nd_namespace_io(dev); + if (region_intersects(nsio->res.start, resource_size(&nsio->res), + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) + return false; + #ifdef ARCH_MEMREMAP_PMEM return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; #else diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index 6ee707e5b279..8e343a3ca873 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -27,10 +27,13 @@ struct nd_pfn_sb { __le32 flags; __le16 version_major; __le16 version_minor; - __le64 dataoff; + __le64 dataoff; /* relative to namespace_base + start_pad */ __le64 npfns; __le32 mode; - u8 padding[4012]; + /* minor-version-1 additions for section alignment */ + __le32 start_pad; + __le32 end_trunc; + u8 padding[4004]; __le64 checksum; }; @@ -45,4 +48,7 @@ struct nd_pfn_sb { #define PFN_SECTION_ALIGN_DOWN(x) (x) #define PFN_SECTION_ALIGN_UP(x) (x) #endif + +#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x))) +#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x))) #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c 
index ae81a2f1da50..75a31a7359fb 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -299,6 +299,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) return -ENODEV; + if (__le16_to_cpu(pfn_sb->version_minor) < 1) { + pfn_sb->start_pad = 0; + pfn_sb->end_trunc = 0; + } + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 59d568ab7556..0cb450e1b400 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -43,7 +43,10 @@ struct pmem_device { phys_addr_t data_offset; u64 pfn_flags; void __pmem *virt_addr; + /* immutable base size of the namespace */ size_t size; + /* trim size when namespace capacity has been section aligned */ + u32 pfn_pad; struct badblocks bb; }; @@ -145,7 +148,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); - return pmem->size - offset; + return pmem->size - pmem->pfn_pad - offset; } static const struct block_device_operations pmem_fops = { @@ -236,7 +239,8 @@ static int pmem_attach_disk(struct device *dev, disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); disk->driverfs_dev = dev; - set_capacity(disk, (pmem->size - pmem->data_offset) / 512); + set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) + / 512); pmem->pmem_disk = disk; devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) @@ -279,6 +283,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = 0, end_trunc = 0; + resource_size_t start, size; + struct nd_namespace_io *nsio; struct nd_region *nd_region; unsigned long npfns; phys_addr_t 
offset; @@ -304,21 +311,56 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } memset(pfn_sb, 0, sizeof(*pfn_sb)); - npfns = (pmem->size - SZ_8K) / SZ_4K; + + /* + * Check if pmem collides with 'System RAM' when section aligned and + * trim it accordingly + */ + nsio = to_nd_namespace_io(&ndns->dev); + start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); + size = resource_size(&nsio->res); + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + + start = nsio->res.start; + start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + } + + start = nsio->res.start; + size = PHYS_SECTION_ALIGN_UP(start + size) - start; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + size = resource_size(&nsio->res); + end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + } + + if (start_pad + end_trunc) + dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_name(&ndns->dev), start_pad + end_trunc); + /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. 
*/ + start += start_pad; + npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) - offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align); + offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) + - start; else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(SZ_8K, nd_pfn->align); + offset = ALIGN(start + SZ_8K, nd_pfn->align) - start; else goto err; - npfns = (pmem->size - offset) / SZ_4K; + if (offset + start_pad + end_trunc >= pmem->size) { + dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", + dev_name(&ndns->dev)); + goto err; + } + + npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -326,6 +368,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); + pfn_sb->version_minor = cpu_to_le16(1); + pfn_sb->start_pad = cpu_to_le32(start_pad); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); @@ -376,41 +421,36 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); - struct device *dev = &nd_pfn->dev; - struct nd_region *nd_region; - struct vmem_altmap *altmap; - struct nd_pfn_sb *pfn_sb; - struct pmem_device *pmem; - struct request_queue *q; - phys_addr_t offset; int rc; + struct resource res; + struct request_queue *q; + struct pmem_device *pmem; + struct vmem_altmap *altmap; + struct device *dev = &nd_pfn->dev; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct 
nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { - .base_pfn = init_altmap_base(nsio->res.start), - .reserve = init_altmap_reserve(nsio->res.start), + .base_pfn = init_altmap_base(base), + .reserve = init_altmap_reserve(base), }; - if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; - - nd_region = to_nd_region(dev->parent); - rc = nd_pfn_init(nd_pfn); - if (rc) - return rc; - - pfn_sb = nd_pfn->pfn_sb; - offset = le64_to_cpu(pfn_sb->dataoff); + pmem = dev_get_drvdata(dev); + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = start_pad + end_trunc; nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (pmem->data_offset < SZ_8K) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(&nsio->res) - offset) + nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, @@ -418,7 +458,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(offset - SZ_8K); + altmap->free = __phys_to_pfn(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; @@ -426,10 +466,12 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* establish pfn range for lookup, and switch to direct map */ - pmem = dev_get_drvdata(dev); q = pmem->pmem_queue; + memcpy(&res, &nsio->res, sizeof(res)); + res.start += start_pad; + res.end -= end_trunc; devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void 
__pmem *) devm_memremap_pages(dev, &nsio->res, + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { @@ -438,7 +480,6 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* attach pmem disk in "pfn-mode" */ - pmem->data_offset = offset; rc = pmem_attach_disk(dev, ndns, pmem); if (rc) goto err; @@ -447,6 +488,22 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) err: nvdimm_namespace_detach_pfn(ndns); return rc; + +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + /* we need a valid pfn_sb before we can init a vmem_altmap */ + return __nvdimm_namespace_attach_pfn(nd_pfn); } static int nd_pmem_probe(struct device *dev) From f6ed58c70d14572d0272ee129579dbfc97b97f50 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:46:04 -0800 Subject: [PATCH 20/28] libnvdimm, pfn: 'resource'-address and 'size' attributes for pfn devices Currently with a raw mode pmem namespace the physical memory address range for the device can be obtained via /sys/block/pmemX/device/{resource|size}. Add similar attributes for pfn instances that take the struct page memmap and section padding into account.
Reported-by: Haozhong Zhang Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 75a31a7359fb..254d3bc13f70 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -205,11 +205,67 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start + + start_pad + offset); + } else { + /* no address to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(resource); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res) - start_pad + - end_trunc - offset); + } else { + /* no size to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_pfn_attributes[] = { 
&dev_attr_mode.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_align.attr, + &dev_attr_resource.attr, + &dev_attr_size.attr, NULL, }; From 45f68802f2542a6ad1550dab9c07004de6e0df40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 6 Mar 2016 08:04:12 -0800 Subject: [PATCH 21/28] libnvdimm, pmem: fix ia64 build, use PHYS_PFN drivers/nvdimm/pmem.c: In function 'nvdimm_namespace_attach_pfn': drivers/nvdimm/pmem.c:367:3: error: implicit declaration of function '__phys_to_pfn' [-Werror=implicit-function-declaration] .base_pfn = __phys_to_pfn(nsio->res.start), ia64 does not provide __phys_to_pfn(), just use the PHYS_PFN() alias. Cc: Guenter Roeck Reported-by: kbuild test robot Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0cb450e1b400..74e2569910d8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -407,15 +407,15 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) */ static unsigned long init_altmap_base(resource_size_t base) { - unsigned long base_pfn = __phys_to_pfn(base); + unsigned long base_pfn = PHYS_PFN(base); return PFN_SECTION_ALIGN_DOWN(base_pfn); } static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = __phys_to_pfn(SZ_8K); - unsigned long base_pfn = __phys_to_pfn(base); + unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); return reserve; @@ -458,7 +458,7 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(pmem->data_offset - SZ_8K); + altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; From 4e0d8f7eff3fbfa3e3ac5782669c078f590dc9e2 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 
2016 12:47:03 -0700 Subject: [PATCH 22/28] resource: Change __request_region to inherit from immediate parent __request_region() sets 'flags' of a new resource from @parent as it inherits the parent's attribute. When a target resource has a conflict, this function inserts the new resource entry under the conflicted entry by updating @parent. In this case, the new resource entry needs to inherit attribute from the updated parent. This conflict is a typical case since __request_region() is used to allocate a new resource from a specific resource range. For instance, request_mem_region() calls __request_region() with @parent set to &iomem_resource, which is the root entry of the whole iomem range. When this request results in inserting a new entry "DEV-A" under "BUS-1", "DEV-A" needs to inherit from the immediate parent "BUS-1" as it holds specific attribute for the range. root (&iomem_resource) : + "BUS-1" + "DEV-A" Change __request_region() to set 'flags' and 'desc' of a new entry from the immediate parent. 
Signed-off-by: Toshi Kani Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Cc: Dan Williams Signed-off-by: Dan Williams --- kernel/resource.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/resource.c b/kernel/resource.c index 4d466052426b..5a56e8f24058 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1085,15 +1085,16 @@ struct resource * __request_region(struct resource *parent, res->name = name; res->start = start; res->end = start + n - 1; - res->flags = resource_type(parent) | resource_ext_type(parent); - res->flags |= IORESOURCE_BUSY | flags; - res->desc = IORES_DESC_NONE; write_lock(&resource_lock); for (;;) { struct resource *conflict; + res->flags = resource_type(parent) | resource_ext_type(parent); + res->flags |= IORESOURCE_BUSY | flags; + res->desc = parent->desc; + conflict = __request_resource(parent, res); if (!conflict) break; From ff3cc952d3f009e6c376cc40651b87187ce364a6 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:04 -0700 Subject: [PATCH 23/28] resource: Add remove_resource interface insert_resource() and insert_resource_conflict() are called by resource producers to insert a new resource. When there is any conflict, they move conflicting resources down to the children of the new resource. There is no destructor of these interfaces, however. Add remove_resource(), which removes a resource previously inserted by insert_resource() or insert_resource_conflict(), and moves the children up to where they were before. __release_resource() is changed to have @release_child, so that this function can be used for remove_resource() as well. Also add comments to clarify that these functions are intended for producers of resources to avoid any confusion with request/release_resource() for consumers. 
Signed-off-by: Toshi Kani Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Cc: Dan Williams Signed-off-by: Dan Williams --- include/linux/ioport.h | 1 + kernel/resource.c | 51 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 47 insertions(+), 5 deletions(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index afb45597fb5f..8017b8bf45fa 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -174,6 +174,7 @@ extern void reserve_region_with_split(struct resource *root, extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new); extern int insert_resource(struct resource *parent, struct resource *new); extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); +extern int remove_resource(struct resource *old); extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, diff --git a/kernel/resource.c b/kernel/resource.c index 5a56e8f24058..effb6ee2c3e8 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -233,9 +233,9 @@ static struct resource * __request_resource(struct resource *root, struct resour } } -static int __release_resource(struct resource *old) +static int __release_resource(struct resource *old, bool release_child) { - struct resource *tmp, **p; + struct resource *tmp, **p, *chd; p = &old->parent->child; for (;;) { @@ -243,7 +243,17 @@ static int __release_resource(struct resource *old) if (!tmp) break; if (tmp == old) { - *p = tmp->sibling; + if (release_child || !(tmp->child)) { + *p = tmp->sibling; + } else { + for (chd = tmp->child;; chd = chd->sibling) { + chd->parent = tmp->parent; + if (!(chd->sibling)) + break; + } + *p = tmp->child; + chd->sibling = tmp->sibling; + } old->parent = NULL; return 0; } @@ -325,7 +335,7 @@ int release_resource(struct resource *old) int retval; write_lock(&resource_lock); - retval = 
__release_resource(old); + retval = __release_resource(old, true); write_unlock(&resource_lock); return retval; } @@ -679,7 +689,7 @@ static int reallocate_resource(struct resource *root, struct resource *old, old->start = new.start; old->end = new.end; } else { - __release_resource(old); + __release_resource(old, true); *old = new; conflict = __request_resource(root, old); BUG_ON(conflict); @@ -825,6 +835,9 @@ static struct resource * __insert_resource(struct resource *parent, struct resou * entirely fit within the range of the new resource, then the new * resource is inserted and the conflicting resources become children of * the new resource. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ struct resource *insert_resource_conflict(struct resource *parent, struct resource *new) { @@ -842,6 +855,9 @@ struct resource *insert_resource_conflict(struct resource *parent, struct resour * @new: new resource to insert * * Returns 0 on success, -EBUSY if the resource can't be inserted. + * + * This function is intended for producers of resources, such as FW modules + * and bus drivers. */ int insert_resource(struct resource *parent, struct resource *new) { @@ -885,6 +901,31 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new) write_unlock(&resource_lock); } +/** + * remove_resource - Remove a resource in the resource tree + * @old: resource to remove + * + * Returns 0 on success, -EINVAL if the resource is not valid. + * + * This function removes a resource previously inserted by insert_resource() + * or insert_resource_conflict(), and moves the children (if any) up to + * where they were before. insert_resource() and insert_resource_conflict() + * insert a new resource, and move any conflicting resources down to the + * children of the new resource. 
+ * insert_resource(), insert_resource_conflict() and remove_resource() are + * intended for producers of resources, such as FW modules and bus drivers. + */ +int remove_resource(struct resource *old) +{ + int retval; + + write_lock(&resource_lock); + retval = __release_resource(old, false); + write_unlock(&resource_lock); + return retval; +} + static int __adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) { From 8095d0f225fe31eaac4a013177b77ed5283278f8 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:05 -0700 Subject: [PATCH 24/28] resource: Export insert_resource and remove_resource insert_resource() and remove_resource() are called by producers of resources, such as FW modules and bus drivers. These modules may be implemented as loadable modules. Export insert_resource() and remove_resource() so that they can be called from such modules. link: https://lkml.org/lkml/2016/3/8/872 Signed-off-by: Toshi Kani Cc: Linus Torvalds Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Cc: Dan Williams Signed-off-by: Dan Williams --- kernel/resource.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/resource.c b/kernel/resource.c index effb6ee2c3e8..2e78ead30934 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -866,6 +866,7 @@ int insert_resource(struct resource *parent, struct resource *new) conflict = insert_resource_conflict(parent, new); return conflict ?
-EBUSY : 0; } +EXPORT_SYMBOL_GPL(insert_resource); /** * insert_resource_expand_to_fit - Insert a resource into the resource tree @@ -925,6 +926,7 @@ int remove_resource(struct resource *old) write_unlock(&resource_lock); return retval; } +EXPORT_SYMBOL_GPL(remove_resource); static int __adjust_resource(struct resource *res, resource_size_t start, resource_size_t size) From af1996ef59dbcb36fe4878df7c717a02eb89d07a Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:06 -0700 Subject: [PATCH 25/28] ACPI: Change NFIT driver to insert new resource ACPI 6 defines persistent memory (PMEM) ranges in multiple firmware interfaces, e820, EFI, and ACPI NFIT table. This EFI change, however, triggers a bug in the grub bootloader, which treats EFI_PERSISTENT_MEMORY type as regular memory and corrupts stored user data [1]. Therefore, BIOS may set generic reserved type in e820 and EFI to cover PMEM ranges. The kernel can initialize PMEM ranges from ACPI NFIT table alone. This scheme causes a problem in the iomem table, though. On x86, for instance, e820_reserve_resources() initializes top-level entries (iomem_resource.child) from the e820 table at early boot-time. This creates a "reserved" entry for a PMEM range, which does not allow region_intersects() to check with PMEM type. Change acpi_nfit_register_region() to call acpi_nfit_insert_resource(), which calls insert_resource() to insert a PMEM entry from NFIT when the iomem table does not have a PMEM entry already. That is, when a PMEM range is marked as reserved type in e820, it inserts a "Persistent Memory" entry, which results in the following hierarchy. + "Persistent Memory" + "reserved" This allows the EINJ driver, which calls region_intersects() to check PMEM ranges, to keep working even if BIOS sets reserved type (or sets nothing) to PMEM ranges in e820 and EFI. [1]: https://lists.gnu.org/archive/html/grub-devel/2015-11/msg00209.html Signed-off-by: Toshi Kani Cc: Rafael J.
Wysocki Cc: Dan Williams Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index fb53db187854..269de5f75d98 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1571,6 +1571,48 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, return 0; } +static void acpi_nfit_remove_resource(void *data) +{ + struct resource *res = data; + + remove_resource(res); +} + +static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc) +{ + struct resource *res, *nd_res = ndr_desc->res; + int is_pmem, ret; + + /* No operation if the region is already registered as PMEM */ + is_pmem = region_intersects(nd_res->start, resource_size(nd_res), + IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY); + if (is_pmem == REGION_INTERSECTS) + return 0; + + res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->name = "Persistent Memory"; + res->start = nd_res->start; + res->end = nd_res->end; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_PERSISTENT_MEMORY; + + ret = insert_resource(&iomem_resource, res); + if (ret) + return ret; + + ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); + if (ret) { + remove_resource(res); + return ret; + } + + return 0; +} + static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, struct nd_region_desc *ndr_desc) { @@ -1781,6 +1823,12 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { + rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc); + if (rc) + dev_warn(acpi_desc->dev, + "failed to insert pmem resource to iomem: %d\n", + rc); + rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); if (rc) { dev_err(acpi_desc->dev, From 
55155291b32d24371256adbcc67f9f53cf3f314f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Mar 2016 09:21:54 +1100 Subject: [PATCH 26/28] pmem: don't allocate unused major device number When alloc_disk(0) or alloc_disk_node(0, XX) is used, the ->major number is completely ignored: all devices are allocated with a major of BLOCK_EXT_MAJOR. So there is no point allocating pmem_major. Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 74e2569910d8..ba8d5b6bfad0 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -50,8 +50,6 @@ struct pmem_device { struct badblocks bb; }; -static int pmem_major; - static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) { if (bb->count) { @@ -231,8 +229,6 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; } - disk->major = pmem_major; - disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; @@ -579,26 +575,13 @@ static struct nd_device_driver nd_pmem_driver = { static int __init pmem_init(void) { - int error; - - pmem_major = register_blkdev(0, "pmem"); - if (pmem_major < 0) - return pmem_major; - - error = nd_driver_register(&nd_pmem_driver); - if (error) { - unregister_blkdev(pmem_major, "pmem"); - return error; - } - - return 0; + return nd_driver_register(&nd_pmem_driver); } module_init(pmem_init); static void pmem_exit(void) { driver_unregister(&nd_pmem_driver.drv); - unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); From ec56151d382c2140851b4f25203af9016ba84fea Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 10 Mar 2016 08:59:28 +1100 Subject: [PATCH 27/28] nvdimm/blk: don't allocate unused major device number When alloc_disk(0) is used ->major is completely ignored, all devices are allocated with a "major" of BLOCK_EXT_MAJOR.
So don't allocate nd_blk_major Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 91a336ea8c4f..e9ff9229d942 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -31,8 +31,6 @@ struct nd_blk_device { u32 internal_lbasize; }; -static int nd_blk_major; - static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) { return blk_dev->nsblk->lbasize - blk_dev->sector_size; @@ -264,7 +262,6 @@ static int nd_blk_attach_disk(struct nd_namespace_common *ndns, } disk->driverfs_dev = &ndns->dev; - disk->major = nd_blk_major; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->private_data = blk_dev; @@ -358,25 +355,12 @@ static struct nd_device_driver nd_blk_driver = { static int __init nd_blk_init(void) { - int rc; - - rc = register_blkdev(0, "nd_blk"); - if (rc < 0) - return rc; - - nd_blk_major = rc; - rc = nd_driver_register(&nd_blk_driver); - - if (rc < 0) - unregister_blkdev(nd_blk_major, "nd_blk"); - - return rc; + return nd_driver_register(&nd_blk_driver); } static void __exit nd_blk_exit(void) { driver_unregister(&nd_blk_driver.drv); - unregister_blkdev(nd_blk_major, "nd_blk"); } MODULE_AUTHOR("Ross Zwisler "); From ff8e92d5d94b99aab39f439d532cba435947dfc0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 10 Mar 2016 08:59:28 +1100 Subject: [PATCH 28/28] nvdimm/btt: don't allocate unused major device number alloc_disk(0) does not require or use a ->major number, all devices are allocated with a major of BLOCK_EXT_MAJOR. So don't allocate btt_major. 
Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index efb2c1ceef98..c32cbb593600 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -31,8 +31,6 @@ enum log_ent_request { LOG_OLD_ENT }; -static int btt_major; - static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, void *buf, size_t n) { @@ -1246,7 +1244,6 @@ static int btt_blk_init(struct btt *btt) nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; - btt->btt_disk->major = btt_major; btt->btt_disk->first_minor = 0; btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; @@ -1423,22 +1420,11 @@ EXPORT_SYMBOL(nvdimm_namespace_detach_btt); static int __init nd_btt_init(void) { - int rc; - - btt_major = register_blkdev(0, "btt"); - if (btt_major < 0) - return btt_major; + int rc = 0; debugfs_root = debugfs_create_dir("btt", NULL); - if (IS_ERR_OR_NULL(debugfs_root)) { + if (IS_ERR_OR_NULL(debugfs_root)) rc = -ENXIO; - goto err_debugfs; - } - - return 0; - - err_debugfs: - unregister_blkdev(btt_major, "btt"); return rc; } @@ -1446,7 +1432,6 @@ static int __init nd_btt_init(void) static void __exit nd_btt_exit(void) { debugfs_remove_recursive(debugfs_root); - unregister_blkdev(btt_major, "btt"); } MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);