From 0178722be9ed575d1724f6b815b7a1eb74b1ccb7 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 22 Jul 2013 10:18:15 +0900 Subject: [PATCH 01/32] s390: replace strict_strtoul() with kstrtoul() The usage of strict_strtoul() is not preferred, because strict_strtoul() is obsolete. Thus, kstrtoul() should be used. Signed-off-by: Jingoo Han Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_devmap.c | 8 ++++---- drivers/s390/cio/ccwgroup.c | 2 +- drivers/s390/cio/cmf.c | 2 +- drivers/s390/cio/css.c | 2 +- drivers/s390/cio/device.c | 2 +- drivers/s390/net/qeth_l3_sys.c | 2 +- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 58bc6eb49de1..2ead7e78c456 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -930,7 +930,7 @@ dasd_use_raw_store(struct device *dev, struct device_attribute *attr, if (IS_ERR(devmap)) return PTR_ERR(devmap); - if ((strict_strtoul(buf, 10, &val) != 0) || val > 1) + if ((kstrtoul(buf, 10, &val) != 0) || val > 1) return -EINVAL; spin_lock(&dasd_devmap_lock); @@ -1225,7 +1225,7 @@ dasd_expires_store(struct device *dev, struct device_attribute *attr, if (IS_ERR(device)) return -ENODEV; - if ((strict_strtoul(buf, 10, &val) != 0) || + if ((kstrtoul(buf, 10, &val) != 0) || (val > DASD_EXPIRES_MAX) || val == 0) { dasd_put_device(device); return -EINVAL; @@ -1265,7 +1265,7 @@ dasd_retries_store(struct device *dev, struct device_attribute *attr, if (IS_ERR(device)) return -ENODEV; - if ((strict_strtoul(buf, 10, &val) != 0) || + if ((kstrtoul(buf, 10, &val) != 0) || (val > DASD_RETRIES_MAX)) { dasd_put_device(device); return -EINVAL; @@ -1307,7 +1307,7 @@ dasd_timeout_store(struct device *dev, struct device_attribute *attr, if (IS_ERR(device) || !device->block) return -ENODEV; - if ((strict_strtoul(buf, 10, &val) != 0) || + if ((kstrtoul(buf, 10, &val) != 0) || val > UINT_MAX / HZ) { dasd_put_device(device); return -EINVAL; diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 84846c2b96d3..959135a01847 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -137,7 +137,7 @@ static ssize_t ccwgroup_online_store(struct device *dev, if (!try_module_get(gdrv->driver.owner)) return -EINVAL; - ret = strict_strtoul(buf, 0, &value); + ret = kstrtoul(buf, 0, &value); if (ret) goto out; diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 4495e0627a40..23054f8fa9fc 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -1182,7 +1182,7 @@ static ssize_t cmb_enable_store(struct device *dev, int ret; unsigned long val; - ret = strict_strtoul(buf, 16, &val); + ret = kstrtoul(buf, 16, &val); if (ret) return ret; diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 1ebe5d3ddebb..4eb2a54e64f2 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -740,7 +740,7 @@ css_cm_enable_store(struct device *dev, struct device_attribute *attr, int ret; unsigned long val; - ret = strict_strtoul(buf, 16, &val); + ret = kstrtoul(buf, 16, &val); if (ret) return ret; mutex_lock(&css->mutex); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 1ab5f6c36d9b..e4a7ab2bb629 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -564,7 +564,7 @@ static ssize_t online_store (struct device *dev, struct device_attribute *attr, ret = 0; } else { force = 0; - ret = strict_strtoul(buf, 16, &i); + ret = kstrtoul(buf, 16, &i); } if (ret) goto out; diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index d1c8025b0b03..adef5f5de118 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -208,7 +208,7 @@ static ssize_t qeth_l3_dev_sniffer_store(struct device *dev, goto out; } - rc = strict_strtoul(buf, 16, &i); + rc = kstrtoul(buf, 16, &i); if (rc) { rc = -EINVAL; goto out; From 958d9072b691ecd99c5d7c01939edfc6f8ba0269 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Jul 2013 06:43:57 +0200 Subject: [PATCH 02/32] s390: replace remaining strict_strtoul() with kstrtoul() Replace the last two strict_strtoul() with kstrtoul(). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 2 +- arch/s390/kernel/vdso.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index e9fadb04e3c6..2bc08039140f 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1299,7 +1299,7 @@ int regs_query_register_offset(const char *name) if (!name || *name != 'r') return -EINVAL; - if (strict_strtoul(name + 1, 10, &offset)) + if (kstrtoul(name + 1, 10, &offset)) return -EINVAL; if (offset >= NUM_GPRS) return -EINVAL; diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d7776281cb60..3cf20930574e 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -63,7 +63,7 @@ static int __init vdso_setup(char *s) else if (strncmp(s, "off", 4) == 0) vdso_enabled = 0; else { - rc = strict_strtoul(s, 0, &val); + rc = kstrtoul(s, 0, &val); vdso_enabled = rc ? 0 : !!val; } return !rc; From 0ea46b0e371e3ccc0ce666c4988a7961e4ffa8ec Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Fri, 14 Jun 2013 10:24:25 +0200 Subject: [PATCH 03/32] s390/dasd: cleanup timeout and transport error messages Just a small update to the wording of the messages, to bring them more in line with our other messages. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_erp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index 8d11f773a752..ba99b64e2b3f 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -160,11 +160,13 @@ dasd_log_sense(struct dasd_ccw_req *cqr, struct irb *irb) device = cqr->startdev; if (cqr->intrc == -ETIMEDOUT) { - dev_err(&device->cdev->dev, "cqr %p timeout error", cqr); + dev_err(&device->cdev->dev, + "A timeout error occurred for cqr %p", cqr); return; } if (cqr->intrc == -ENOLINK) { - dev_err(&device->cdev->dev, "cqr %p transport error", cqr); + dev_err(&device->cdev->dev, + "A transport error occurred for cqr %p", cqr); return; } /* dump sense data */ From a9a6f0341df9a634a98aaf252c89962af77d1376 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 25 Jun 2013 14:17:57 +0200 Subject: [PATCH 04/32] s390/airq: introduce adapter interrupt vector helper The PCI code is the first user of adapter interrupts vectors. Add a set of helpers to airq.c to separate the adatper interrupt code from the PCI bits. The helpers allow for adapter interrupt vectors of any size. Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/airq.h | 67 +++++++++++++++ drivers/s390/cio/airq.c | 153 +++++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+) diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h index 4066cee0c2d2..4bbb5957ed1b 100644 --- a/arch/s390/include/asm/airq.h +++ b/arch/s390/include/asm/airq.h @@ -9,6 +9,8 @@ #ifndef _ASM_S390_AIRQ_H #define _ASM_S390_AIRQ_H +#include + struct airq_struct { struct hlist_node list; /* Handler queueing. */ void (*handler)(struct airq_struct *); /* Thin-interrupt handler */ @@ -23,4 +25,69 @@ struct airq_struct { int register_adapter_interrupt(struct airq_struct *airq); void unregister_adapter_interrupt(struct airq_struct *airq); +/* Adapter interrupt bit vector */ +struct airq_iv { + unsigned long *vector; /* Adapter interrupt bit vector */ + unsigned long *avail; /* Allocation bit mask for the bit vector */ + unsigned long *bitlock; /* Lock bit mask for the bit vector */ + unsigned long *ptr; /* Pointer associated with each bit */ + unsigned int *data; /* 32 bit value associated with each bit */ + unsigned long bits; /* Number of bits in the vector */ + unsigned long end; /* Number of highest allocated bit + 1 */ + spinlock_t lock; /* Lock to protect alloc & free */ +}; + +#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */ +#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */ +#define AIRQ_IV_PTR 4 /* Allocate the ptr array */ +#define AIRQ_IV_DATA 8 /* Allocate the data array */ + +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags); +void airq_iv_release(struct airq_iv *iv); +unsigned long airq_iv_alloc_bit(struct airq_iv *iv); +void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit); +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end); + +static inline unsigned long airq_iv_end(struct airq_iv *iv) +{ + return iv->end; +} + +static inline void airq_iv_lock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_lock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_unlock(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + bit_spin_unlock(bit ^ be_to_le, iv->bitlock); +} + +static inline void airq_iv_set_data(struct airq_iv *iv, unsigned long bit, + unsigned int data) +{ + iv->data[bit] = data; +} + +static inline unsigned int airq_iv_get_data(struct airq_iv *iv, + unsigned long bit) +{ + return iv->data[bit]; +} + +static inline void airq_iv_set_ptr(struct airq_iv *iv, unsigned long bit, + unsigned long ptr) +{ + iv->ptr[bit] = ptr; +} + +static inline unsigned long airq_iv_get_ptr(struct airq_iv *iv, + unsigned long bit) +{ + return iv->ptr[bit]; +} + #endif /* _ASM_S390_AIRQ_H */ diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 91edbd7ee806..6ead6d076445 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -93,3 +93,156 @@ void do_adapter_IO(u8 isc) airq->handler(airq); rcu_read_unlock(); } + +/** + * airq_iv_create - create an interrupt vector + * @bits: number of bits in the interrupt vector + * @flags: allocation flags + * + * Returns a pointer to an interrupt vector structure + */ +struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags) +{ + struct airq_iv *iv; + unsigned long size; + + iv = kzalloc(sizeof(*iv), GFP_KERNEL); + if (!iv) + goto out; + iv->bits = bits; + size = BITS_TO_LONGS(bits) * sizeof(unsigned long); + iv->vector = kzalloc(size, GFP_KERNEL); + if (!iv->vector) + goto out_free; + if (flags & AIRQ_IV_ALLOC) { + iv->avail = kmalloc(size, GFP_KERNEL); + if (!iv->avail) + goto out_free; + memset(iv->avail, 0xff, size); + iv->end = 0; + } else + iv->end = bits; + if (flags & AIRQ_IV_BITLOCK) { + iv->bitlock = kzalloc(size, GFP_KERNEL); + if (!iv->bitlock) + goto out_free; + } + if (flags & AIRQ_IV_PTR) { + size = bits * sizeof(unsigned long); + iv->ptr = kzalloc(size, GFP_KERNEL); + if (!iv->ptr) + goto out_free; + } + if (flags & AIRQ_IV_DATA) { + size = bits * sizeof(unsigned int); + iv->data = kzalloc(size, GFP_KERNEL); + if (!iv->data) + goto out_free; + } + spin_lock_init(&iv->lock); + return iv; + +out_free: + kfree(iv->ptr); + kfree(iv->bitlock); + kfree(iv->avail); + kfree(iv->vector); + kfree(iv); +out: + return NULL; +} +EXPORT_SYMBOL(airq_iv_create); + +/** + * airq_iv_release - release an interrupt vector + * @iv: pointer to interrupt vector structure + */ +void airq_iv_release(struct airq_iv *iv) +{ + kfree(iv->data); + kfree(iv->ptr); + kfree(iv->bitlock); + kfree(iv->vector); + kfree(iv->avail); + kfree(iv); +} +EXPORT_SYMBOL(airq_iv_release); + +/** + * airq_iv_alloc_bit - allocate an irq bit from an interrupt vector + * @iv: pointer to an interrupt vector structure + * + * Returns the bit number of the allocated irq, or -1UL if no bit + * is available or the AIRQ_IV_ALLOC flag has not been specified + */ +unsigned long airq_iv_alloc_bit(struct airq_iv *iv) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + unsigned long bit; + + if (!iv->avail) + return -1UL; + spin_lock(&iv->lock); + bit = find_first_bit_left(iv->avail, iv->bits); + if (bit < iv->bits) { + clear_bit(bit ^ be_to_le, iv->avail); + if (bit >= iv->end) + iv->end = bit + 1; + } else + bit = -1UL; + spin_unlock(&iv->lock); + return bit; + +} +EXPORT_SYMBOL(airq_iv_alloc_bit); + +/** + * airq_iv_free_bit - free an irq bit of an interrupt vector + * @iv: pointer to interrupt vector structure + * @bit: number of the irq bit to free + */ +void airq_iv_free_bit(struct airq_iv *iv, unsigned long bit) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + + if (!iv->avail) + return; + spin_lock(&iv->lock); + /* Clear (possibly left over) interrupt bit */ + clear_bit(bit ^ be_to_le, iv->vector); + /* Make the bit position available again */ + set_bit(bit ^ be_to_le, iv->avail); + if (bit == iv->end - 1) { + /* Find new end of bit-field */ + while (--iv->end > 0) + if (!test_bit((iv->end - 1) ^ be_to_le, iv->avail)) + break; + } + spin_unlock(&iv->lock); +} +EXPORT_SYMBOL(airq_iv_free_bit); + +/** + * airq_iv_scan - scan interrupt vector for non-zero bits + * @iv: pointer to interrupt vector structure + * @start: bit number to start the search + * @end: bit number to end the search + * + * Returns the bit number of the next non-zero interrupt bit, or + * -1UL if the scan completed without finding any more any non-zero bits. + */ +unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start, + unsigned long end) +{ + const unsigned long be_to_le = BITS_PER_LONG - 1; + unsigned long bit; + + /* Find non-zero bit starting from 'ivs->next'. */ + bit = find_next_bit_left(iv->vector, end, start); + if (bit >= end) + return -1UL; + /* Clear interrupt bit (find left uses big-endian bit numbers) */ + clear_bit(bit ^ be_to_le, iv->vector); + return bit; +} +EXPORT_SYMBOL(airq_iv_scan); From 9389339f28c2f0a32ad5043a1fe05444a7f3e677 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 25 Jun 2013 14:52:23 +0200 Subject: [PATCH 05/32] s390/pci: cleanup function names Rename s390pci_xyz to zpci_xxz and set_irq_ctrl to zpci_set_irq_ctrl. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_insn.h | 12 ++++++------ arch/s390/include/asm/pci_io.h | 10 +++++----- arch/s390/pci/pci.c | 12 ++++++------ arch/s390/pci/pci_dma.c | 4 ++-- arch/s390/pci/pci_insn.c | 18 +++++++++--------- 5 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h index e6a2bdd4d705..df6eac9f0cb4 100644 --- a/arch/s390/include/asm/pci_insn.h +++ b/arch/s390/include/asm/pci_insn.h @@ -79,11 +79,11 @@ struct zpci_fib { } __packed; -int s390pci_mod_fc(u64 req, struct zpci_fib *fib); -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range); -int s390pci_load(u64 *data, u64 req, u64 offset); -int s390pci_store(u64 data, u64 req, u64 offset); -int s390pci_store_block(const u64 *data, u64 req, u64 offset); -void set_irq_ctrl(u16 ctl, char *unused, u8 isc); +int zpci_mod_fc(u64 req, struct zpci_fib *fib); +int zpci_refresh_trans(u64 fn, u64 addr, u64 range); +int zpci_load(u64 *data, u64 req, u64 offset); +int zpci_store(u64 data, u64 req, u64 offset); +int zpci_store_block(const u64 *data, u64 req, u64 offset); +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc); #endif diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index 83a9caa6ae53..d194d544d694 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -36,7 +36,7 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \ u64 data; \ int rc; \ \ - rc = s390pci_load(&data, req, ZPCI_OFFSET(addr)); \ + rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \ if (rc) \ data = -1ULL; \ return (RETTYPE) data; \ @@ -50,7 +50,7 @@ static inline void zpci_write_##VALTYPE(VALTYPE val, \ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \ u64 data = (VALTYPE) val; \ \ - s390pci_store(data, req, ZPCI_OFFSET(addr)); \ + zpci_store(data, req, ZPCI_OFFSET(addr)); \ } zpci_read(8, u64) @@ -83,7 +83,7 @@ static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len val = 0; /* let FW report error */ break; } - return s390pci_store(val, req, offset); + return zpci_store(val, req, offset); } static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) @@ -91,7 +91,7 @@ static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len) u64 data; int cc; - cc = s390pci_load(&data, req, offset); + cc = zpci_load(&data, req, offset); if (cc) goto out; @@ -115,7 +115,7 @@ out: static inline int zpci_write_block(u64 req, const u64 *data, u64 offset) { - return s390pci_store_block(data, req, offset); + return zpci_store_block(data, req, offset); } static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index e2956ad39a4f..5b8feffe65dd 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -179,7 +179,7 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, fib->aisb = (u64) bucket->aisb + aisb / 8; fib->aisbo = aisb & ZPCI_MSI_MASK; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); free_page((unsigned long) fib); @@ -209,7 +209,7 @@ static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args fib->iota = args->iota; fib->fmb_addr = args->fmb_addr; - rc = s390pci_mod_fc(req, fib); + rc = zpci_mod_fc(req, fib); free_page((unsigned long) fib); return rc; } @@ -283,7 +283,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) u64 data; int rc; - rc = s390pci_load(&data, req, offset); + rc = zpci_load(&data, req, offset); if (!rc) { data = data << ((8 - len) * 8); data = le64_to_cpu(data); @@ -301,7 +301,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) data = cpu_to_le64(data); data = data >> ((8 - len) * 8); - rc = s390pci_store(data, req, offset); + rc = zpci_store(data, req, offset); return rc; } @@ -448,7 +448,7 @@ scan: } /* enable interrupts again */ - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); /* check again to not lose initiative */ rmb(); @@ -727,7 +727,7 @@ static int __init zpci_irq_init(void) per_cpu(next_sbit, cpu) = 0; spin_lock_init(&bucket->lock); - set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; out_ai: diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a2343c1f6e04..2125310aa891 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -170,8 +170,8 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa, */ goto no_refresh; - rc = s390pci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, - nr_pages * PAGE_SIZE); + rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr, + nr_pages * PAGE_SIZE); no_refresh: spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags); diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 22eeb9d7ffeb..85267c058af8 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -27,7 +27,7 @@ static inline u8 __mpcifc(u64 req, struct zpci_fib *fib, u8 *status) return cc; } -int s390pci_mod_fc(u64 req, struct zpci_fib *fib) +int zpci_mod_fc(u64 req, struct zpci_fib *fib) { u8 cc, status; @@ -61,7 +61,7 @@ static inline u8 __rpcit(u64 fn, u64 addr, u64 range, u8 *status) return cc; } -int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) +int zpci_refresh_trans(u64 fn, u64 addr, u64 range) { u8 cc, status; @@ -78,7 +78,7 @@ int s390pci_refresh_trans(u64 fn, u64 addr, u64 range) } /* Set Interruption Controls */ -void set_irq_ctrl(u16 ctl, char *unused, u8 isc) +void zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc) { asm volatile ( " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n" @@ -109,7 +109,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_load(u64 *data, u64 req, u64 offset) +int zpci_load(u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -125,7 +125,7 @@ int s390pci_load(u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_load); +EXPORT_SYMBOL_GPL(zpci_load); /* PCI Store */ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) @@ -147,7 +147,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store(u64 data, u64 req, u64 offset) +int zpci_store(u64 data, u64 req, u64 offset) { u8 status; int cc; @@ -163,7 +163,7 @@ int s390pci_store(u64 data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store); +EXPORT_SYMBOL_GPL(zpci_store); /* PCI Store Block */ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) @@ -183,7 +183,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status) return cc; } -int s390pci_store_block(const u64 *data, u64 req, u64 offset) +int zpci_store_block(const u64 *data, u64 req, u64 offset) { u8 status; int cc; @@ -199,4 +199,4 @@ int s390pci_store_block(const u64 *data, u64 req, u64 offset) __func__, cc, status, req, offset); return (cc > 0) ? -EIO : cc; } -EXPORT_SYMBOL_GPL(s390pci_store_block); +EXPORT_SYMBOL_GPL(zpci_store_block); From 5d0d8f43535bc4e19406ecf158340ccc4027a477 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 25 Jun 2013 16:36:29 +0200 Subject: [PATCH 06/32] s390/pci: use adapter interrupt vector helpers Make use of the adapter interrupt helpers in the PCI code. This is the first step to convert the MSI interrupt code to PCI domains. The patch removes the limitation of 64 adapter interrupts per PCI function. Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 8 +- arch/s390/pci/pci.c | 355 ++++++++++++++++-------------------- arch/s390/pci/pci_msi.c | 18 +- 3 files changed, 169 insertions(+), 212 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6e577ba0e5da..7d3295087aa4 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -59,8 +59,9 @@ struct msi_map { struct hlist_node msi_chain; }; -#define ZPCI_NR_MSI_VECS 64 -#define ZPCI_MSI_MASK (ZPCI_NR_MSI_VECS - 1) +#define ZPCI_MSI_VEC_BITS 11 +#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) +#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) enum zpci_state { ZPCI_FN_STATE_RESERVED, @@ -92,7 +93,8 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ struct zdev_irq_map *irq_map; - struct msi_map *msi_map[ZPCI_NR_MSI_VECS]; + struct msi_map *msi_map; + struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ /* DMA stuff */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 5b8feffe65dd..27e7fed3707d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -42,7 +42,6 @@ #define SIC_IRQ_MODE_SINGLE 1 #define ZPCI_NR_DMA_SPACES 1 -#define ZPCI_MSI_VEC_BITS 6 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ @@ -62,25 +61,13 @@ struct callback { }; struct zdev_irq_map { - unsigned long aibv; /* AI bit vector */ - int msi_vecs; /* consecutive MSI-vectors used */ - int __unused; - struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */ - spinlock_t lock; /* protect callbacks against de-reg */ + struct airq_iv *aibv; /* Adapter interrupt bit vector */ + struct callback *cb; /* callback handler array */ + int msi_vecs; /* consecutive MSI-vectors used */ }; -struct intr_bucket { - /* amap of adapters, one bit per dev, corresponds to one irq nr */ - unsigned long *alloc; - /* AI summary bit, global page for all devices */ - unsigned long *aisb; - /* pointer to aibv and callback data in zdev */ - struct zdev_irq_map *imap[ZPCI_NR_DEVICES]; - /* protects the whole bucket struct */ - spinlock_t lock; -}; - -static struct intr_bucket *bucket; +static struct airq_iv *zpci_aisb_iv; +static struct zdev_irq_map *zpci_imap[ZPCI_NR_DEVICES]; /* Adapter interrupt definitions */ static void zpci_irq_handler(struct airq_struct *airq); @@ -96,15 +83,12 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); -/* highest irq summary bit */ -static int __read_mostly aisb_max; - static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; static inline int irq_to_msi_nr(unsigned int irq) { - return irq & ZPCI_MSI_MASK; + return irq & ZPCI_MSI_VEC_MASK; } static inline int irq_to_dev_nr(unsigned int irq) @@ -112,11 +96,6 @@ static inline int irq_to_dev_nr(unsigned int irq) return irq >> ZPCI_MSI_VEC_BITS; } -static inline struct zdev_irq_map *get_imap(unsigned int irq) -{ - return bucket->imap[irq_to_dev_nr(irq)]; -} - struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -160,8 +139,7 @@ int pci_proc_domain(struct pci_bus *bus) EXPORT_SYMBOL_GPL(pci_proc_domain); /* Modify PCI: Register adapter interruptions */ -static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, - u64 aibv) +static int zpci_set_airq(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); struct zpci_fib *fib; @@ -172,12 +150,12 @@ static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, return -ENOMEM; fib->isc = PCI_ISC; - fib->noi = zdev->irq_map->msi_vecs; fib->sum = 1; /* enable summary notifications */ - fib->aibv = aibv; - fib->aibvo = 0; /* every function has its own page */ - fib->aisb = (u64) bucket->aisb + aisb / 8; - fib->aisbo = aisb & ZPCI_MSI_MASK; + fib->noi = airq_iv_end(zdev->aibv); + fib->aibv = (unsigned long) zdev->aibv->vector; + fib->aibvo = 0; /* each zdev has its own interrupt vector */ + fib->aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8; + fib->aisbo = zdev->aisb & 63; rc = zpci_mod_fc(req, fib); pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); @@ -234,7 +212,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) } /* Modify PCI: Unregister adapter interruptions */ -static int zpci_unregister_airq(struct zpci_dev *zdev) +static int zpci_clear_airq(struct zpci_dev *zdev) { struct mod_pci_args args = { 0, 0, 0, 0 }; @@ -404,152 +382,171 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; -/* store the last handled bit to implement fair scheduling of devices */ -static DEFINE_PER_CPU(unsigned long, next_sbit); - static void zpci_irq_handler(struct airq_struct *airq) { - unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); - int rescan = 0, max = aisb_max; + unsigned long si, ai; struct zdev_irq_map *imap; + int irqs_on = 0; inc_irq_stat(IRQIO_PCI); - sbit = start; + for (si = 0;;) { + /* Scan adapter summary indicator bit vector */ + si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv)); + if (si == -1UL) { + if (irqs_on++) + /* End of second scan with interrupts on. */ + break; + /* First scan complete, reenable interrupts. */ + zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + si = 0; + continue; + } -scan: - /* find summary_bit */ - for_each_set_bit_left_cont(sbit, bucket->aisb, max) { - clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); - last = sbit; - - /* find vector bit */ - imap = bucket->imap[sbit]; - for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { + /* Scan the adapter interrupt vector for this device. */ + imap = zpci_imap[si]; + for (ai = 0;;) { + ai = airq_iv_scan(imap->aibv, ai, imap->msi_vecs); + if (ai == -1UL) + break; inc_irq_stat(IRQIO_MSI); - clear_bit(63 - mbit, &imap->aibv); - - spin_lock(&imap->lock); - if (imap->cb[mbit].handler) - imap->cb[mbit].handler(mbit, - imap->cb[mbit].data); - spin_unlock(&imap->lock); + airq_iv_lock(imap->aibv, ai); + if (imap->cb[ai].handler) + imap->cb[ai].handler(ai, imap->cb[ai].data); + airq_iv_unlock(imap->aibv, ai); } } - - if (rescan) - goto out; - - /* scan the skipped bits */ - if (start > 0) { - sbit = 0; - max = start; - start = 0; - goto scan; - } - - /* enable interrupts again */ - zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); - - /* check again to not lose initiative */ - rmb(); - max = aisb_max; - sbit = find_first_bit_left(bucket->aisb, max); - if (sbit != max) { - rescan++; - goto scan; - } -out: - /* store next device bit to scan */ - __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last; } -/* msi_vecs - number of requested interrupts, 0 place function to error state */ -static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs) +static int zpci_alloc_msi(struct zpci_dev *zdev, int msi_vecs) +{ + unsigned long size; + + /* Alloc aibv & callback space */ + zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); + if (!zdev->irq_map) + goto out; + /* Store the number of used MSI vectors */ + zdev->irq_map->msi_vecs = msi_vecs; + /* Allocate callback array */ + size = sizeof(struct callback) * msi_vecs; + zdev->irq_map->cb = kzalloc(size, GFP_KERNEL); + if (!zdev->irq_map->cb) + goto out_map; + /* Allocate msi_map array */ + size = sizeof(struct msi_map) * msi_vecs; + zdev->msi_map = kzalloc(size, GFP_KERNEL); + if (!zdev->msi_map) + goto out_cb; + return 0; + +out_cb: + kfree(zdev->irq_map->cb); +out_map: + kmem_cache_free(zdev_irq_cache, zdev->irq_map); +out: + return -ENOMEM; +} + +static void zpci_free_msi(struct zpci_dev *zdev) +{ + kfree(zdev->msi_map); + kfree(zdev->irq_map->cb); + kmem_cache_free(zdev_irq_cache, zdev->irq_map); +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); - unsigned int aisb, msi_nr; + unsigned int msi_nr, msi_vecs; + unsigned long aisb; struct msi_desc *msi; int rc; - /* store the number of used MSI vectors */ - zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); - - spin_lock(&bucket->lock); - aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); - /* alloc map exhausted? */ - if (aisb == PAGE_SIZE) { - spin_unlock(&bucket->lock); - return -EIO; - } - set_bit(aisb, bucket->alloc); - spin_unlock(&bucket->lock); + pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); + if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) + return -EINVAL; + msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + /* Allocate adapter summary indicator bit */ + rc = -EIO; + aisb = airq_iv_alloc_bit(zpci_aisb_iv); + if (aisb == -1UL) + goto out; zdev->aisb = aisb; - if (aisb + 1 > aisb_max) - aisb_max = aisb + 1; - /* wire up IRQ shortcut pointer */ - bucket->imap[zdev->aisb] = zdev->irq_map; - pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); + /* Create adapter interrupt vector */ + rc = -ENOMEM; + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_BITLOCK); + if (!zdev->aibv) + goto out_si; - /* TODO: irq number 0 wont be found if we return less than requested MSIs. - * ignore it for now and fix in common code. + /* Allocate data structures for msi interrupts */ + rc = zpci_alloc_msi(zdev, msi_vecs); + if (rc) + goto out_iv; + + /* Wire up shortcut pointer */ + zpci_imap[aisb] = zdev->irq_map; + zdev->irq_map->aibv = zdev->aibv; + + /* + * TODO: irq number 0 wont be found if we return less than the + * requested MSIs. Ignore it for now and fix in common code. */ msi_nr = aisb << ZPCI_MSI_VEC_BITS; - list_for_each_entry(msi, &pdev->msi_list, list) { rc = zpci_setup_msi_irq(zdev, msi, msi_nr, aisb << ZPCI_MSI_VEC_BITS); if (rc) - return rc; + goto out_msi; msi_nr++; } - rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); - if (rc) { - clear_bit(aisb, bucket->alloc); - dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); - return rc; + /* Enable adapter interrupts */ + rc = zpci_set_airq(zdev); + if (rc) + goto out_msi; + + return (msi_vecs == nvec) ? 0 : msi_vecs; + +out_msi: + msi_nr -= aisb << ZPCI_MSI_VEC_BITS; + list_for_each_entry(msi, &pdev->msi_list, list) { + if (msi_nr-- == 0) + break; + zpci_teardown_msi_irq(zdev, msi); } - return (zdev->irq_map->msi_vecs == msi_vecs) ? - 0 : zdev->irq_map->msi_vecs; + zpci_free_msi(zdev); +out_iv: + airq_iv_release(zdev->aibv); +out_si: + airq_iv_free_bit(zpci_aisb_iv, aisb); +out: + dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); + return rc; } -static void zpci_teardown_msi(struct pci_dev *pdev) +void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); struct msi_desc *msi; - int aisb, rc; + int rc; - rc = zpci_unregister_airq(zdev); + pr_info("%s: on pdev: %p\n", __func__, pdev); + + /* Disable adapter interrupts */ + rc = zpci_clear_airq(zdev); if (rc) { dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); return; } - msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); - aisb = irq_to_dev_nr(msi->irq); - list_for_each_entry(msi, &pdev->msi_list, list) zpci_teardown_msi_irq(zdev, msi); - clear_bit(aisb, bucket->alloc); - if (aisb + 1 == aisb_max) - aisb_max--; -} - -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) -{ - pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); - if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) - return -EINVAL; - return zpci_setup_msi(pdev, nvec); -} - -void arch_teardown_msi_irqs(struct pci_dev *pdev) -{ - pr_info("%s: on pdev: %p\n", __func__, pdev); - zpci_teardown_msi(pdev); + zpci_free_msi(zdev); + airq_iv_release(zdev->aibv); + airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } static void zpci_map_resources(struct zpci_dev *zdev) @@ -589,24 +586,11 @@ struct zpci_dev *zpci_alloc_device(void) /* Alloc memory for our private pci device data */ zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - return ERR_PTR(-ENOMEM); - - /* Alloc aibv & callback space */ - zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); - if (!zdev->irq_map) - goto error; - WARN_ON((u64) zdev->irq_map & 0xff); - return zdev; - -error: - kfree(zdev); - return ERR_PTR(-ENOMEM); + return zdev ? : ERR_PTR(-ENOMEM); } void zpci_free_device(struct zpci_dev *zdev) { - kmem_cache_free(zdev_irq_cache, zdev->irq_map); kfree(zdev); } @@ -643,7 +627,8 @@ int pcibios_add_platform_entries(struct pci_dev *pdev) int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) { - int msi_nr = irq_to_msi_nr(irq); + unsigned int msi_nr = irq_to_msi_nr(irq); + unsigned int dev_nr = irq_to_dev_nr(irq); struct zdev_irq_map *imap; struct msi_desc *msi; @@ -651,10 +636,7 @@ int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) if (!msi) return -EIO; - imap = get_imap(irq); - spin_lock_init(&imap->lock); - - pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr); + imap = zpci_imap[dev_nr]; imap->cb[msi_nr].handler = handler; imap->cb[msi_nr].data = data; @@ -669,24 +651,25 @@ int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) void zpci_free_irq(unsigned int irq) { - struct zdev_irq_map *imap = get_imap(irq); - int msi_nr = irq_to_msi_nr(irq); - unsigned long flags; + unsigned int msi_nr = irq_to_msi_nr(irq); + unsigned int dev_nr = irq_to_dev_nr(irq); + struct zdev_irq_map *imap; + struct msi_desc *msi; - pr_debug("%s: for irq: %d\n", __func__, irq); - - spin_lock_irqsave(&imap->lock, flags); + /* Disable interrupt */ + msi = irq_get_msi_desc(irq); + if (!msi) + return; + zpci_msi_set_mask_bits(msi, 1, 1); + imap = zpci_imap[dev_nr]; imap->cb[msi_nr].handler = NULL; imap->cb[msi_nr].data = NULL; - spin_unlock_irqrestore(&imap->lock, flags); + synchronize_rcu(); } int request_irq(unsigned int irq, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) { - pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n", - __func__, irq, handler, irqflags, devname); - return zpci_request_irq(irq, handler, dev_id); } EXPORT_SYMBOL_GPL(request_irq); @@ -699,52 +682,32 @@ EXPORT_SYMBOL_GPL(free_irq); static int __init zpci_irq_init(void) { - int cpu, rc; - - bucket = kzalloc(sizeof(*bucket), GFP_KERNEL); - if (!bucket) - return -ENOMEM; - - bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->aisb) { - rc = -ENOMEM; - goto out_aisb; - } - - bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL); - if (!bucket->alloc) { - rc = -ENOMEM; - goto out_alloc; - } + int rc; rc = register_adapter_interrupt(&zpci_airq); if (rc) - goto out_ai; + goto out; /* Set summary to 1 to be called every time for the ISC. */ *zpci_airq.lsi_ptr = 1; - for_each_online_cpu(cpu) - per_cpu(next_sbit, cpu) = 0; + rc = -ENOMEM; + zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC); + if (!zpci_aisb_iv) + goto out_airq; - spin_lock_init(&bucket->lock); zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); return 0; -out_ai: - free_page((unsigned long) bucket->alloc); -out_alloc: - free_page((unsigned long) bucket->aisb); -out_aisb: - kfree(bucket); +out_airq: + unregister_adapter_interrupt(&zpci_airq); +out: return rc; } static void zpci_irq_exit(void) { - free_page((unsigned long) bucket->alloc); - free_page((unsigned long) bucket->aisb); + airq_iv_release(zpci_aisb_iv); unregister_adapter_interrupt(&zpci_airq); - kfree(bucket); } static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c index b097aed05a9b..d4b480a18b0f 100644 --- a/arch/s390/pci/pci_msi.c +++ b/arch/s390/pci/pci_msi.c @@ -62,19 +62,15 @@ int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) } int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, - unsigned int nr, int offset) + unsigned int nr, int offset) { struct msi_map *map; struct msi_msg msg; int rc; - map = kmalloc(sizeof(*map), GFP_KERNEL); - if (map == NULL) - return -ENOMEM; - + map = zdev->msi_map + (nr & ZPCI_MSI_VEC_MASK); map->irq = nr; map->msi = msi; - zdev->msi_map[nr & ZPCI_MSI_MASK] = map; INIT_HLIST_NODE(&map->msi_chain); pr_debug("%s hashing irq: %u to bucket nr: %llu\n", @@ -86,8 +82,6 @@ int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, if (rc) { spin_unlock(&msi_map_lock); hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[nr & ZPCI_MSI_MASK] = NULL; return rc; } spin_unlock(&msi_map_lock); @@ -101,20 +95,18 @@ int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi) { - int irq = msi->irq & ZPCI_MSI_MASK; + int nr = msi->irq & ZPCI_MSI_VEC_MASK; struct msi_map *map; + zpci_msi_set_mask_bits(msi, 1, 1); msi->msg.address_lo = 0; msi->msg.address_hi = 0; msi->msg.data = 0; msi->irq = 0; - zpci_msi_set_mask_bits(msi, 1, 1); spin_lock(&msi_map_lock); - map = zdev->msi_map[irq]; + map = zdev->msi_map + nr; hlist_del_rcu(&map->msi_chain); - kfree(map); - zdev->msi_map[irq] = NULL; spin_unlock(&msi_map_lock); } From 1f44a225777e40fd9a945b09f958052c47494e1e Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 27 Jun 2013 09:01:09 +0200 Subject: [PATCH 07/32] s390: convert interrupt handling to use generic hardirq With the introduction of PCI it became apparent that s390 should convert to generic hardirqs as too many drivers do not have the correct dependency for GENERIC_HARDIRQS. On the architecture level s390 does not have irq lines. It has external interrupts, I/O interrupts and adapter interrupts. This patch hard-codes all external interrupts as irq #1, all I/O interrupts as irq #2 and all adapter interrupts as irq #3. The additional information from the lowcore associated with the interrupt is stored in the pt_regs of the interrupt frame, where the interrupt handler can pick it up. For PCI/MSI interrupts the adapter interrupt handler scans the relevant bit fields and calls generic_handle_irq with the virtual irq number for the MSI interrupt. Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 11 ++ arch/s390/include/asm/hardirq.h | 5 + arch/s390/include/asm/hw_irq.h | 17 +-- arch/s390/include/asm/irq.h | 35 +++-- arch/s390/include/asm/pci.h | 16 -- arch/s390/include/asm/serial.h | 6 + arch/s390/kernel/entry.S | 12 +- arch/s390/kernel/entry64.S | 9 +- arch/s390/kernel/irq.c | 160 ++++++++------------ arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci.c | 255 +++++++++++--------------------- arch/s390/pci/pci_msi.c | 134 ----------------- drivers/s390/cio/airq.c | 21 ++- drivers/s390/cio/cio.c | 46 +++--- drivers/s390/cio/cio.h | 3 - 15 files changed, 249 insertions(+), 483 deletions(-) create mode 100644 arch/s390/include/asm/serial.h delete mode 100644 arch/s390/pci/pci_msi.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8a4cae78f03c..8b7892bf6d8b 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -116,6 +116,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_TRACE_MCOUNT_TEST + select HAVE_GENERIC_HARDIRQS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -445,6 +446,16 @@ config PCI_NR_FUNCTIONS This allows you to specify the maximum number of PCI functions which this kernel will support. +config PCI_NR_MSI + int "Maximum number of MSI interrupts (64-32768)" + range 64 32768 + default "256" + help + This defines the number of virtual interrupts the kernel will + provide for MSI interrupts. If you configure your system to have + too few drivers will fail to allocate MSI interrupts for all + PCI devices. + source "drivers/pci/Kconfig" source "drivers/pci/pcie/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 0c82ba86e997..a908d2941c5d 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -20,4 +20,9 @@ #define HARDIRQ_BITS 8 +static inline void ack_bad_irq(unsigned int irq) +{ + printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); +} + #endif /* __ASM_HARDIRQ_H */ diff --git a/arch/s390/include/asm/hw_irq.h b/arch/s390/include/asm/hw_irq.h index 7e3d2586c1ff..ee96a8b697f9 100644 --- a/arch/s390/include/asm/hw_irq.h +++ b/arch/s390/include/asm/hw_irq.h @@ -4,19 +4,8 @@ #include #include -static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) -{ - return __irq_get_msi_desc(irq); -} - -/* Must be called with msi map lock held */ -static inline int irq_set_msi_desc(unsigned int irq, struct msi_desc *msi) -{ - if (!msi) - return -EINVAL; - - msi->irq = irq; - return 0; -} +void __init init_airq_interrupts(void); +void __init init_cio_interrupts(void); +void __init init_ext_interrupts(void); #endif diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index 87c17bfb2968..1eaa3625803c 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,17 +1,28 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +#define EXT_INTERRUPT 1 +#define IO_INTERRUPT 2 +#define THIN_INTERRUPT 3 + +#define NR_IRQS_BASE 4 + +#ifdef CONFIG_PCI_NR_MSI +# define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) +#else +# define NR_IRQS NR_IRQS_BASE +#endif + +/* This number is used when no interrupt has been assigned */ +#define NO_IRQ 0 + +#ifndef __ASSEMBLY__ + #include #include #include #include -enum interruption_main_class { - EXTERNAL_INTERRUPT, - IO_INTERRUPT, - NR_IRQS -}; - enum interruption_class { IRQEXT_CLK, IRQEXT_EXC, @@ -72,14 +83,8 @@ void service_subclass_irq_unregister(void); void measurement_alert_subclass_register(void); void measurement_alert_subclass_unregister(void); -#ifdef CONFIG_LOCKDEP -# define disable_irq_nosync_lockdep(irq) disable_irq_nosync(irq) -# define disable_irq_nosync_lockdep_irqsave(irq, flags) \ - disable_irq_nosync(irq) -# define disable_irq_lockdep(irq) disable_irq(irq) -# define enable_irq_lockdep(irq) enable_irq(irq) -# define enable_irq_lockdep_irqrestore(irq, flags) \ - enable_irq(irq) -#endif +#define irq_canonicalize(irq) (irq) + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_IRQ_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 7d3295087aa4..d0872769d44e 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -53,12 +53,6 @@ struct zpci_fmb { atomic64_t unmapped_pages; } __packed __aligned(16); -struct msi_map { - unsigned long irq; - struct msi_desc *msi; - struct hlist_node msi_chain; -}; - #define ZPCI_MSI_VEC_BITS 11 #define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) #define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) @@ -92,8 +86,6 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ - struct zdev_irq_map *irq_map; - struct msi_map *msi_map; struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ @@ -153,14 +145,6 @@ int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); -/* MSI */ -struct msi_desc *__irq_get_msi_desc(unsigned int); -int zpci_msi_set_mask_bits(struct msi_desc *, u32, u32); -int zpci_setup_msi_irq(struct zpci_dev *, struct msi_desc *, unsigned int, int); -void zpci_teardown_msi_irq(struct zpci_dev *, struct msi_desc *); -int zpci_msihash_init(void); -void zpci_msihash_exit(void); - #ifdef CONFIG_PCI /* Error handling and recovery */ void zpci_event_error(void *); diff --git a/arch/s390/include/asm/serial.h b/arch/s390/include/asm/serial.h new file mode 100644 index 000000000000..5b3e48ef534b --- /dev/null +++ b/arch/s390/include/asm/serial.h @@ -0,0 +1,6 @@ +#ifndef _ASM_S390_SERIAL_H +#define _ASM_S390_SERIAL_H + +#define BASE_BAUD 0 + +#endif /* _ASM_S390_SERIAL_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index be7a408be7a1..5ca70b4b72cb 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -18,6 +18,7 @@ #include #include #include +#include __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 4 @@ -435,6 +436,11 @@ io_skip: io_loop: l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs + lhi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lhi %r3,THIN_INTERRUPT +io_call: basr %r14,%r1 # call do_IRQ tm __LC_MACHINE_FLAGS+2,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -584,9 +590,10 @@ ext_skip: mvc __PT_INT_CODE(4,%r11),__LC_EXT_CPU_ADDR mvc __PT_INT_PARM(4,%r11),__LC_EXT_PARAMS TRACE_IRQS_OFF + l %r1,BASED(.Ldo_IRQ) lr %r2,%r11 # pass pointer to pt_regs - l %r1,BASED(.Ldo_extint) - basr %r14,%r1 # call do_extint + lhi %r3,EXT_INTERRUPT + basr %r14,%r1 # call do_IRQ j io_return /* @@ -902,7 +909,6 @@ cleanup_idle_wait: .Ldo_machine_check: .long s390_do_machine_check .Lhandle_mcck: .long s390_handle_mcck .Ldo_IRQ: .long do_IRQ -.Ldo_extint: .long do_extint .Ldo_signal: .long do_signal .Ldo_notify_resume: .long do_notify_resume .Ldo_per_trap: .long do_per_trap diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 1c039d0c24c7..980c7aa1cc5c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -19,6 +19,7 @@ #include #include #include +#include __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -468,6 +469,11 @@ io_skip: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) io_loop: lgr %r2,%r11 # pass pointer to pt_regs + lghi %r3,IO_INTERRUPT + tm __PT_INT_CODE+8(%r11),0x80 # adapter interrupt ? + jz io_call + lghi %r3,THIN_INTERRUPT +io_call: brasl %r14,do_IRQ tm __LC_MACHINE_FLAGS+6,0x10 # MACHINE_FLAG_LPAR jz io_return @@ -623,7 +629,8 @@ ext_skip: TRACE_IRQS_OFF xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs - brasl %r14,do_extint + lghi %r3,EXT_INTERRUPT + brasl %r14,do_IRQ j io_return /* diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 54b0995514e8..b34ba0ea96a9 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "entry.h" DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); @@ -42,9 +43,10 @@ struct irq_class { * Since the external and I/O interrupt fields are already sums we would end * up with having a sum which accounts each interrupt twice. */ -static const struct irq_class irqclass_main_desc[NR_IRQS] = { - [EXTERNAL_INTERRUPT] = {.name = "EXT"}, - [IO_INTERRUPT] = {.name = "I/O"} +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + [EXT_INTERRUPT] = {.name = "EXT"}, + [IO_INTERRUPT] = {.name = "I/O"}, + [THIN_INTERRUPT] = {.name = "AIO"}, }; /* @@ -86,6 +88,28 @@ static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = { [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"}, }; +void __init init_IRQ(void) +{ + irq_reserve_irqs(0, THIN_INTERRUPT); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +void do_IRQ(struct pt_regs *regs, int irq) +{ + struct pt_regs *old_regs; + + old_regs = set_irq_regs(regs); + irq_enter(); + if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) + /* Serve timer interrupts first. */ + clock_comparator_work(); + generic_handle_irq(irq); + irq_exit(); + set_irq_regs(old_regs); +} + /* * show_interrupts is needed by /proc/interrupts. */ @@ -100,27 +124,36 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); + goto out; } if (irq < NR_IRQS) { + if (irq >= NR_IRQS_BASE) + goto out; seq_printf(p, "%s: ", irqclass_main_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[irq]); + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); seq_putc(p, '\n'); - goto skip_arch_irqs; + goto out; } for (irq = 0; irq < NR_ARCH_IRQS; irq++) { seq_printf(p, "%s: ", irqclass_sub_desc[irq].name); for_each_online_cpu(cpu) - seq_printf(p, "%10u ", per_cpu(irq_stat, cpu).irqs[irq]); + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); if (irqclass_sub_desc[irq].desc) seq_printf(p, " %s", irqclass_sub_desc[irq].desc); seq_putc(p, '\n'); } -skip_arch_irqs: +out: put_online_cpus(); return 0; } +int arch_show_interrupts(struct seq_file *p, int prec) +{ + return 0; +} + /* * Switch to the asynchronous interrupt stack for softirq execution. */ @@ -159,14 +192,6 @@ asmlinkage void do_softirq(void) local_irq_restore(flags); } -#ifdef CONFIG_PROC_FS -void init_irq_proc(void) -{ - if (proc_mkdir("irq", NULL)) - create_prof_cpu_mask(); -} -#endif - /* * ext_int_hash[index] is the list head for all external interrupts that hash * to this index. @@ -183,14 +208,6 @@ struct ext_int_info { /* ext_int_hash_lock protects the handler lists for external interrupts */ DEFINE_SPINLOCK(ext_int_hash_lock); -static void __init init_external_interrupts(void) -{ - int idx; - - for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) - INIT_LIST_HEAD(&ext_int_hash[idx]); -} - static inline int ext_hash(u16 code) { return (code + (code >> 9)) & 0xff; @@ -234,20 +251,13 @@ int unregister_external_interrupt(u16 code, ext_int_handler_t handler) } EXPORT_SYMBOL(unregister_external_interrupt); -void __irq_entry do_extint(struct pt_regs *regs) +static irqreturn_t do_ext_interrupt(int irq, void *dummy) { + struct pt_regs *regs = get_irq_regs(); struct ext_code ext_code; - struct pt_regs *old_regs; struct ext_int_info *p; int index; - old_regs = set_irq_regs(regs); - irq_enter(); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) { - /* Serve timer interrupts first. */ - clock_comparator_work(); - } - kstat_incr_irqs_this_cpu(EXTERNAL_INTERRUPT, NULL); ext_code = *(struct ext_code *) ®s->int_code; if (ext_code.code != 0x1004) __get_cpu_var(s390_idle).nohz_delay = 1; @@ -259,13 +269,25 @@ void __irq_entry do_extint(struct pt_regs *regs) p->handler(ext_code, regs->int_parm, regs->int_parm_long); rcu_read_unlock(); - irq_exit(); - set_irq_regs(old_regs); + + return IRQ_HANDLED; } -void __init init_IRQ(void) +static struct irqaction external_interrupt = { + .name = "EXT", + .handler = do_ext_interrupt, +}; + +void __init init_ext_interrupts(void) { - init_external_interrupts(); + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_LIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(EXT_INTERRUPT, &external_interrupt); } static DEFINE_SPINLOCK(sc_irq_lock); @@ -313,69 +335,3 @@ void measurement_alert_subclass_unregister(void) spin_unlock(&ma_subclass_lock); } EXPORT_SYMBOL(measurement_alert_subclass_unregister); - -#ifdef CONFIG_SMP -void synchronize_irq(unsigned int irq) -{ - /* - * Not needed, the handler is protected by a lock and IRQs that occur - * after the handler is deleted are just NOPs. - */ -} -EXPORT_SYMBOL_GPL(synchronize_irq); -#endif - -#ifndef CONFIG_PCI - -/* Only PCI devices have dynamically-defined IRQ handlers */ - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(free_irq); - -void enable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(enable_irq); - -void disable_irq(unsigned int irq) -{ - WARN_ON(1); -} -EXPORT_SYMBOL_GPL(disable_irq); - -#endif /* !CONFIG_PCI */ - -void disable_irq_nosync(unsigned int irq) -{ - disable_irq(irq); -} -EXPORT_SYMBOL_GPL(disable_irq_nosync); - -unsigned long probe_irq_on(void) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_on); - -int probe_irq_off(unsigned long val) -{ - return 0; -} -EXPORT_SYMBOL_GPL(probe_irq_off); - -unsigned int probe_irq_mask(unsigned long val) -{ - return val; -} -EXPORT_SYMBOL_GPL(probe_irq_mask); diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 086a2e37935d..a9e1dc4ae442 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -2,5 +2,5 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_msi.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 27e7fed3707d..d65dc4f50e2a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -50,24 +50,23 @@ EXPORT_SYMBOL_GPL(zpci_list); DEFINE_MUTEX(zpci_list_lock); EXPORT_SYMBOL_GPL(zpci_list_lock); + +static void zpci_enable_irq(struct irq_data *data); +static void zpci_disable_irq(struct irq_data *data); + +static struct irq_chip zpci_irq_chip = { + .name = "zPCI", + .irq_unmask = zpci_enable_irq, + .irq_mask = zpci_disable_irq, +}; + static struct pci_hp_callback_ops *hotplug_ops; static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); static DEFINE_SPINLOCK(zpci_domain_lock); -struct callback { - irq_handler_t handler; - void *data; -}; - -struct zdev_irq_map { - struct airq_iv *aibv; /* Adapter interrupt bit vector */ - struct callback *cb; /* callback handler array */ - int msi_vecs; /* consecutive MSI-vectors used */ -}; - static struct airq_iv *zpci_aisb_iv; -static struct zdev_irq_map *zpci_imap[ZPCI_NR_DEVICES]; +static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES]; /* Adapter interrupt definitions */ static void zpci_irq_handler(struct airq_struct *airq); @@ -83,19 +82,8 @@ static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); -static struct kmem_cache *zdev_irq_cache; static struct kmem_cache *zdev_fmb_cache; -static inline int irq_to_msi_nr(unsigned int irq) -{ - return irq & ZPCI_MSI_VEC_MASK; -} - -static inline int irq_to_dev_nr(unsigned int irq) -{ - return irq >> ZPCI_MSI_VEC_BITS; -} - struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -283,21 +271,42 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } -void enable_irq(unsigned int irq) +static int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) { - struct msi_desc *msi = irq_get_msi_desc(irq); + int offset, pos; + u32 mask_bits; + + if (msi->msi_attrib.is_msix) { + offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + msi->masked = readl(msi->mask_base + offset); + writel(flag, msi->mask_base + offset); + } else if (msi->msi_attrib.maskbit) { + pos = (long) msi->mask_base; + pci_read_config_dword(msi->dev, pos, &mask_bits); + mask_bits &= ~(mask); + mask_bits |= flag & mask; + pci_write_config_dword(msi->dev, pos, mask_bits); + } else + return 0; + + msi->msi_attrib.maskbit = !!flag; + return 1; +} + +static void zpci_enable_irq(struct irq_data *data) +{ + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 0); } -EXPORT_SYMBOL_GPL(enable_irq); -void disable_irq(unsigned int irq) +static void zpci_disable_irq(struct irq_data *data) { - struct msi_desc *msi = irq_get_msi_desc(irq); + struct msi_desc *msi = irq_get_msi_desc(data->irq); zpci_msi_set_mask_bits(msi, 1, 1); } -EXPORT_SYMBOL_GPL(disable_irq); void pcibios_fixup_bus(struct pci_bus *bus) { @@ -385,7 +394,7 @@ static struct pci_ops pci_root_ops = { static void zpci_irq_handler(struct airq_struct *airq) { unsigned long si, ai; - struct zdev_irq_map *imap; + struct airq_iv *aibv; int irqs_on = 0; inc_irq_stat(IRQIO_PCI); @@ -403,69 +412,33 @@ static void zpci_irq_handler(struct airq_struct *airq) } /* Scan the adapter interrupt vector for this device. */ - imap = zpci_imap[si]; + aibv = zpci_aibv[si]; for (ai = 0;;) { - ai = airq_iv_scan(imap->aibv, ai, imap->msi_vecs); + ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv)); if (ai == -1UL) break; inc_irq_stat(IRQIO_MSI); - airq_iv_lock(imap->aibv, ai); - if (imap->cb[ai].handler) - imap->cb[ai].handler(ai, imap->cb[ai].data); - airq_iv_unlock(imap->aibv, ai); + airq_iv_lock(aibv, ai); + generic_handle_irq(airq_iv_get_data(aibv, ai)); + airq_iv_unlock(aibv, ai); } } } -static int zpci_alloc_msi(struct zpci_dev *zdev, int msi_vecs) -{ - unsigned long size; - - /* Alloc aibv & callback space */ - zdev->irq_map = kmem_cache_zalloc(zdev_irq_cache, GFP_KERNEL); - if (!zdev->irq_map) - goto out; - /* Store the number of used MSI vectors */ - zdev->irq_map->msi_vecs = msi_vecs; - /* Allocate callback array */ - size = sizeof(struct callback) * msi_vecs; - zdev->irq_map->cb = kzalloc(size, GFP_KERNEL); - if (!zdev->irq_map->cb) - goto out_map; - /* Allocate msi_map array */ - size = sizeof(struct msi_map) * msi_vecs; - zdev->msi_map = kzalloc(size, GFP_KERNEL); - if (!zdev->msi_map) - goto out_cb; - return 0; - -out_cb: - kfree(zdev->irq_map->cb); -out_map: - kmem_cache_free(zdev_irq_cache, zdev->irq_map); -out: - return -ENOMEM; -} - -static void zpci_free_msi(struct zpci_dev *zdev) -{ - kfree(zdev->msi_map); - kfree(zdev->irq_map->cb); - kmem_cache_free(zdev_irq_cache, zdev->irq_map); -} - int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { struct zpci_dev *zdev = get_zdev(pdev); - unsigned int msi_nr, msi_vecs; + unsigned int hwirq, irq, msi_vecs; unsigned long aisb; struct msi_desc *msi; + struct msi_msg msg; int rc; pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) return -EINVAL; msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); + msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); /* Allocate adapter summary indicator bit */ rc = -EIO; @@ -476,30 +449,31 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) /* Create adapter interrupt vector */ rc = -ENOMEM; - zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_BITLOCK); + zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK); if (!zdev->aibv) goto out_si; - /* Allocate data structures for msi interrupts */ - rc = zpci_alloc_msi(zdev, msi_vecs); - if (rc) - goto out_iv; - /* Wire up shortcut pointer */ - zpci_imap[aisb] = zdev->irq_map; - zdev->irq_map->aibv = zdev->aibv; + zpci_aibv[aisb] = zdev->aibv; - /* - * TODO: irq number 0 wont be found if we return less than the - * requested MSIs. Ignore it for now and fix in common code. - */ - msi_nr = aisb << ZPCI_MSI_VEC_BITS; + /* Request MSI interrupts */ + hwirq = 0; list_for_each_entry(msi, &pdev->msi_list, list) { - rc = zpci_setup_msi_irq(zdev, msi, msi_nr, - aisb << ZPCI_MSI_VEC_BITS); + rc = -EIO; + irq = irq_alloc_desc(0); /* Alloc irq on node 0 */ + if (irq == NO_IRQ) + goto out_msi; + rc = irq_set_msi_desc(irq, msi); if (rc) goto out_msi; - msi_nr++; + irq_set_chip_and_handler(irq, &zpci_irq_chip, + handle_simple_irq); + msg.data = hwirq; + msg.address_lo = zdev->msi_addr & 0xffffffff; + msg.address_hi = zdev->msi_addr >> 32; + write_msi_msg(irq, &msg); + airq_iv_set_data(zdev->aibv, hwirq, irq); + hwirq++; } /* Enable adapter interrupts */ @@ -510,14 +484,17 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return (msi_vecs == nvec) ? 0 : msi_vecs; out_msi: - msi_nr -= aisb << ZPCI_MSI_VEC_BITS; list_for_each_entry(msi, &pdev->msi_list, list) { - if (msi_nr-- == 0) + if (hwirq-- == 0) break; - zpci_teardown_msi_irq(zdev, msi); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; } - zpci_free_msi(zdev); -out_iv: + zpci_aibv[aisb] = NULL; airq_iv_release(zdev->aibv); out_si: airq_iv_free_bit(zpci_aisb_iv, aisb); @@ -541,10 +518,18 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) return; } - list_for_each_entry(msi, &pdev->msi_list, list) - zpci_teardown_msi_irq(zdev, msi); + /* Release MSI interrupts */ + list_for_each_entry(msi, &pdev->msi_list, list) { + zpci_msi_set_mask_bits(msi, 1, 1); + irq_set_msi_desc(msi->irq, NULL); + irq_free_desc(msi->irq); + msi->msg.address_lo = 0; + msi->msg.address_hi = 0; + msi->msg.data = 0; + msi->irq = 0; + } - zpci_free_msi(zdev); + zpci_aibv[zdev->aisb] = NULL; airq_iv_release(zdev->aibv); airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } @@ -625,61 +610,6 @@ int pcibios_add_platform_entries(struct pci_dev *pdev) return zpci_sysfs_add_device(&pdev->dev); } -int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) -{ - unsigned int msi_nr = irq_to_msi_nr(irq); - unsigned int dev_nr = irq_to_dev_nr(irq); - struct zdev_irq_map *imap; - struct msi_desc *msi; - - msi = irq_get_msi_desc(irq); - if (!msi) - return -EIO; - - imap = zpci_imap[dev_nr]; - imap->cb[msi_nr].handler = handler; - imap->cb[msi_nr].data = data; - - /* - * The generic MSI code returns with the interrupt disabled on the - * card, using the MSI mask bits. Firmware doesn't appear to unmask - * at that level, so we do it here by hand. - */ - zpci_msi_set_mask_bits(msi, 1, 0); - return 0; -} - -void zpci_free_irq(unsigned int irq) -{ - unsigned int msi_nr = irq_to_msi_nr(irq); - unsigned int dev_nr = irq_to_dev_nr(irq); - struct zdev_irq_map *imap; - struct msi_desc *msi; - - /* Disable interrupt */ - msi = irq_get_msi_desc(irq); - if (!msi) - return; - zpci_msi_set_mask_bits(msi, 1, 1); - imap = zpci_imap[dev_nr]; - imap->cb[msi_nr].handler = NULL; - imap->cb[msi_nr].data = NULL; - synchronize_rcu(); -} - -int request_irq(unsigned int irq, irq_handler_t handler, - unsigned long irqflags, const char *devname, void *dev_id) -{ - return zpci_request_irq(irq, handler, dev_id); -} -EXPORT_SYMBOL_GPL(request_irq); - -void free_irq(unsigned int irq, void *dev_id) -{ - zpci_free_irq(irq); -} -EXPORT_SYMBOL_GPL(free_irq); - static int __init zpci_irq_init(void) { int rc; @@ -930,15 +860,10 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { - zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map), - L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL); - if (!zdev_irq_cache) - goto error_zdev; - zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), 16, 0, NULL); if (!zdev_fmb_cache) - goto error_fmb; + goto error_zdev; /* TODO: use realloc */ zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), @@ -949,8 +874,6 @@ static int zpci_mem_init(void) error_iomap: kmem_cache_destroy(zdev_fmb_cache); -error_fmb: - kmem_cache_destroy(zdev_irq_cache); error_zdev: return -ENOMEM; } @@ -958,7 +881,6 @@ error_zdev: static void zpci_mem_exit(void) { kfree(zpci_iomap_start); - kmem_cache_destroy(zdev_irq_cache); kmem_cache_destroy(zdev_fmb_cache); } @@ -1007,16 +929,12 @@ static int __init pci_base_init(void) rc = zpci_debug_init(); if (rc) - return rc; + goto out; rc = zpci_mem_init(); if (rc) goto out_mem; - rc = zpci_msihash_init(); - if (rc) - goto out_hash; - rc = zpci_irq_init(); if (rc) goto out_irq; @@ -1036,11 +954,10 @@ out_find: out_dma: zpci_irq_exit(); out_irq: - zpci_msihash_exit(); -out_hash: zpci_mem_exit(); out_mem: zpci_debug_exit(); +out: return rc; } subsys_initcall(pci_base_init); diff --git a/arch/s390/pci/pci_msi.c b/arch/s390/pci/pci_msi.c deleted file mode 100644 index d4b480a18b0f..000000000000 --- a/arch/s390/pci/pci_msi.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber - */ - -#define COMPONENT "zPCI" -#define pr_fmt(fmt) COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include - -/* mapping of irq numbers to msi_desc */ -static struct hlist_head *msi_hash; -static const unsigned int msi_hash_bits = 8; -#define MSI_HASH_BUCKETS (1U << msi_hash_bits) -#define msi_hashfn(nr) hash_long(nr, msi_hash_bits) - -static DEFINE_SPINLOCK(msi_map_lock); - -struct msi_desc *__irq_get_msi_desc(unsigned int irq) -{ - struct msi_map *map; - - hlist_for_each_entry_rcu(map, - &msi_hash[msi_hashfn(irq)], msi_chain) - if (map->irq == irq) - return map->msi; - return NULL; -} - -int zpci_msi_set_mask_bits(struct msi_desc *msi, u32 mask, u32 flag) -{ - if (msi->msi_attrib.is_msix) { - int offset = msi->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + - PCI_MSIX_ENTRY_VECTOR_CTRL; - msi->masked = readl(msi->mask_base + offset); - writel(flag, msi->mask_base + offset); - } else { - if (msi->msi_attrib.maskbit) { - int pos; - u32 mask_bits; - - pos = (long) msi->mask_base; - pci_read_config_dword(msi->dev, pos, &mask_bits); - mask_bits &= ~(mask); - mask_bits |= flag & mask; - pci_write_config_dword(msi->dev, pos, mask_bits); - } else { - return 0; - } - } - - msi->msi_attrib.maskbit = !!flag; - return 1; -} - -int zpci_setup_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi, - unsigned int nr, int offset) -{ - struct msi_map *map; - struct msi_msg msg; - int rc; - - map = zdev->msi_map + (nr & ZPCI_MSI_VEC_MASK); - map->irq = nr; - map->msi = msi; - INIT_HLIST_NODE(&map->msi_chain); - - pr_debug("%s hashing irq: %u to bucket nr: %llu\n", - __func__, nr, msi_hashfn(nr)); - hlist_add_head_rcu(&map->msi_chain, &msi_hash[msi_hashfn(nr)]); - - spin_lock(&msi_map_lock); - rc = irq_set_msi_desc(nr, msi); - if (rc) { - spin_unlock(&msi_map_lock); - hlist_del_rcu(&map->msi_chain); - return rc; - } - spin_unlock(&msi_map_lock); - - msg.data = nr - offset; - msg.address_lo = zdev->msi_addr & 0xffffffff; - msg.address_hi = zdev->msi_addr >> 32; - write_msi_msg(nr, &msg); - return 0; -} - -void zpci_teardown_msi_irq(struct zpci_dev *zdev, struct msi_desc *msi) -{ - int nr = msi->irq & ZPCI_MSI_VEC_MASK; - struct msi_map *map; - - zpci_msi_set_mask_bits(msi, 1, 1); - msi->msg.address_lo = 0; - msi->msg.address_hi = 0; - msi->msg.data = 0; - msi->irq = 0; - - spin_lock(&msi_map_lock); - map = zdev->msi_map + nr; - hlist_del_rcu(&map->msi_chain); - spin_unlock(&msi_map_lock); -} - -/* - * The msi hash table has 256 entries which is good for 4..20 - * devices (a typical device allocates 10 + CPUs MSI's). Maybe make - * the hash table size adjustable later. - */ -int __init zpci_msihash_init(void) -{ - unsigned int i; - - msi_hash = kmalloc(MSI_HASH_BUCKETS * sizeof(*msi_hash), GFP_KERNEL); - if (!msi_hash) - return -ENOMEM; - - for (i = 0; i < MSI_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&msi_hash[i]); - return 0; -} - -void __init zpci_msihash_exit(void) -{ - kfree(msi_hash); -} diff --git a/drivers/s390/cio/airq.c b/drivers/s390/cio/airq.c index 6ead6d076445..d028fd800c9c 100644 --- a/drivers/s390/cio/airq.c +++ b/drivers/s390/cio/airq.c @@ -81,17 +81,34 @@ void unregister_adapter_interrupt(struct airq_struct *airq) } EXPORT_SYMBOL(unregister_adapter_interrupt); -void do_adapter_IO(u8 isc) +static irqreturn_t do_airq_interrupt(int irq, void *dummy) { + struct tpi_info *tpi_info; struct airq_struct *airq; struct hlist_head *head; - head = &airq_lists[isc]; + __this_cpu_write(s390_idle.nohz_delay, 1); + tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; + head = &airq_lists[tpi_info->isc]; rcu_read_lock(); hlist_for_each_entry_rcu(airq, head, list) if ((*airq->lsi_ptr & airq->lsi_mask) != 0) airq->handler(airq); rcu_read_unlock(); + + return IRQ_HANDLED; +} + +static struct irqaction airq_interrupt = { + .name = "AIO", + .handler = do_airq_interrupt, +}; + +void __init init_airq_interrupts(void) +{ + irq_set_chip_and_handler(THIN_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(THIN_INTERRUPT, &airq_interrupt); } /** diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 4eeb4a6bf207..d7da67a31c77 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -561,37 +561,23 @@ out: } /* - * do_IRQ() handles all normal I/O device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - * + * do_cio_interrupt() handles all normal I/O device IRQ's */ -void __irq_entry do_IRQ(struct pt_regs *regs) +static irqreturn_t do_cio_interrupt(int irq, void *dummy) { - struct tpi_info *tpi_info = (struct tpi_info *) ®s->int_code; + struct tpi_info *tpi_info; struct subchannel *sch; struct irb *irb; - struct pt_regs *old_regs; - old_regs = set_irq_regs(regs); - irq_enter(); __this_cpu_write(s390_idle.nohz_delay, 1); - if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) - /* Serve timer interrupts first. */ - clock_comparator_work(); - - kstat_incr_irqs_this_cpu(IO_INTERRUPT, NULL); + tpi_info = (struct tpi_info *) &get_irq_regs()->int_code; irb = (struct irb *) &S390_lowcore.irb; - if (tpi_info->adapter_IO) { - do_adapter_IO(tpi_info->isc); - goto out; - } sch = (struct subchannel *)(unsigned long) tpi_info->intparm; if (!sch) { /* Clear pending interrupt condition. */ inc_irq_stat(IRQIO_CIO); tsch(tpi_info->schid, irb); - goto out; + return IRQ_HANDLED; } spin_lock(sch->lock); /* Store interrupt response block to lowcore. */ @@ -606,9 +592,23 @@ void __irq_entry do_IRQ(struct pt_regs *regs) } else inc_irq_stat(IRQIO_CIO); spin_unlock(sch->lock); -out: - irq_exit(); - set_irq_regs(old_regs); + + return IRQ_HANDLED; +} + +static struct irq_desc *irq_desc_io; + +static struct irqaction io_interrupt = { + .name = "IO", + .handler = do_cio_interrupt, +}; + +void __init init_cio_interrupts(void) +{ + irq_set_chip_and_handler(IO_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + setup_irq(IO_INTERRUPT, &io_interrupt); + irq_desc_io = irq_to_desc(IO_INTERRUPT); } #ifdef CONFIG_CCW_CONSOLE @@ -635,7 +635,7 @@ void cio_tsch(struct subchannel *sch) local_bh_disable(); irq_enter(); } - kstat_incr_irqs_this_cpu(IO_INTERRUPT, NULL); + kstat_incr_irqs_this_cpu(IO_INTERRUPT, irq_desc_io); if (sch->driver && sch->driver->irq) sch->driver->irq(sch); else diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h index d62f5e7f3cf1..d42f67412bd8 100644 --- a/drivers/s390/cio/cio.h +++ b/drivers/s390/cio/cio.h @@ -121,9 +121,6 @@ extern int cio_commit_config(struct subchannel *sch); int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key); int cio_tm_intrg(struct subchannel *sch); -void do_adapter_IO(u8 isc); -void do_IRQ(struct pt_regs *); - /* Use with care. */ #ifdef CONFIG_CCW_CONSOLE extern struct subchannel *cio_probe_console(void); From 416fd0ffb14afead5b1feea14bbf33c2277942ef Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Jul 2013 09:39:30 +0200 Subject: [PATCH 08/32] s390/mm: remove dead pfmf inline assembly Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5d64fb7619cc..27f04801ec8b 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -32,16 +32,6 @@ void storage_key_init_range(unsigned long start, unsigned long end); -static inline unsigned long pfmf(unsigned long function, unsigned long address) -{ - asm volatile( - " .insn rre,0xb9af0000,%[function],%[address]" - : [address] "+a" (address) - : [function] "d" (function) - : "memory"); - return address; -} - static inline void clear_page(void *page) { register unsigned long reg1 asm ("1") = 0; From e509861105a3c1425f3f929bd631f88340b499bf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Jul 2013 20:57:57 +0200 Subject: [PATCH 09/32] s390/mm: cleanup page table definitions Improve the encoding of the different pte types and the naming of the page, segment table and region table bits. Due to the different pte encoding the hugetlbfs primitives need to be adapted as well. To improve compatability with common code make the huge ptes use the encoding of normal ptes. The conversion between the pte and pmd encoding for a huge pte is done with set_huge_pte_at and huge_ptep_get. Overall the code is now easier to understand. Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/hugetlb.h | 151 +++++----------- arch/s390/include/asm/pgtable.h | 304 +++++++++++++++----------------- arch/s390/kernel/vdso.c | 4 +- arch/s390/lib/uaccess_pt.c | 16 +- arch/s390/mm/dump_pagetables.c | 18 +- arch/s390/mm/gup.c | 6 +- arch/s390/mm/hugetlbpage.c | 106 +++++++++-- arch/s390/mm/pageattr.c | 2 +- arch/s390/mm/pgtable.c | 59 ++++--- arch/s390/mm/vmem.c | 14 +- 10 files changed, 344 insertions(+), 336 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index bd90359d6d22..11eae5f55b70 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -17,6 +17,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); +pte_t huge_ptep_get(pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep); /* * If the arch doesn't supply something else, assume that hugepage @@ -38,147 +41,75 @@ static inline int prepare_hugepage_range(struct file *file, int arch_prepare_hugepage(struct page *page); void arch_release_hugepage(struct page *page); -static inline pte_t huge_pte_wrprotect(pte_t pte) +static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) { - pte_val(pte) |= _PAGE_RO; - return pte; + pte_val(*ptep) = _SEGMENT_ENTRY_EMPTY; } -static inline int huge_pte_none(pte_t pte) -{ - return (pte_val(pte) & _SEGMENT_ENTRY_INV) && - !(pte_val(pte) & _SEGMENT_ENTRY_RO); -} - -static inline pte_t huge_ptep_get(pte_t *ptep) -{ - pte_t pte = *ptep; - unsigned long mask; - - if (!MACHINE_HAS_HPAGE) { - ptep = (pte_t *) (pte_val(pte) & _SEGMENT_ENTRY_ORIGIN); - if (ptep) { - mask = pte_val(pte) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte = pte_mkhuge(*ptep); - pte_val(pte) |= mask; - } - } - return pte; -} - -static inline void __pmd_csp(pmd_t *pmdp) -{ - register unsigned long reg2 asm("2") = pmd_val(*pmdp); - register unsigned long reg3 asm("3") = pmd_val(*pmdp) | - _SEGMENT_ENTRY_INV; - register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; - - asm volatile( - " csp %1,%3" - : "=m" (*pmdp) - : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); -} - -static inline void huge_ptep_invalidate(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - pmd_t *pmdp = (pmd_t *) ptep; - - if (MACHINE_HAS_IDTE) - __pmd_idte(address, pmdp); - else - __pmd_csp(pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY; -} - -static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - pte_t pte = huge_ptep_get(ptep); - - huge_ptep_invalidate(mm, addr, ptep); - return pte; -} - -#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \ -({ \ - int __changed = !pte_same(huge_ptep_get(__ptep), __entry); \ - if (__changed) { \ - huge_ptep_invalidate((__vma)->vm_mm, __addr, __ptep); \ - set_huge_pte_at((__vma)->vm_mm, __addr, __ptep, __entry); \ - } \ - __changed; \ -}) - -#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \ -({ \ - pte_t __pte = huge_ptep_get(__ptep); \ - if (huge_pte_write(__pte)) { \ - huge_ptep_invalidate(__mm, __addr, __ptep); \ - set_huge_pte_at(__mm, __addr, __ptep, \ - huge_pte_wrprotect(__pte)); \ - } \ -}) - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - huge_ptep_invalidate(vma->vm_mm, address, ptep); + huge_ptep_get_and_clear(vma->vm_mm, address, ptep); +} + +static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep, + pte_t pte, int dirty) +{ + int changed = !pte_same(huge_ptep_get(ptep), pte); + if (changed) { + huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + set_huge_pte_at(vma->vm_mm, addr, ptep, pte); + } + return changed; +} + +static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot) { - pte_t pte; - pmd_t pmd; + return mk_pte(page, pgprot); +} - pmd = mk_pmd_phys(page_to_phys(page), pgprot); - pte_val(pte) = pmd_val(pmd); - return pte; +static inline int huge_pte_none(pte_t pte) +{ + return pte_none(pte); } static inline int huge_pte_write(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - return pmd_write(pmd); + return pte_write(pte); } static inline int huge_pte_dirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return 0; + return pte_dirty(pte); } static inline pte_t huge_pte_mkwrite(pte_t pte) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_mkwrite(pmd)); - return pte; + return pte_mkwrite(pte); } static inline pte_t huge_pte_mkdirty(pte_t pte) { - /* No dirty bit in the segment table entry. */ - return pte; + return pte_mkdirty(pte); +} + +static inline pte_t huge_pte_wrprotect(pte_t pte) +{ + return pte_wrprotect(pte); } static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot) { - pmd_t pmd; - - pmd_val(pmd) = pte_val(pte); - pte_val(pte) = pmd_val(pmd_modify(pmd, newprot)); - return pte; -} - -static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) -{ - pmd_clear((pmd_t *) ptep); + return pte_modify(pte, newprot); } #endif /* _ASM_S390_HUGETLB_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 75fb726de91f..b09c00b5cfa2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -217,63 +217,50 @@ extern unsigned long MODULES_END; /* Hardware bits in the page table entry */ #define _PAGE_CO 0x100 /* HW Change-bit override */ -#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_PROTECT 0x200 /* HW read-only bit */ #define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */ /* Software bits in the page table entry */ -#define _PAGE_SWT 0x001 /* SW pte type bit t */ -#define _PAGE_SWX 0x002 /* SW pte type bit x */ -#define _PAGE_SWC 0x004 /* SW pte changed bit */ -#define _PAGE_SWR 0x008 /* SW pte referenced bit */ -#define _PAGE_SWW 0x010 /* SW pte write bit */ +#define _PAGE_PRESENT 0x001 /* SW pte present bit */ +#define _PAGE_TYPE 0x002 /* SW pte type bit */ +#define _PAGE_YOUNG 0x004 /* SW pte young bit */ +#define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ +#define _PAGE_WRITE 0x010 /* SW pte write bit */ #define _PAGE_SPECIAL 0x020 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \ - _PAGE_SWC | _PAGE_SWR) - -/* Six different types of pages. */ -#define _PAGE_TYPE_EMPTY 0x400 -#define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x403 -#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ -#define _PAGE_TYPE_RO 0x200 -#define _PAGE_TYPE_RW 0x000 + _PAGE_DIRTY | _PAGE_YOUNG) /* - * Only four types for huge pages, using the invalid bit and protection bit - * of a segment table entry. - */ -#define _HPAGE_TYPE_EMPTY 0x020 /* _SEGMENT_ENTRY_INV */ -#define _HPAGE_TYPE_NONE 0x220 -#define _HPAGE_TYPE_RO 0x200 /* _SEGMENT_ENTRY_RO */ -#define _HPAGE_TYPE_RW 0x000 - -/* - * PTE type bits are rather complicated. handle_pte_fault uses pte_present, - * pte_none and pte_file to find out the pte type WITHOUT holding the page - * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to - * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs - * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. - * This change is done while holding the lock, but the intermediate step - * of a previously valid pte with the hw invalid bit set can be observed by - * handle_pte_fault. That makes it necessary that all valid pte types with - * the hw invalid bit set must be distinguishable from the four pte types - * empty, none, swap and file. + * handle_pte_fault uses pte_present, pte_none and pte_file to find out the + * pte type WITHOUT holding the page table lock. The _PAGE_PRESENT bit + * is used to distinguish present from not-present ptes. It is changed only + * with the page table lock held. * - * irxt ipte irxt - * _PAGE_TYPE_EMPTY 1000 -> 1000 - * _PAGE_TYPE_NONE 1001 -> 1001 - * _PAGE_TYPE_SWAP 1011 -> 1011 - * _PAGE_TYPE_FILE 11?1 -> 11?1 - * _PAGE_TYPE_RO 0100 -> 1100 - * _PAGE_TYPE_RW 0000 -> 1000 + * The following table gives the different possible bit combinations for + * the pte hardware and software bits in the last 12 bits of a pte: * - * pte_none is true for bits combinations 1000, 1010, 1100, 1110 - * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 - * pte_file is true for bits combinations 1101, 1111 - * swap pte is 1011 and 0001, 0011, 0101, 0111 are invalid. + * 842100000000 + * 000084210000 + * 000000008421 + * .IR....wdytp + * empty .10....00000 + * swap .10....xxx10 + * file .11....xxxx0 + * prot-none, clean .11....00x01 + * prot-none, dirty .10....01x01 + * read-only, clean .01....00x01 + * read-only, dirty .01....01x01 + * read-write, clean .01....10x01 + * read-write, dirty .00....11x01 + * + * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 + * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 + * pte_file is true for the bit pattern .11...xxxxx0, (pte & 0x601) == 0x600 + * pte_swap is true for the bit pattern .10...xxxx10, (pte & 0x603) == 0x402 */ #ifndef CONFIG_64BIT @@ -287,13 +274,13 @@ extern unsigned long MODULES_END; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf0000000UL @@ -324,8 +311,8 @@ extern unsigned long MODULES_END; /* Bits in the region table entry */ #define _REGION_ENTRY_ORIGIN ~0xfffUL/* region/segment table origin */ -#define _REGION_ENTRY_RO 0x200 /* region protection bit */ -#define _REGION_ENTRY_INV 0x20 /* invalid region table entry */ +#define _REGION_ENTRY_PROTECT 0x200 /* region protection bit */ +#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */ #define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */ #define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */ #define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */ @@ -333,11 +320,11 @@ extern unsigned long MODULES_END; #define _REGION_ENTRY_LENGTH 0x03 /* region third length */ #define _REGION1_ENTRY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_LENGTH) -#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INV) +#define _REGION1_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID) #define _REGION2_ENTRY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_LENGTH) -#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INV) +#define _REGION2_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_LENGTH) -#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INV) +#define _REGION3_ENTRY_EMPTY (_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID) #define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */ #define _REGION3_ENTRY_RO 0x200 /* page protection bit */ @@ -346,16 +333,17 @@ extern unsigned long MODULES_END; /* Bits in the segment table entry */ #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ -#define _SEGMENT_ENTRY_RO 0x200 /* page protection bit */ -#define _SEGMENT_ENTRY_INV 0x20 /* invalid segment table entry */ +#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ +#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY (0) -#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INV) +#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ +#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ + #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ -#define _SEGMENT_ENTRY_SPLIT (1UL << _SEGMENT_ENTRY_SPLIT_BIT) /* Set of bits not changed in pmd_modify */ #define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \ @@ -386,14 +374,13 @@ extern unsigned long MODULES_END; /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE) -#define PAGE_RO __pgprot(_PAGE_TYPE_RO) -#define PAGE_RW __pgprot(_PAGE_TYPE_RO | _PAGE_SWW) -#define PAGE_RWC __pgprot(_PAGE_TYPE_RW | _PAGE_SWW | _PAGE_SWC) +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_PROTECT) -#define PAGE_KERNEL PAGE_RWC -#define PAGE_SHARED PAGE_KERNEL -#define PAGE_COPY PAGE_RO +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) /* * On s390 the page table entry has an invalid bit and a read-only bit. @@ -402,29 +389,30 @@ extern unsigned long MODULES_END; */ /*xwr*/ #define __P000 PAGE_NONE -#define __P001 PAGE_RO -#define __P010 PAGE_RO -#define __P011 PAGE_RO -#define __P100 PAGE_RO -#define __P101 PAGE_RO -#define __P110 PAGE_RO -#define __P111 PAGE_RO +#define __P001 PAGE_READ +#define __P010 PAGE_READ +#define __P011 PAGE_READ +#define __P100 PAGE_READ +#define __P101 PAGE_READ +#define __P110 PAGE_READ +#define __P111 PAGE_READ #define __S000 PAGE_NONE -#define __S001 PAGE_RO -#define __S010 PAGE_RW -#define __S011 PAGE_RW -#define __S100 PAGE_RO -#define __S101 PAGE_RO -#define __S110 PAGE_RW -#define __S111 PAGE_RW +#define __S001 PAGE_READ +#define __S010 PAGE_WRITE +#define __S011 PAGE_WRITE +#define __S100 PAGE_READ +#define __S101 PAGE_READ +#define __S110 PAGE_WRITE +#define __S111 PAGE_WRITE /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE) -#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO) -#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW) +#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_PROTECT) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT) +#define SEGMENT_WRITE __pgprot(0) static inline int mm_exclusive(struct mm_struct *mm) { @@ -467,7 +455,7 @@ static inline int pgd_none(pgd_t pgd) { if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2) return 0; - return (pgd_val(pgd) & _REGION_ENTRY_INV) != 0UL; + return (pgd_val(pgd) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pgd_bad(pgd_t pgd) @@ -478,7 +466,7 @@ static inline int pgd_bad(pgd_t pgd) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pgd_val(pgd) & mask) != 0; } @@ -494,7 +482,7 @@ static inline int pud_none(pud_t pud) { if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3) return 0; - return (pud_val(pud) & _REGION_ENTRY_INV) != 0UL; + return (pud_val(pud) & _REGION_ENTRY_INVALID) != 0UL; } static inline int pud_large(pud_t pud) @@ -512,7 +500,7 @@ static inline int pud_bad(pud_t pud) * invalid for either table entry. */ unsigned long mask = - ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INV & + ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID & ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH; return (pud_val(pud) & mask) != 0; } @@ -521,21 +509,18 @@ static inline int pud_bad(pud_t pud) static inline int pmd_present(pmd_t pmd) { - unsigned long mask = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO; - return (pmd_val(pmd) & mask) == _HPAGE_TYPE_NONE || - !(pmd_val(pmd) & _SEGMENT_ENTRY_INV); + return pmd_val(pmd) != _SEGMENT_ENTRY_INVALID; } static inline int pmd_none(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_INV) && - !(pmd_val(pmd) & _SEGMENT_ENTRY_RO); + return pmd_val(pmd) == _SEGMENT_ENTRY_INVALID; } static inline int pmd_large(pmd_t pmd) { #ifdef CONFIG_64BIT - return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE); + return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0; #else return 0; #endif @@ -543,7 +528,7 @@ static inline int pmd_large(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INV; + unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INVALID; return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; } @@ -563,7 +548,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { - return (pmd_val(pmd) & _SEGMENT_ENTRY_RO) == 0; + return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; } static inline int pmd_young(pmd_t pmd) @@ -571,23 +556,23 @@ static inline int pmd_young(pmd_t pmd) return 0; } -static inline int pte_none(pte_t pte) -{ - return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); -} - static inline int pte_present(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; - return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || - (!(pte_val(pte) & _PAGE_INVALID) && - !(pte_val(pte) & _PAGE_SWT)); + /* Bit pattern: (pte & 0x001) == 0x001 */ + return (pte_val(pte) & _PAGE_PRESENT) != 0; +} + +static inline int pte_none(pte_t pte) +{ + /* Bit pattern: pte == 0x400 */ + return pte_val(pte) == _PAGE_INVALID; } static inline int pte_file(pte_t pte) { - unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; - return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; + /* Bit pattern: (pte & 0x601) == 0x600 */ + return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT | _PAGE_PRESENT)) + == (_PAGE_INVALID | _PAGE_PROTECT); } static inline int pte_special(pte_t pte) @@ -695,7 +680,7 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) /* Transfer referenced bit to kvm user bits and pte */ if (young) { pgste_val(pgste) |= PGSTE_UR_BIT; - pte_val(*ptep) |= _PAGE_SWR; + pte_val(*ptep) |= _PAGE_YOUNG; } #endif return pgste; @@ -723,13 +708,13 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry) static inline void pgste_set_pte(pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_SWW)) { + if (!MACHINE_HAS_ESOP && (pte_val(entry) & _PAGE_WRITE)) { /* * Without enhanced suppression-on-protection force * the dirty bit on for all writable ptes. */ - pte_val(entry) |= _PAGE_SWC; - pte_val(entry) &= ~_PAGE_RO; + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; } *ptep = entry; } @@ -841,18 +826,18 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, */ static inline int pte_write(pte_t pte) { - return (pte_val(pte) & _PAGE_SWW) != 0; + return (pte_val(pte) & _PAGE_WRITE) != 0; } static inline int pte_dirty(pte_t pte) { - return (pte_val(pte) & _PAGE_SWC) != 0; + return (pte_val(pte) & _PAGE_DIRTY) != 0; } static inline int pte_young(pte_t pte) { #ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_SWR) + if (pte_val(pte) & _PAGE_YOUNG) return 1; #endif return 0; @@ -880,12 +865,12 @@ static inline void pud_clear(pud_t *pud) static inline void pmd_clear(pmd_t *pmdp) { - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + pmd_val(*pmdp) = _SEGMENT_ENTRY_INVALID; } static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; } /* @@ -896,49 +881,45 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); - if ((pte_val(pte) & _PAGE_SWC) && (pte_val(pte) & _PAGE_SWW)) - pte_val(pte) &= ~_PAGE_RO; + if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWW; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_WRITE; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { - pte_val(pte) |= _PAGE_SWW; - if (pte_val(pte) & _PAGE_SWC) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_WRITE; + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkclean(pte_t pte) { - pte_val(pte) &= ~_PAGE_SWC; - /* Do not clobber _PAGE_TYPE_NONE pages! */ - if (!(pte_val(pte) & _PAGE_INVALID)) - pte_val(pte) |= _PAGE_RO; + pte_val(pte) &= ~_PAGE_DIRTY; + pte_val(pte) |= _PAGE_PROTECT; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_SWC; - if (pte_val(pte) & _PAGE_SWW) - pte_val(pte) &= ~_PAGE_RO; + pte_val(pte) |= _PAGE_DIRTY; + if (pte_val(pte) & _PAGE_WRITE) + pte_val(pte) &= ~_PAGE_PROTECT; return pte; } static inline pte_t pte_mkold(pte_t pte) { #ifdef CONFIG_PGSTE - pte_val(pte) &= ~_PAGE_SWR; + pte_val(pte) &= ~_PAGE_YOUNG; #endif return pte; } @@ -957,7 +938,7 @@ static inline pte_t pte_mkspecial(pte_t pte) #ifdef CONFIG_HUGETLB_PAGE static inline pte_t pte_mkhuge(pte_t pte) { - pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO); + pte_val(pte) |= _PAGE_LARGE; return pte; } #endif @@ -1076,7 +1057,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte = *ptep; if (!mm_exclusive(mm)) __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1139,7 +1120,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, pte = *ptep; __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(vma->vm_mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1172,7 +1153,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pte = *ptep; if (!full) __ptep_ipte(address, ptep); - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1248,10 +1229,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) unsigned long physpage = page_to_phys(page); pte_t __pte = mk_pte_phys(physpage, pgprot); - if ((pte_val(__pte) & _PAGE_SWW) && PageDirty(page)) { - pte_val(__pte) |= _PAGE_SWC; - pte_val(__pte) &= ~_PAGE_RO; - } + if (pte_write(__pte) && PageDirty(page)) + __pte = pte_mkdirty(__pte); return __pte; } @@ -1313,7 +1292,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) unsigned long sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t); - if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INV)) { + if (!(pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)) { asm volatile( " .insn rrf,0xb98e0000,%2,%3,0,0" : "=m" (*pmdp) @@ -1324,18 +1303,31 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp) } } +static inline void __pmd_csp(pmd_t *pmdp) +{ + register unsigned long reg2 asm("2") = pmd_val(*pmdp); + register unsigned long reg3 asm("3") = pmd_val(*pmdp) | + _SEGMENT_ENTRY_INVALID; + register unsigned long reg4 asm("4") = ((unsigned long) pmdp) + 5; + + asm volatile( + " csp %1,%3" + : "=m" (*pmdp) + : "d" (reg2), "d" (reg3), "d" (reg4), "m" (*pmdp) : "cc"); +} + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) { /* - * pgprot is PAGE_NONE, PAGE_RO, or PAGE_RW (see __Pxxx / __Sxxx) + * pgprot is PAGE_NONE, PAGE_READ, or PAGE_WRITE (see __Pxxx / __Sxxx) * Convert to segment table entry format. */ if (pgprot_val(pgprot) == pgprot_val(PAGE_NONE)) return pgprot_val(SEGMENT_NONE); - if (pgprot_val(pgprot) == pgprot_val(PAGE_RO)) - return pgprot_val(SEGMENT_RO); - return pgprot_val(SEGMENT_RW); + if (pgprot_val(pgprot) == pgprot_val(PAGE_READ)) + return pgprot_val(SEGMENT_READ); + return pgprot_val(SEGMENT_WRITE); } static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) @@ -1354,9 +1346,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - /* Do not clobber _HPAGE_TYPE_NONE pages! */ - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INV)) - pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO; + /* Do not clobber PROT_NONE pages! */ + if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)) + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */ @@ -1378,7 +1370,7 @@ static inline int pmd_trans_splitting(pmd_t pmd) static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1) + if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1) pmd_val(entry) |= _SEGMENT_ENTRY_CO; *pmdp = entry; } @@ -1391,7 +1383,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_RO; + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1510,10 +1502,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 21 and bit 22 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 30 and 31 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 21, 22, 30 and 31 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 1-19 and bits 24-29 to store type and offset. * We use the 5 bits from 25-29 for the type and the 20 bits from 1-19 * plus 24 for the offset. @@ -1527,10 +1517,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd) * exception will occur instead of a page translation exception. The * specifiation exception has the bad habit not to store necessary * information in the lowcore. - * Bit 53 and bit 54 are the page invalid bit and the page protection - * bit. We set both to indicate a swapped page. - * Bit 62 and 63 are used to distinguish the different page types. For - * a swapped page these bits need to be zero. + * Bits 53, 54, 62 and 63 are used to indicate the page type. + * A swap pte is indicated by bit pattern (pte & 0x603) == 0x402 * This leaves the bits 0-51 and bits 56-61 to store type and offset. * We use the 5 bits from 57-61 for the type and the 53 bits from 0-51 * plus 56 for the offset. @@ -1547,7 +1535,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) { pte_t pte; offset &= __SWP_OFFSET_MASK; - pte_val(pte) = _PAGE_TYPE_SWAP | ((type & 0x1f) << 2) | + pte_val(pte) = _PAGE_INVALID | _PAGE_TYPE | ((type & 0x1f) << 2) | ((offset & 1UL) << 7) | ((offset & ~1UL) << 11); return pte; } @@ -1570,7 +1558,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) #define pgoff_to_pte(__off) \ ((pte_t) { ((((__off) & 0x7f) << 1) + (((__off) >> 7) << 12)) \ - | _PAGE_TYPE_FILE }) + | _PAGE_INVALID | _PAGE_PROTECT }) #endif /* !__ASSEMBLY__ */ diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 3cf20930574e..05d75c413137 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -113,11 +113,11 @@ int vdso_alloc_per_cpu(struct _lowcore *lowcore) clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE << SEGMENT_ORDER); - clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) page_table, _PAGE_INVALID, 256*sizeof(unsigned long)); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; - *(unsigned long *) page_table = _PAGE_RO + page_frame; + *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; psal = (u32 *) (page_table + 256*sizeof(unsigned long)); aste = psal + 32; diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c index 50ea137a2d3c..1694d738b175 100644 --- a/arch/s390/lib/uaccess_pt.c +++ b/arch/s390/lib/uaccess_pt.c @@ -86,28 +86,28 @@ static unsigned long follow_table(struct mm_struct *mm, switch (mm->context.asce_bits & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: table = table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x39UL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION2: table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3aUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_REGION3: table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return -0x3bUL; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); /* fallthrough */ case _ASCE_TYPE_SEGMENT: table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; if (unlikely(*table & _SEGMENT_ENTRY_LARGE)) { - if (write && (*table & _SEGMENT_ENTRY_RO)) + if (write && (*table & _SEGMENT_ENTRY_PROTECT)) return -0x04UL; return (*table & _SEGMENT_ENTRY_ORIGIN_LARGE) + (address & ~_SEGMENT_ENTRY_ORIGIN_LARGE); @@ -117,7 +117,7 @@ static unsigned long follow_table(struct mm_struct *mm, table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } @@ -130,13 +130,13 @@ static unsigned long follow_table(struct mm_struct *mm, unsigned long *table = (unsigned long *)__pa(mm->pgd); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) return -0x10UL; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); table = table + ((address >> 12) & 0xff); if (unlikely(*table & _PAGE_INVALID)) return -0x11UL; - if (write && (*table & _PAGE_RO)) + if (write && (*table & _PAGE_PROTECT)) return -0x04UL; return (*table & PAGE_MASK) + (address & ~PAGE_MASK); } diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 3ad65b04ac15..46d517c3c763 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -53,7 +53,7 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level) seq_printf(m, "I\n"); return; } - seq_printf(m, "%s", pr & _PAGE_RO ? "RO " : "RW "); + seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW "); seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " "); seq_putc(m, '\n'); } @@ -105,12 +105,12 @@ static void note_page(struct seq_file *m, struct pg_state *st, } /* - * The actual page table walker functions. In order to keep the implementation - * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO - * flags to note_page() if a region, segment or page table entry is invalid or - * read-only. - * After all it's just a hint that the current level being walked contains an - * invalid or read-only entry. + * The actual page table walker functions. In order to keep the + * implementation of print_prot() short, we only check and pass + * _PAGE_INVALID and _PAGE_PROTECT flags to note_page() if a region, + * segment or page table entry is invalid or read-only. + * After all it's just a hint that the current level being walked + * contains an invalid or read-only entry. */ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t *pmd, unsigned long addr) @@ -122,14 +122,14 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { st->current_address = addr; pte = pte_offset_kernel(pmd, addr); - prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); + prot = pte_val(*pte) & (_PAGE_PROTECT | _PAGE_INVALID); note_page(m, st, prot, 4); addr += PAGE_SIZE; } } #ifdef CONFIG_64BIT -#define _PMD_PROT_MASK (_SEGMENT_ENTRY_RO | _SEGMENT_ENTRY_CO) +#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO) #else #define _PMD_PROT_MASK 0 #endif diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 1f5315d1215c..5d758db27bdc 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -24,7 +24,7 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, pte_t *ptep, pte; struct page *page; - mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL; + mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL; ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr); do { @@ -55,8 +55,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, struct page *head, *page, *tail; int refs; - result = write ? 0 : _SEGMENT_ENTRY_RO; - mask = result | _SEGMENT_ENTRY_INV; + result = write ? 0 : _SEGMENT_ENTRY_PROTECT; + mask = result | _SEGMENT_ENTRY_INVALID; if ((pmd_val(pmd) & mask) != result) return 0; VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT)); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 121089d57802..b0bd0ae17796 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -8,21 +8,107 @@ #include #include +static inline pmd_t __pte_to_pmd(pte_t pte) +{ + int none, prot; + pmd_t pmd; + + /* + * Convert encoding pte bits pmd bits + * .IR.....wdtp ..R...I..... + * empty .10.....0000 -> ..0...1..... + * prot-none, clean .11.....0001 -> ..1...1..... + * prot-none, dirty .10.....0101 -> ..1...1..... + * read-only, clean .01.....0001 -> ..1...0..... + * read-only, dirty .01.....0101 -> ..1...0..... + * read-write, clean .01.....1001 -> ..0...0..... + * read-write, dirty .00.....1101 -> ..0...0..... + * Huge ptes are dirty by definition, a clean pte is made dirty + * by the conversion. + */ + if (pte_present(pte)) { + pmd_val(pmd) = pte_val(pte) & PAGE_MASK; + if (pte_val(pte) & _PAGE_INVALID) + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + none = (pte_val(pte) & _PAGE_PRESENT) && + (pte_val(pte) & _PAGE_INVALID); + prot = (pte_val(pte) & _PAGE_PROTECT); + if (prot || none) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else + pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; + return pmd; +} + +static inline pte_t __pmd_to_pte(pmd_t pmd) +{ + pte_t pte; + + /* + * Convert encoding pmd bits pte bits + * ..R...I..... .IR.....wdtp + * empty ..0...1..... -> .10.....0000 + * prot-none, young ..1...1..... -> .10.....0101 + * read-only, young ..1...0..... -> .01.....0101 + * read-write, young ..0...0..... -> .00.....1101 + * Huge ptes are dirty by definition + */ + if (pmd_present(pmd)) { + pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY | + (pmd_val(pmd) & PAGE_MASK); + if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) + pte_val(pte) |= _PAGE_INVALID; + else { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_WRITE; + } + } else + pte_val(pte) = _PAGE_INVALID; + return pte; +} void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *pteptr, pte_t pteval) + pte_t *ptep, pte_t pte) { - pmd_t *pmdp = (pmd_t *) pteptr; - unsigned long mask; + pmd_t pmd; + pmd = __pte_to_pmd(pte); if (!MACHINE_HAS_HPAGE) { - pteptr = (pte_t *) pte_page(pteval)[1].index; - mask = pte_val(pteval) & - (_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO); - pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask; - } + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= pte_page(pte)[1].index; + } else + pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO; + *(pmd_t *) ptep = pmd; +} - pmd_val(*pmdp) = pte_val(pteval); +pte_t huge_ptep_get(pte_t *ptep) +{ + unsigned long origin; + pmd_t pmd; + + pmd = *(pmd_t *) ptep; + if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) { + origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN; + pmd_val(pmd) |= *(unsigned long *) origin; + } + return __pmd_to_pte(pmd); +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pmd_t *pmdp = (pmd_t *) ptep; + pte_t pte = huge_ptep_get(ptep); + + if (MACHINE_HAS_IDTE) + __pmd_idte(addr, pmdp); + else + __pmd_csp(pmdp); + pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; + return pte; } int arch_prepare_hugepage(struct page *page) @@ -58,7 +144,7 @@ void arch_release_hugepage(struct page *page) ptep = (pte_t *) page[1].index; if (!ptep) return; - clear_table((unsigned long *) ptep, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) ptep, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); page_table_free(&init_mm, (unsigned long *) ptep); page[1].index = 0; diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 80adfbf75065..990397420e6b 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -118,7 +118,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable) pte = pte_offset_kernel(pmd, address); if (!enable) { __ptep_ipte(address, pte); - pte_val(*pte) = _PAGE_TYPE_EMPTY; + pte_val(*pte) = _PAGE_INVALID; continue; } pte_val(*pte) = __pa(address); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index a8154a1a2c94..b9d35d63934e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -161,7 +161,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) struct gmap_rmap *rmap; struct page *page; - if (*table & _SEGMENT_ENTRY_INV) + if (*table & _SEGMENT_ENTRY_INVALID) return 0; page = pfn_to_page(*table >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; @@ -172,7 +172,7 @@ static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table) kfree(rmap); break; } - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT; return 1; } @@ -258,7 +258,7 @@ static int gmap_alloc_table(struct gmap *gmap, return -ENOMEM; new = (unsigned long *) page_to_phys(page); crst_table_init(new, init); - if (*table & _REGION_ENTRY_INV) { + if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); *table = (unsigned long) new | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); @@ -292,22 +292,22 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) for (off = 0; off < len; off += PMD_SIZE) { /* Walk the guest addr space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if (*table & _REGION_ENTRY_INV) + if (*table & _REGION_ENTRY_INVALID) goto out; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 20) & 0x7ff); /* Clear segment table entry in guest address space. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV; + *table = _SEGMENT_ENTRY_INVALID; } out: spin_unlock(&gmap->mm->page_table_lock); @@ -345,17 +345,17 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, for (off = 0; off < len; off += PMD_SIZE) { /* Walk the gmap address space page table */ table = gmap->table + (((to + off) >> 53) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 42) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + (((to + off) >> 31) & 0x7ff); - if ((*table & _REGION_ENTRY_INV) && + if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY)) goto out_unmap; table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN); @@ -363,7 +363,8 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, /* Store 'from' address in an invalid segment table entry. */ flush |= gmap_unlink_segment(gmap, table); - *table = _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | (from + off); + *table = (from + off) | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); } spin_unlock(&gmap->mm->page_table_lock); up_read(&gmap->mm->mmap_sem); @@ -384,15 +385,15 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap) unsigned long *table; table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) + if (unlikely(*table & _REGION_ENTRY_INVALID)) return ERR_PTR(-EFAULT); table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); @@ -422,11 +423,11 @@ unsigned long __gmap_translate(unsigned long address, struct gmap *gmap) return PTR_ERR(segment_ptr); /* Convert the gmap address to an mm address. */ segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); - } else if (segment & _SEGMENT_ENTRY_RO) { + } else if (segment & _SEGMENT_ENTRY_PROTECT) { vmaddr = segment & _SEGMENT_ENTRY_ORIGIN; return vmaddr | (address & ~PMD_MASK); } @@ -517,8 +518,8 @@ static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table) page = pfn_to_page(__pa(table) >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; list_for_each_entry_safe(rmap, next, &mp->mapper, list) { - *rmap->entry = - _SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr; + *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID | + _SEGMENT_ENTRY_PROTECT); list_del(&rmap->list); kfree(rmap); flush = 1; @@ -545,13 +546,13 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap) /* Convert the gmap address to an mm address. */ while (1) { segment = *segment_ptr; - if (!(segment & _SEGMENT_ENTRY_INV)) { + if (!(segment & _SEGMENT_ENTRY_INVALID)) { /* Page table is present */ page = pfn_to_page(segment >> PAGE_SHIFT); mp = (struct gmap_pgtable *) page->index; return mp->vmaddr | (address & ~PMD_MASK); } - if (!(segment & _SEGMENT_ENTRY_RO)) + if (!(segment & _SEGMENT_ENTRY_PROTECT)) /* Nothing mapped in the gmap address space. */ break; rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap); @@ -586,25 +587,25 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap) while (address < to) { /* Walk the gmap address space page table */ table = gmap->table + ((address >> 53) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 42) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 31) & 0x7ff); - if (unlikely(*table & _REGION_ENTRY_INV)) { + if (unlikely(*table & _REGION_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); table = table + ((address >> 20) & 0x7ff); - if (unlikely(*table & _SEGMENT_ENTRY_INV)) { + if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) { address = (address + PMD_SIZE) & PMD_MASK; continue; } @@ -687,7 +688,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len) continue; /* Set notification bit in the pgste of the pte */ entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_RO)) == 0) { + if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { pgste = pgste_get_lock(ptep); pgste_val(pgste) |= PGSTE_IN_BIT; pgste_set_unlock(ptep, pgste); @@ -752,7 +753,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, page->index = (unsigned long) mp; atomic_set(&page->_mapcount, 3); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); return table; } @@ -878,7 +879,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr) pgtable_page_ctor(page); atomic_set(&page->_mapcount, 1); table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); spin_lock_bh(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } else { @@ -1198,9 +1199,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) list_del(lh); } ptep = (pte_t *) pgtable; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; ptep++; - pte_val(*ptep) = _PAGE_TYPE_EMPTY; + pte_val(*ptep) = _PAGE_INVALID; return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8b268fcc4612..e1299d40818d 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -69,7 +69,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + clear_table((unsigned long *) pte, _PAGE_INVALID, PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -101,7 +101,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) { pud_val(*pu_dir) = __pa(address) | _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE | - (ro ? _REGION_ENTRY_RO : 0); + (ro ? _REGION_ENTRY_PROTECT : 0); address += PUD_SIZE; continue; } @@ -118,7 +118,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | - (ro ? _SEGMENT_ENTRY_RO : 0); + (ro ? _SEGMENT_ENTRY_PROTECT : 0); address += PMD_SIZE; continue; } @@ -131,7 +131,8 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) } pt_dir = pte_offset_kernel(pm_dir, address); - pte_val(*pt_dir) = __pa(address) | (ro ? _PAGE_RO : 0); + pte_val(*pt_dir) = __pa(address) | + pgprot_val(ro ? PAGE_KERNEL_RO : PAGE_KERNEL); address += PAGE_SIZE; } ret = 0; @@ -154,7 +155,7 @@ static void vmem_remove_range(unsigned long start, unsigned long size) pte_t *pt_dir; pte_t pte; - pte_val(pte) = _PAGE_TYPE_EMPTY; + pte_val(pte) = _PAGE_INVALID; while (address < end) { pg_dir = pgd_offset_k(address); if (pgd_none(*pg_dir)) { @@ -255,7 +256,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) new_page =__pa(vmem_alloc_pages(0)); if (!new_page) goto out; - pte_val(*pt_dir) = __pa(new_page); + pte_val(*pt_dir) = + __pa(new_page) | pgprot_val(PAGE_KERNEL); } address += PAGE_SIZE; } From a055f66a3a099e2c886a53421405f98683e94b2b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Jul 2013 10:31:55 +0200 Subject: [PATCH 10/32] s390/pgtable: skip pgste updates on full flush On process exit there is no more need for the pgste information. Skip the expensive storage key operations which should speed up termination of KVM processes. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b09c00b5cfa2..b59b44badae9 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1144,10 +1144,9 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pgste_t pgste; pte_t pte; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); - if (!full) - pgste = pgste_ipte_notify(mm, address, ptep, pgste); + pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; @@ -1155,7 +1154,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, __ptep_ipte(address, ptep); pte_val(*ptep) = _PAGE_INVALID; - if (mm_has_pgste(mm)) { + if (!full && mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); pgste_set_unlock(ptep, pgste); } From d56c893d36678f7ec8552f22ec12793a82ff8ee6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 19 Jul 2013 11:15:54 +0200 Subject: [PATCH 11/32] s390/pgtable: add pgste_get helper ptep_modify_prot_start uses the pgste_set helper to store the pgste, while ptep_modify_prot_commit uses its own pointer magic to retrieve the value again. Add the pgste_get help function to keep things symmetrical and improve readability. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b59b44badae9..26ee897aa1d6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -619,6 +619,15 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) #endif } +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + static inline void pgste_set(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE @@ -1098,7 +1107,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, pgste_t pgste; if (mm_has_pgste(mm)) { - pgste = *(pgste_t *)(ptep + PTRS_PER_PTE); + pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte); pgste_set_pte(ptep, pte); pgste_set_unlock(ptep, pgste); From 363fd4c1f17b94b2076b57286e9507785a05bbce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 26 Jul 2013 11:03:16 +0200 Subject: [PATCH 12/32] s390/sclp: reword cpu capability change message Change wording for the cpu capabiity changed message: If such an event occurs it only means that a cpu capability *may* have changed. A cpu capability change event may also occur for other reasons. Also change the severity of the message from warning to informational. If such an event happens user space should into /proc/sysinfo and verify if some capability values changed, if that is of interest. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index 444d36183a25..944156207477 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -32,7 +32,7 @@ static void sclp_cpu_capability_notify(struct work_struct *work) struct device *dev; s390_adjust_jiffies(); - pr_warning("cpu capability changed.\n"); + pr_info("CPU capability may have changed\n"); get_online_cpus(); for_each_online_cpu(cpu) { dev = get_cpu_device(cpu); From b6bed093f489ef0a858e63eebcf5f2fb71ed3222 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 8 Aug 2013 12:37:00 +0200 Subject: [PATCH 13/32] s390/time: clock comparator revalidation Always use the S390_lowcore.clock_comparator field to revalidate the clock comparator CPU register after a machine check. This avoids an unnecssary external interrupt after a machine check if no timer is pending. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/nmi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 504175ebf8b0..c4c033819879 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -214,10 +214,7 @@ static int notrace s390_revalidate_registers(struct mci *mci) : "0", "cc"); #endif /* Revalidate clock comparator register */ - if (S390_lowcore.clock_comparator == -1) - set_clock_comparator(S390_lowcore.mcck_clock); - else - set_clock_comparator(S390_lowcore.clock_comparator); + set_clock_comparator(S390_lowcore.clock_comparator); /* Check if old PSW is valid */ if (!mci->wp) /* From 5c474a1e2265c5156e6c63f87a7e99053039b8b9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 16 Aug 2013 13:31:40 +0200 Subject: [PATCH 14/32] s390/mm: introduce ptep_flush_lazy helper Isolate the logic of IDTE vs. IPTE flushing of ptes in two functions, ptep_flush_lazy and __tlb_flush_mm_lazy. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/mmu_context.h | 3 +-- arch/s390/include/asm/pgtable.h | 31 ++++++++++++++--------------- arch/s390/include/asm/tlb.h | 3 ++- arch/s390/include/asm/tlbflush.h | 6 +++--- arch/s390/mm/pgtable.c | 6 +++--- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 084e7755ed9b..7b7fce4e8469 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -77,8 +77,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, WARN_ON(atomic_read(&prev->context.attach_count) < 0); atomic_inc(&next->context.attach_count); /* Check for TLBs not flushed yet */ - if (next->context.flush_mm) - __tlb_flush_mm(next); + __tlb_flush_mm_lazy(next); } #define enter_lazy_tlb(mm,tsk) do { } while (0) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 26ee897aa1d6..125e37909998 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -414,12 +414,6 @@ extern unsigned long MODULES_END; #define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT) #define SEGMENT_WRITE __pgprot(0) -static inline int mm_exclusive(struct mm_struct *mm) -{ - return likely(mm == current->active_mm && - atomic_read(&mm->context.attach_count) <= 1); -} - static inline int mm_has_pgste(struct mm_struct *mm) { #ifdef CONFIG_PGSTE @@ -1037,6 +1031,17 @@ static inline void __ptep_ipte(unsigned long address, pte_t *ptep) } } +static inline void ptep_flush_lazy(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + int active = (mm == current->active_mm) ? 1 : 0; + + if (atomic_read(&mm->context.attach_count) > active) + __ptep_ipte(address, ptep); + else + mm->context.flush_mm = 1; +} + /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -1057,15 +1062,13 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { @@ -1083,15 +1086,13 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pgste_t pgste; pte_t pte; - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste_ipte_notify(mm, address, ptep, pgste); } pte = *ptep; - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); @@ -1160,7 +1161,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pte = *ptep; if (!full) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte_val(*ptep) = _PAGE_INVALID; if (!full && mm_has_pgste(mm)) { @@ -1178,14 +1179,12 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, pte_t pte = *ptep; if (pte_write(pte)) { - mm->context.flush_mm = 1; if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_ipte_notify(mm, address, ptep, pgste); } - if (!mm_exclusive(mm)) - __ptep_ipte(address, ptep); + ptep_flush_lazy(mm, address, ptep); pte = pte_wrprotect(pte); if (mm_has_pgste(mm)) { diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 6d6d92b4ea11..2cb846c4b37f 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -63,13 +63,14 @@ static inline void tlb_gather_mmu(struct mmu_gather *tlb, static inline void tlb_flush_mmu(struct mmu_gather *tlb) { + __tlb_flush_mm_lazy(tlb->mm); tlb_table_flush(tlb); } static inline void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } /* diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h index 6b32af30878c..f9fef0425fee 100644 --- a/arch/s390/include/asm/tlbflush.h +++ b/arch/s390/include/asm/tlbflush.h @@ -86,7 +86,7 @@ static inline void __tlb_flush_mm(struct mm_struct * mm) __tlb_flush_full(mm); } -static inline void __tlb_flush_mm_cond(struct mm_struct * mm) +static inline void __tlb_flush_mm_lazy(struct mm_struct * mm) { if (mm->context.flush_mm) { __tlb_flush_mm(mm); @@ -118,13 +118,13 @@ static inline void __tlb_flush_mm_cond(struct mm_struct * mm) static inline void flush_tlb_mm(struct mm_struct *mm) { - __tlb_flush_mm_cond(mm); + __tlb_flush_mm_lazy(mm); } static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - __tlb_flush_mm_cond(vma->vm_mm); + __tlb_flush_mm_lazy(vma->vm_mm); } static inline void flush_tlb_kernel_range(unsigned long start, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b9d35d63934e..befaea7003f7 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1008,7 +1008,6 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - __tlb_flush_mm(tlb->mm); call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } @@ -1018,11 +1017,12 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) { struct mmu_table_batch **batch = &tlb->batch; + tlb->mm->context.flush_mm = 1; if (*batch == NULL) { *batch = (struct mmu_table_batch *) __get_free_page(GFP_NOWAIT | __GFP_NOWARN); if (*batch == NULL) { - __tlb_flush_mm(tlb->mm); + __tlb_flush_mm_lazy(tlb->mm); tlb_remove_table_one(table); return; } @@ -1030,7 +1030,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table) } (*batch)->tables[(*batch)->nr++] = table; if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); + tlb_flush_mmu(tlb); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE From 558b9ef00e128b2c97df6e4243152af3d0da3653 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Fri, 16 Aug 2013 15:57:32 +0200 Subject: [PATCH 15/32] s390/dasd: enable raw_track_access reads without direct I/O The ECKD protocol supports reading of tracks with arbitrary format as raw track images. The DASD device driver supports this in its raw_track_access mode. In this mode it maps each track to sixteen 4096 byte sectors and rejects all requests that are not properly aligned to this mapping. An application that wants to use a DASD in raw_track_access mode will usually use direct I/O to make sure that properly aligned requests are directly submitted to the driver. However, applications that are not aware of this mode, e.g. udev, will encounter I/O errors. To make the use without direct I/O possible and avoid this kind of alignment errors, we now pad unaligned read requests with a dummy page, so that we can always read full tracks. Please note that writing is still only possible for full track images that are properly aligned. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 54 +++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e61a6deea3c0..5adb2042e824 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -85,6 +85,8 @@ MODULE_DEVICE_TABLE(ccw, dasd_eckd_ids); static struct ccw_driver dasd_eckd_driver; /* see below */ +static void *rawpadpage; + #define INIT_CQR_OK 0 #define INIT_CQR_UNFORMATTED 1 #define INIT_CQR_ERROR 2 @@ -3237,18 +3239,26 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, unsigned int seg_len, len_to_track_end; unsigned int first_offs; unsigned int cidaw, cplength, datasize; - sector_t first_trk, last_trk; + sector_t first_trk, last_trk, sectors; + sector_t start_padding_sectors, end_sector_offset, end_padding_sectors; unsigned int pfx_datasize; /* * raw track access needs to be mutiple of 64k and on 64k boundary + * For read requests we can fix an incorrect alignment by padding + * the request with dummy pages. */ - if ((blk_rq_pos(req) % DASD_RAW_SECTORS_PER_TRACK) != 0) { - cqr = ERR_PTR(-EINVAL); - goto out; - } - if (((blk_rq_pos(req) + blk_rq_sectors(req)) % - DASD_RAW_SECTORS_PER_TRACK) != 0) { + start_padding_sectors = blk_rq_pos(req) % DASD_RAW_SECTORS_PER_TRACK; + end_sector_offset = (blk_rq_pos(req) + blk_rq_sectors(req)) % + DASD_RAW_SECTORS_PER_TRACK; + end_padding_sectors = (DASD_RAW_SECTORS_PER_TRACK - end_sector_offset) % + DASD_RAW_SECTORS_PER_TRACK; + basedev = block->base; + if ((start_padding_sectors || end_padding_sectors) && + (rq_data_dir(req) == WRITE)) { + DBF_DEV_EVENT(DBF_ERR, basedev, + "raw write not track aligned (%lu,%lu) req %p", + start_padding_sectors, end_padding_sectors, req); cqr = ERR_PTR(-EINVAL); goto out; } @@ -3258,7 +3268,6 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, DASD_RAW_SECTORS_PER_TRACK; trkcount = last_trk - first_trk + 1; first_offs = 0; - basedev = block->base; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_TRACK; @@ -3307,12 +3316,26 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, } idaws = (unsigned long *)(cqr->data + pfx_datasize); - len_to_track_end = 0; - + if (start_padding_sectors) { + ccw[-1].flags |= CCW_FLAG_CC; + ccw->cmd_code = cmd; + /* maximum 3390 track size */ + ccw->count = 57326; + /* 64k map to one track */ + len_to_track_end = 65536 - start_padding_sectors * 512; + ccw->cda = (__u32)(addr_t)idaws; + ccw->flags |= CCW_FLAG_IDA; + ccw->flags |= CCW_FLAG_SLI; + ccw++; + for (sectors = 0; sectors < start_padding_sectors; sectors += 8) + idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE); + } rq_for_each_segment(bv, req, iter) { dst = page_address(bv->bv_page) + bv->bv_offset; seg_len = bv->bv_len; + if (cmd == DASD_ECKD_CCW_READ_TRACK) + memset(dst, 0, seg_len); if (!len_to_track_end) { ccw[-1].flags |= CCW_FLAG_CC; ccw->cmd_code = cmd; @@ -3328,7 +3351,8 @@ static struct dasd_ccw_req *dasd_raw_build_cp(struct dasd_device *startdev, len_to_track_end -= seg_len; idaws = idal_create_words(idaws, dst, seg_len); } - + for (sectors = 0; sectors < end_padding_sectors; sectors += 8) + idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE); if (blk_noretry_request(req) || block->base->features & DASD_FEATURE_FAILFAST) set_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags); @@ -4479,12 +4503,19 @@ dasd_eckd_init(void) kfree(dasd_reserve_req); return -ENOMEM; } + rawpadpage = (void *)__get_free_page(GFP_KERNEL); + if (!rawpadpage) { + kfree(path_verification_worker); + kfree(dasd_reserve_req); + return -ENOMEM; + } ret = ccw_driver_register(&dasd_eckd_driver); if (!ret) wait_for_device_probe(); else { kfree(path_verification_worker); kfree(dasd_reserve_req); + free_page((unsigned long)rawpadpage); } return ret; } @@ -4495,6 +4526,7 @@ dasd_eckd_cleanup(void) ccw_driver_unregister(&dasd_eckd_driver); kfree(path_verification_worker); kfree(dasd_reserve_req); + free_page((unsigned long)rawpadpage); } module_init(dasd_eckd_init); From 02aff3aa1772c63d82e1f160d7b60db1f9649f43 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Aug 2013 08:24:29 +0200 Subject: [PATCH 16/32] s390/bitops: fix inline assembly constraints Fix inline assembly contraints for non atomic bitops functions. This is broken since 2.6.34 987bcdac "[S390] use inline assembly contraints available with gcc 3.3.3". Reported-by: Andreas Krebbel Reported-by: Peter Oberparleiter Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/bitops.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 7d4676758733..10135a38673c 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -216,7 +216,7 @@ static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -244,7 +244,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc"); } static inline void @@ -271,7 +271,7 @@ static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr) addr = (unsigned long) ptr + ((nr ^ (BITS_PER_LONG - 8)) >> 3); asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc" ); + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc"); } static inline void @@ -301,7 +301,7 @@ test_and_set_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " oc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -320,7 +320,7 @@ test_and_clear_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " nc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_ni_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } @@ -339,7 +339,7 @@ test_and_change_bit_simple(unsigned long nr, volatile unsigned long *ptr) ch = *(unsigned char *) addr; asm volatile( " xc %O0(1,%R0),%1" - : "=Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) + : "+Q" (*(char *) addr) : "Q" (_oi_bitmap[nr & 7]) : "cc", "memory"); return (ch >> (nr & 7)) & 1; } From bee5c2863ef807fa50bbdf8b2bae887c54a97083 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 20 Aug 2013 10:24:12 +0200 Subject: [PATCH 17/32] s390/switch_to: fix save_access_regs() / restore_access_regs() Fix broken contraints for both save_access_regs() and restore_access_regs(). The constraints are incorrect since they tell the compiler that the inline assemblies only access the first element of an array of 16 elements. Therefore the compiler could generate incorrect code. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/switch_to.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h index 80b6f11263c4..6dbd559763c9 100644 --- a/arch/s390/include/asm/switch_to.h +++ b/arch/s390/include/asm/switch_to.h @@ -8,6 +8,7 @@ #define __ASM_SWITCH_TO_H #include +#include extern struct task_struct *__switch_to(void *, void *); extern void update_cr_regs(struct task_struct *task); @@ -68,12 +69,16 @@ static inline void restore_fp_regs(s390_fp_regs *fpregs) static inline void save_access_regs(unsigned int *acrs) { - asm volatile("stam 0,15,%0" : "=Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("stam 0,15,%0" : "=Q" (*(acrstype *)acrs)); } static inline void restore_access_regs(unsigned int *acrs) { - asm volatile("lam 0,15,%0" : : "Q" (*acrs)); + typedef struct { int _[NUM_ACRS]; } acrstype; + + asm volatile("lam 0,15,%0" : : "Q" (*(acrstype *)acrs)); } #define switch_to(prev,next,last) do { \ From 6b169ac9b4342ff3a1499bdeb7596aa4f1bc401b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 1 Aug 2013 10:16:27 +0200 Subject: [PATCH 18/32] s390/kprobes: add support for compare and branch instructions The compare and branch instructions (not relative) all need special handling when kprobed: - if a branch was taken, the instruction pointer should be left alone - if a branch was not taken, the instruction pointer must be adjusted The compare and branch instructions family was introduced with the general instruction extension facility (z10). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/kprobes.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 3388b2b2a07d..adbbe7f1cb0d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -105,14 +105,31 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn) fixup |= FIXUP_RETURN_REGISTER; break; case 0xeb: - if ((insn[2] & 0xff) == 0x44 || /* bxhg */ - (insn[2] & 0xff) == 0x45) /* bxleg */ + switch (insn[2] & 0xff) { + case 0x44: /* bxhg */ + case 0x45: /* bxleg */ fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } break; case 0xe3: /* bctg */ if ((insn[2] & 0xff) == 0x46) fixup = FIXUP_BRANCH_NOT_TAKEN; break; + case 0xec: + switch (insn[2] & 0xff) { + case 0xe5: /* clgrb */ + case 0xe6: /* cgrb */ + case 0xf6: /* crb */ + case 0xf7: /* clrb */ + case 0xfc: /* cgib */ + case 0xfd: /* cglib */ + case 0xfe: /* cib */ + case 0xff: /* clib */ + fixup = FIXUP_BRANCH_NOT_TAKEN; + break; + } + break; } return fixup; } From 0587d409ec53312f735d2004d5f47f8effee1ea9 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 23 Aug 2013 14:45:58 +0200 Subject: [PATCH 19/32] s390/time: return with irqs disabled from psw_idle Modify the psw_idle waiting logic in entry[64].S to return with interrupts disabled. This avoids potential issues with udelay and interrupt loops as interrupts are not reenabled after clock comparator interrupts. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 4 ++-- arch/s390/kernel/entry64.S | 2 +- arch/s390/kernel/process.c | 1 + arch/s390/kernel/time.c | 1 - arch/s390/lib/delay.c | 2 -- 5 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5ca70b4b72cb..cc30d1fb000c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -886,13 +886,13 @@ cleanup_idle: stm %r9,%r10,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - n %r8,BASED(cleanup_idle_wait) # clear wait state bit + n %r8,BASED(cleanup_idle_wait) # clear irq & wait state bits l %r9,24(%r11) # return from psw_idle br %r14 cleanup_idle_insn: .long psw_idle_lpsw + 0x80000000 cleanup_idle_wait: - .long 0xfffdffff + .long 0xfcfdffff /* * Integer constants diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 980c7aa1cc5c..2b2188b97c6a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -929,7 +929,7 @@ cleanup_idle: stg %r9,__LC_SYSTEM_TIMER mvc __LC_LAST_UPDATE_TIMER(8),__TIMER_IDLE_EXIT(%r2) # prepare return psw - nihh %r8,0xfffd # clear wait state bit + nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle br %r14 cleanup_idle_insn: diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2bc3eddae34a..c5dbb335716d 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -71,6 +71,7 @@ void arch_cpu_idle(void) } /* Halt the cpu and keep track of cpu time accounting. */ vtime_stop_cpu(); + local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 876546b9cfa1..064c3082ab33 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -92,7 +92,6 @@ void clock_comparator_work(void) struct clock_event_device *cd; S390_lowcore.clock_comparator = -1ULL; - set_clock_comparator(S390_lowcore.clock_comparator); cd = &__get_cpu_var(comparators); cd->event_handler(cd); } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index c61b9fad43cc..57c87d7d7ede 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -44,7 +44,6 @@ static void __udelay_disabled(unsigned long long usecs) do { set_clock_comparator(end); vtime_stop_cpu(); - local_irq_disable(); } while (get_tod_clock() < end); lockdep_on(); __ctl_load(cr0, 0, 0); @@ -64,7 +63,6 @@ static void __udelay_enabled(unsigned long long usecs) set_clock_comparator(end); } vtime_stop_cpu(); - local_irq_disable(); if (clock_saved) local_tick_enable(clock_saved); } while (get_tod_clock() < end); From 0196c642f76c469c17ca09d7f904bccc123bac09 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Aug 2013 08:19:27 +0200 Subject: [PATCH 20/32] s390/pgtable: fix mprotect for single-threaded KVM guests For a single-threaded KVM guest ptep_modify_prot_start will not use IPTE, the invalid bit will therefore not be set. If DEBUG_VM is set pgste_set_key called by ptep_modify_prot_commit will complain about the missing invalid bit. ptep_modify_prot_start should set the invalid bit in all cases. Cc: stable@vger.kernel.org # 3.10+ Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 125e37909998..1d144b6f3ba2 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1093,6 +1093,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, pte = *ptep; ptep_flush_lazy(mm, address, ptep); + pte_val(*ptep) |= _PAGE_INVALID; if (mm_has_pgste(mm)) { pgste = pgste_update_all(&pte, pgste); From 0628a5fbe1b5af35ef5346443ff1056e257a7350 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Aug 2013 09:56:55 +0200 Subject: [PATCH 21/32] s390/tx: allow program interruption filtering in user space A user space program using the transactional execution facility should be allowed to do program interrupt filtering. Do not set the transactional-execution program-interruption-filtering override (PIFO) bit in CR0. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ptrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 2bc08039140f..9556905bd3ce 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -60,11 +60,11 @@ void update_cr_regs(struct task_struct *task) __ctl_store(cr, 0, 2); cr_new[1] = cr[1]; - /* Set or clear transaction execution TXC/PIFO bits 8 and 9. */ + /* Set or clear transaction execution TXC bit 8. */ if (task->thread.per_flags & PER_FLAG_NO_TE) - cr_new[0] = cr[0] & ~(3UL << 54); + cr_new[0] = cr[0] & ~(1UL << 55); else - cr_new[0] = cr[0] | (3UL << 54); + cr_new[0] = cr[0] | (1UL << 55); /* Set or clear transaction execution TDC bits 62 and 63. */ cr_new[2] = cr[2] & ~3UL; if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { From fbd70035fb2b03deb346052084794bc1d0e25aa2 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Tue, 27 Aug 2013 13:52:17 +0200 Subject: [PATCH 22/32] s390/dasd: fix statistics for recovered requests When a recovery cqr is cleaned up, copy the start time, stop time, and start device to the original cqr. These times are needed later when the original request is finalized and counted in the DASD statistics. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_erp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c index ba99b64e2b3f..e1e88486b2b4 100644 --- a/drivers/s390/block/dasd_erp.c +++ b/drivers/s390/block/dasd_erp.c @@ -124,10 +124,15 @@ dasd_default_erp_action(struct dasd_ccw_req *cqr) struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr) { int success; + unsigned long long startclk, stopclk; + struct dasd_device *startdev; BUG_ON(cqr->refers == NULL || cqr->function == NULL); success = cqr->status == DASD_CQR_DONE; + startclk = cqr->startclk; + stopclk = cqr->stopclk; + startdev = cqr->startdev; /* free all ERPs - but NOT the original cqr */ while (cqr->refers != NULL) { @@ -142,6 +147,9 @@ struct dasd_ccw_req *dasd_default_erp_postaction(struct dasd_ccw_req *cqr) } /* set corresponding status to original cqr */ + cqr->startclk = startclk; + cqr->stopclk = stopclk; + cqr->startdev = startdev; if (success) cqr->status = DASD_CQR_DONE; else { From 0944fe3f4a323f436180d39402cae7f9c46ead17 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 23 Jul 2013 22:11:42 +0200 Subject: [PATCH 23/32] s390/mm: implement software referenced bits The last remaining use for the storage key of the s390 architecture is reference counting. The alternative is to make page table entries invalid while they are old. On access the fault handler marks the pte/pmd as young which makes the pte/pmd valid if the access rights allow read access. The pte/pmd invalidations required for software managed reference bits cost a bit of performance, on the other hand the RRBE/RRBM instructions to read and reset the referenced bits are quite expensive as well. Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 9 - arch/s390/include/asm/pgtable.h | 343 ++++++++++++++++++-------------- arch/s390/mm/hugetlbpage.c | 58 ++++-- arch/s390/mm/pgtable.c | 18 +- arch/s390/mm/vmem.c | 1 + include/asm-generic/pgtable.h | 4 - mm/rmap.c | 3 - 7 files changed, 243 insertions(+), 193 deletions(-) diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 27f04801ec8b..1e51f2915b2e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -140,15 +140,6 @@ static inline int page_reset_referenced(unsigned long addr) #define _PAGE_FP_BIT 0x08 /* HW fetch protection bit */ #define _PAGE_ACC_BITS 0xf0 /* HW access control bits */ -/* - * Test and clear referenced bit in storage key. - */ -#define __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -static inline int page_test_and_clear_young(unsigned long pfn) -{ - return page_reset_referenced(pfn << PAGE_SHIFT); -} - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 1d144b6f3ba2..9f215b40109e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -226,8 +226,9 @@ extern unsigned long MODULES_END; #define _PAGE_TYPE 0x002 /* SW pte type bit */ #define _PAGE_YOUNG 0x004 /* SW pte young bit */ #define _PAGE_DIRTY 0x008 /* SW pte dirty bit */ -#define _PAGE_WRITE 0x010 /* SW pte write bit */ -#define _PAGE_SPECIAL 0x020 /* SW associated with special page */ +#define _PAGE_READ 0x010 /* SW pte read bit */ +#define _PAGE_WRITE 0x020 /* SW pte write bit */ +#define _PAGE_SPECIAL 0x040 /* SW associated with special page */ #define __HAVE_ARCH_PTE_SPECIAL /* Set of bits not changed in pte_modify */ @@ -243,19 +244,25 @@ extern unsigned long MODULES_END; * The following table gives the different possible bit combinations for * the pte hardware and software bits in the last 12 bits of a pte: * - * 842100000000 - * 000084210000 - * 000000008421 - * .IR....wdytp - * empty .10....00000 - * swap .10....xxx10 - * file .11....xxxx0 - * prot-none, clean .11....00x01 - * prot-none, dirty .10....01x01 - * read-only, clean .01....00x01 - * read-only, dirty .01....01x01 - * read-write, clean .01....10x01 - * read-write, dirty .00....11x01 + * 842100000000 + * 000084210000 + * 000000008421 + * .IR...wrdytp + * empty .10...000000 + * swap .10...xxxx10 + * file .11...xxxxx0 + * prot-none, clean, old .11...000001 + * prot-none, clean, young .11...000101 + * prot-none, dirty, old .10...001001 + * prot-none, dirty, young .10...001101 + * read-only, clean, old .11...010001 + * read-only, clean, young .01...010101 + * read-only, dirty, old .11...011001 + * read-only, dirty, young .01...011101 + * read-write, clean, old .11...110001 + * read-write, clean, young .01...110101 + * read-write, dirty, old .10...111001 + * read-write, dirty, young .00...111101 * * pte_present is true for the bit pattern .xx...xxxxx1, (pte & 0x001) == 0x001 * pte_none is true for the bit pattern .10...xxxx00, (pte & 0x603) == 0x400 @@ -273,15 +280,26 @@ extern unsigned long MODULES_END; #define _ASCE_TABLE_LENGTH 0x7f /* 128 x 64 entries = 8k */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0x7fffffffUL /* Valid segment table bits */ #define _SEGMENT_ENTRY_ORIGIN 0x7fffffc0UL /* page table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ #define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */ #define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */ #define _SEGMENT_ENTRY_PTL 0x0f /* page table length */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT #define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL) #define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID) +/* + * Segment table entry encoding (I = invalid, R = read-only bit): + * ..R...I..... + * prot-none ..1...1..... + * read-only ..1...0..... + * read-write ..0...0..... + * empty ..0...1..... + */ + /* Page status table bits for virtualization */ #define PGSTE_ACC_BITS 0xf0000000UL #define PGSTE_FP_BIT 0x08000000UL @@ -290,9 +308,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x00200000UL #define PGSTE_GR_BIT 0x00040000UL #define PGSTE_GC_BIT 0x00020000UL -#define PGSTE_UR_BIT 0x00008000UL -#define PGSTE_UC_BIT 0x00004000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x00002000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x00008000UL /* IPTE notify bit */ #else /* CONFIG_64BIT */ @@ -331,6 +347,8 @@ extern unsigned long MODULES_END; #define _REGION3_ENTRY_CO 0x100 /* change-recording override */ /* Bits in the segment table entry */ +#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL +#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL #define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */ #define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */ #define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */ @@ -342,6 +360,21 @@ extern unsigned long MODULES_END; #define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */ #define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */ #define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */ +#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */ +#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG + +/* + * Segment table entry encoding (R = read-only, I = invalid, y = young bit): + * ..R...I...y. + * prot-none, old ..0...1...1. + * prot-none, young ..1...1...1. + * read-only, old ..1...1...0. + * read-only, young ..1...0...1. + * read-write, old ..0...1...0. + * read-write, young ..0...0...1. + * The segment table origin is used to distinguish empty (origin==0) from + * read-write, old segment table entries (origin!=0) + */ #define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */ @@ -357,9 +390,7 @@ extern unsigned long MODULES_END; #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UR_BIT 0x0000800000000000UL -#define PGSTE_UC_BIT 0x0000400000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000200000000000UL /* IPTE notify bit */ +#define PGSTE_IN_BIT 0x0000800000000000UL /* IPTE notify bit */ #endif /* CONFIG_64BIT */ @@ -375,12 +406,17 @@ extern unsigned long MODULES_END; * Page protection definitions. */ #define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID) -#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) -#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_PROTECT) +#define PAGE_READ __pgprot(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_INVALID | _PAGE_PROTECT) +#define PAGE_WRITE __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_PROTECT) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ + _PAGE_YOUNG | _PAGE_DIRTY) +#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ + _PAGE_PROTECT) /* * On s390 the page table entry has an invalid bit and a read-only bit. @@ -410,9 +446,10 @@ extern unsigned long MODULES_END; * Segment entry (large page) protection definitions. */ #define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \ + _SEGMENT_ENTRY_NONE) +#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT) -#define SEGMENT_WRITE __pgprot(0) +#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID) static inline int mm_has_pgste(struct mm_struct *mm) { @@ -520,10 +557,19 @@ static inline int pmd_large(pmd_t pmd) #endif } +static inline int pmd_prot_none(pmd_t pmd) +{ + return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) && + (pmd_val(pmd) & _SEGMENT_ENTRY_NONE); +} + static inline int pmd_bad(pmd_t pmd) { - unsigned long mask = ~_SEGMENT_ENTRY_ORIGIN & ~_SEGMENT_ENTRY_INVALID; - return (pmd_val(pmd) & mask) != _SEGMENT_ENTRY; +#ifdef CONFIG_64BIT + if (pmd_large(pmd)) + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0; +#endif + return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH @@ -542,12 +588,21 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { + if (pmd_prot_none(pmd)) + return 0; return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0; } static inline int pmd_young(pmd_t pmd) { - return 0; + int young = 0; +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) + young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0; + else + young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; +#endif + return young; } static inline int pte_present(pte_t pte) @@ -632,33 +687,28 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; if (pte_val(*ptep) & _PAGE_INVALID) return pgste; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Clear page changed & referenced bit in the storage key */ - if (bits & _PAGE_CHANGED) + if (!(pgste_val(pgste) & PGSTE_HC_BIT) && (bits & _PAGE_CHANGED)) { + /* Transfer dirty + referenced bit to host bits in pgste */ + pgste_val(pgste) |= bits << 52; page_set_storage_key(address, skey ^ bits, 0); - else if (bits) + } else if (!(pgste_val(pgste) & PGSTE_HR_BIT) && + (bits & _PAGE_REFERENCED)) { + /* Transfer referenced bit to host bit in pgste */ + pgste_val(pgste) |= PGSTE_HR_BIT; page_reset_referenced(address); + } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Get host changed & referenced bits from pgste */ - bits |= (pgste_val(pgste) & (PGSTE_HR_BIT | PGSTE_HC_BIT)) >> 52; - /* Transfer page changed & referenced bit to kvm user bits */ - pgste_val(pgste) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ - /* Clear relevant host bits in pgste. */ - pgste_val(pgste) &= ~(PGSTE_HR_BIT | PGSTE_HC_BIT); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) |= - (unsigned long) (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - /* Transfer referenced bit to pte */ - pte_val(*ptep) |= (bits & _PAGE_REFERENCED) << 1; + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; #endif return pgste; @@ -667,24 +717,11 @@ static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - int young; - if (pte_val(*ptep) & _PAGE_INVALID) return pgste; /* Get referenced bit from storage key */ - young = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); - if (young) - pgste_val(pgste) |= PGSTE_GR_BIT; - /* Get host referenced bit from pgste */ - if (pgste_val(pgste) & PGSTE_HR_BIT) { - pgste_val(pgste) &= ~PGSTE_HR_BIT; - young = 1; - } - /* Transfer referenced bit to kvm user bits and pte */ - if (young) { - pgste_val(pgste) |= PGSTE_UR_BIT; - pte_val(*ptep) |= _PAGE_YOUNG; - } + if (page_reset_referenced(pte_val(*ptep) & PAGE_MASK)) + pgste_val(pgste) |= PGSTE_HR_BIT | PGSTE_GR_BIT; #endif return pgste; } @@ -839,11 +876,7 @@ static inline int pte_dirty(pte_t pte) static inline int pte_young(pte_t pte) { -#ifdef CONFIG_PGSTE - if (pte_val(pte) & _PAGE_YOUNG) - return 1; -#endif - return 0; + return (pte_val(pte) & _PAGE_YOUNG) != 0; } /* @@ -884,6 +917,16 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) &= _PAGE_CHG_MASK; pte_val(pte) |= pgprot_val(newprot); + /* + * newprot for PAGE_NONE, PAGE_READ and PAGE_WRITE has the + * invalid bit set, clear it again for readable, young pages + */ + if ((pte_val(pte) & _PAGE_YOUNG) && (pte_val(pte) & _PAGE_READ)) + pte_val(pte) &= ~_PAGE_INVALID; + /* + * newprot for PAGE_READ and PAGE_WRITE has the page protection + * bit set, clear it again for writable, dirty pages + */ if ((pte_val(pte) & _PAGE_DIRTY) && (pte_val(pte) & _PAGE_WRITE)) pte_val(pte) &= ~_PAGE_PROTECT; return pte; @@ -921,14 +964,16 @@ static inline pte_t pte_mkdirty(pte_t pte) static inline pte_t pte_mkold(pte_t pte) { -#ifdef CONFIG_PGSTE pte_val(pte) &= ~_PAGE_YOUNG; -#endif + pte_val(pte) |= _PAGE_INVALID; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { + pte_val(pte) |= _PAGE_YOUNG; + if (pte_val(pte) & _PAGE_READ) + pte_val(pte) &= ~_PAGE_INVALID; return pte; } @@ -958,8 +1003,8 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_all(ptep, pgste); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; + dirty = !!(pgste_val(pgste) & PGSTE_HC_BIT); + pgste_val(pgste) &= ~PGSTE_HC_BIT; pgste_set_unlock(ptep, pgste); return dirty; } @@ -978,42 +1023,13 @@ static inline int ptep_test_and_clear_user_young(struct mm_struct *mm, if (mm_has_pgste(mm)) { pgste = pgste_get_lock(ptep); pgste = pgste_update_young(ptep, pgste); - young = !!(pgste_val(pgste) & PGSTE_UR_BIT); - pgste_val(pgste) &= ~PGSTE_UR_BIT; + young = !!(pgste_val(pgste) & PGSTE_HR_BIT); + pgste_val(pgste) &= ~PGSTE_HR_BIT; pgste_set_unlock(ptep, pgste); } return young; } -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_update_young(ptep, pgste); - pte = *ptep; - *ptep = pte_mkold(pte); - pgste_set_unlock(ptep, pgste); - return pte_young(pte); - } - return 0; -} - -#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH -static inline int ptep_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) -{ - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB - * With virtualization we handle the reference bit, without we - * we can simply return */ - return ptep_test_and_clear_young(vma, address, ptep); -} - static inline void __ptep_ipte(unsigned long address, pte_t *ptep) { if (!(pte_val(*ptep) & _PAGE_INVALID)) { @@ -1042,6 +1058,40 @@ static inline void ptep_flush_lazy(struct mm_struct *mm, mm->context.flush_mm = 1; } +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + pgste_t pgste; + pte_t pte; + int young; + + if (mm_has_pgste(vma->vm_mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); + } + + pte = *ptep; + __ptep_ipte(addr, ptep); + young = pte_young(pte); + pte = pte_mkold(pte); + + if (mm_has_pgste(vma->vm_mm)) { + pgste_set_pte(ptep, pte); + pgste_set_unlock(ptep, pgste); + } else + *ptep = pte; + + return young; +} + +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +static inline int ptep_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) +{ + return ptep_test_and_clear_young(vma, address, ptep); +} + /* * This is hard to understand. ptep_get_and_clear and ptep_clear_flush * both clear the TLB for the unmapped pte. The reason is that @@ -1229,7 +1279,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); - return __pte; + return pte_mkyoung(__pte); } static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) @@ -1338,10 +1388,41 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot) return pgprot_val(SEGMENT_WRITE); } +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ +#ifdef CONFIG_64BIT + if (pmd_prot_none(pmd)) { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; + } else { + pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG; + pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; + } +#endif + return pmd; +} + static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { + int young; + + young = pmd_young(pmd); pmd_val(pmd) &= _SEGMENT_CHG_MASK; pmd_val(pmd) |= massage_pgprot_pmd(newprot); + if (young) + pmd = pmd_mkyoung(pmd); return pmd; } @@ -1349,13 +1430,13 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot) { pmd_t __pmd; pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot); - return __pmd; + return pmd_mkyoung(__pmd); } static inline pmd_t pmd_mkwrite(pmd_t pmd) { - /* Do not clobber PROT_NONE pages! */ - if (!(pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)) + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1391,7 +1472,9 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_wrprotect(pmd_t pmd) { - pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; + /* Do not clobber PROT_NONE segments! */ + if (!pmd_prot_none(pmd)) + pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; return pmd; } @@ -1401,50 +1484,16 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } -static inline pmd_t pmd_mkold(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - -static inline pmd_t pmd_mkyoung(pmd_t pmd) -{ - /* No referenced bit in the segment table entry. */ - return pmd; -} - #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - unsigned long pmd_addr = pmd_val(*pmdp) & HPAGE_MASK; - long tmp, rc; - int counter; + pmd_t pmd; - rc = 0; - if (MACHINE_HAS_RRBM) { - counter = PTRS_PER_PTE >> 6; - asm volatile( - "0: .insn rre,0xb9ae0000,%0,%3\n" /* rrbm */ - " ogr %1,%0\n" - " la %3,0(%4,%3)\n" - " brct %2,0b\n" - : "=&d" (tmp), "+&d" (rc), "+d" (counter), - "+a" (pmd_addr) - : "a" (64 * 4096UL) : "cc"); - rc = !!rc; - } else { - counter = PTRS_PER_PTE; - asm volatile( - "0: rrbe 0,%2\n" - " la %2,0(%3,%2)\n" - " brc 12,1f\n" - " lhi %0,1\n" - "1: brct %1,0b\n" - : "+d" (rc), "+d" (counter), "+a" (pmd_addr) - : "a" (4096UL) : "cc"); - } - return rc; + pmd = *pmdp; + __pmd_idte(address, pmdp); + *pmdp = pmd_mkold(pmd); + return pmd_young(pmd); } #define __HAVE_ARCH_PMDP_GET_AND_CLEAR diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index b0bd0ae17796..248445f92604 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -10,19 +10,25 @@ static inline pmd_t __pte_to_pmd(pte_t pte) { - int none, prot; + int none, young, prot; pmd_t pmd; /* - * Convert encoding pte bits pmd bits - * .IR.....wdtp ..R...I..... - * empty .10.....0000 -> ..0...1..... - * prot-none, clean .11.....0001 -> ..1...1..... - * prot-none, dirty .10.....0101 -> ..1...1..... - * read-only, clean .01.....0001 -> ..1...0..... - * read-only, dirty .01.....0101 -> ..1...0..... - * read-write, clean .01.....1001 -> ..0...0..... - * read-write, dirty .00.....1101 -> ..0...0..... + * Convert encoding pte bits pmd bits + * .IR...wrdytp ..R...I...y. + * empty .10...000000 -> ..0...1...0. + * prot-none, clean, old .11...000001 -> ..0...1...1. + * prot-none, clean, young .11...000101 -> ..1...1...1. + * prot-none, dirty, old .10...001001 -> ..0...1...1. + * prot-none, dirty, young .10...001101 -> ..1...1...1. + * read-only, clean, old .11...010001 -> ..1...1...0. + * read-only, clean, young .01...010101 -> ..1...0...1. + * read-only, dirty, old .11...011001 -> ..1...1...0. + * read-only, dirty, young .01...011101 -> ..1...0...1. + * read-write, clean, old .11...110001 -> ..0...1...0. + * read-write, clean, young .01...110101 -> ..0...0...1. + * read-write, dirty, old .10...111001 -> ..0...1...0. + * read-write, dirty, young .00...111101 -> ..0...0...1. * Huge ptes are dirty by definition, a clean pte is made dirty * by the conversion. */ @@ -31,9 +37,14 @@ static inline pmd_t __pte_to_pmd(pte_t pte) if (pte_val(pte) & _PAGE_INVALID) pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID; none = (pte_val(pte) & _PAGE_PRESENT) && - (pte_val(pte) & _PAGE_INVALID); - prot = (pte_val(pte) & _PAGE_PROTECT); - if (prot || none) + !(pte_val(pte) & _PAGE_READ) && + !(pte_val(pte) & _PAGE_WRITE); + prot = (pte_val(pte) & _PAGE_PROTECT) && + !(pte_val(pte) & _PAGE_WRITE); + young = pte_val(pte) & _PAGE_YOUNG; + if (none || young) + pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG; + if (prot || (none && young)) pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT; } else pmd_val(pmd) = _SEGMENT_ENTRY_INVALID; @@ -46,11 +57,14 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) /* * Convert encoding pmd bits pte bits - * ..R...I..... .IR.....wdtp - * empty ..0...1..... -> .10.....0000 - * prot-none, young ..1...1..... -> .10.....0101 - * read-only, young ..1...0..... -> .01.....0101 - * read-write, young ..0...0..... -> .00.....1101 + * ..R...I...y. .IR...wrdytp + * empty ..0...1...0. -> .10...000000 + * prot-none, old ..0...1...1. -> .10...001001 + * prot-none, young ..1...1...1. -> .10...001101 + * read-only, old ..1...1...0. -> .11...011001 + * read-only, young ..1...0...1. -> .01...011101 + * read-write, old ..0...1...0. -> .10...111001 + * read-write, young ..0...0...1. -> .00...111101 * Huge ptes are dirty by definition */ if (pmd_present(pmd)) { @@ -58,11 +72,17 @@ static inline pte_t __pmd_to_pte(pmd_t pmd) (pmd_val(pmd) & PAGE_MASK); if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) pte_val(pte) |= _PAGE_INVALID; - else { + if (pmd_prot_none(pmd)) { + if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) + pte_val(pte) |= _PAGE_YOUNG; + } else { + pte_val(pte) |= _PAGE_READ; if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) pte_val(pte) |= _PAGE_PROTECT; else pte_val(pte) |= _PAGE_WRITE; + if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) + pte_val(pte) |= _PAGE_YOUNG; } } else pte_val(pte) = _PAGE_INVALID; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index befaea7003f7..6d16132d0850 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -754,7 +754,8 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm, atomic_set(&page->_mapcount, 3); table = (unsigned long *) page_to_phys(page); clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT, + PAGE_SIZE/2); return table; } @@ -792,26 +793,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits; - unsigned char skey; + unsigned long address, bits, skey; address = pte_val(*ptep) & PAGE_MASK; - skey = page_get_storage_key(address); + skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set storage key ACC and FP */ - page_set_storage_key(address, - (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)), - !nq); - + page_set_storage_key(address, skey, !nq); /* Merge host changed & referenced into pgste */ pgste_val(new) |= bits << 52; - /* Transfer skey changed & referenced bit to kvm user bits */ - pgste_val(new) |= bits << 45; /* PGSTE_UR_BIT & PGSTE_UC_BIT */ } /* changing the guest storage key is considered a change of the page */ if ((pgste_val(new) ^ pgste_val(old)) & (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; + pgste_val(new) |= PGSTE_HC_BIT; pgste_set_unlock(ptep, new); pte_unmap_unlock(*ptep, ptl); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e1299d40818d..bcfb70b60be6 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -118,6 +118,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) { pmd_val(*pm_dir) = __pa(address) | _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE | + _SEGMENT_ENTRY_YOUNG | (ro ? _SEGMENT_ENTRY_PROTECT : 0); address += PMD_SIZE; continue; diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 0807ddf97b05..f330d28e4d0e 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -208,10 +208,6 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG -#define page_test_and_clear_young(pfn) (0) -#endif - #ifndef __HAVE_ARCH_PGD_OFFSET_GATE #define pgd_offset_gate(mm, addr) pgd_offset(mm, addr) #endif diff --git a/mm/rmap.c b/mm/rmap.c index b2e29acd7e3d..07748e68b729 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -873,9 +873,6 @@ int page_referenced(struct page *page, vm_flags); if (we_locked) unlock_page(page); - - if (page_test_and_clear_young(page_to_pfn(page))) - referenced++; } out: return referenced; From 67f43f38eeb34da43b624a29d57b703f4c4844b4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:33:16 +0200 Subject: [PATCH 24/32] s390/pci/hotplug: convert to be builtin only Convert s390' pci hotplug to be builtin only, with no module option. Suggested-by: Bjorn Helgaas Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 24 ++++++------ arch/s390/pci/pci.c | 34 +++-------------- drivers/pci/hotplug/Kconfig | 2 +- drivers/pci/hotplug/s390_pci_hpc.c | 61 +----------------------------- 4 files changed, 20 insertions(+), 101 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index d0872769d44e..64081f85ffdb 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -116,11 +116,6 @@ struct zpci_dev { struct dentry *debugfs_perf; }; -struct pci_hp_callback_ops { - int (*create_slot) (struct zpci_dev *zdev); - void (*remove_slot) (struct zpci_dev *zdev); -}; - static inline bool zdev_enabled(struct zpci_dev *zdev) { return (zdev->fh & (1UL << 31)) ? true : false; @@ -154,6 +149,17 @@ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} #endif /* CONFIG_PCI */ +#ifdef CONFIG_HOTPLUG_PCI_S390 +int zpci_init_slot(struct zpci_dev *); +void zpci_exit_slot(struct zpci_dev *); +#else /* CONFIG_HOTPLUG_PCI_S390 */ +static inline int zpci_init_slot(struct zpci_dev *zdev) +{ + return 0; +} +static inline void zpci_exit_slot(struct zpci_dev *zdev) {} +#endif /* CONFIG_HOTPLUG_PCI_S390 */ + /* Helpers */ struct zpci_dev *get_zdev(struct pci_dev *); struct zpci_dev *get_zdev_by_fid(u32); @@ -167,14 +173,6 @@ void zpci_sysfs_remove_device(struct device *); int zpci_dma_init(void); void zpci_dma_exit(void); -/* Hotplug */ -extern struct mutex zpci_list_lock; -extern struct list_head zpci_list; -extern unsigned int s390_pci_probe; - -void zpci_register_hp_ops(struct pci_hp_callback_ops *); -void zpci_deregister_hp_ops(void); - /* FMB */ int zpci_fmb_enable_device(struct zpci_dev *); int zpci_fmb_disable_device(struct zpci_dev *); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d65dc4f50e2a..56f8a1c4d9bf 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -45,11 +45,8 @@ #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ -LIST_HEAD(zpci_list); -EXPORT_SYMBOL_GPL(zpci_list); -DEFINE_MUTEX(zpci_list_lock); -EXPORT_SYMBOL_GPL(zpci_list_lock); - +static LIST_HEAD(zpci_list); +static DEFINE_MUTEX(zpci_list_lock); static void zpci_enable_irq(struct irq_data *data); static void zpci_disable_irq(struct irq_data *data); @@ -60,8 +57,6 @@ static struct irq_chip zpci_irq_chip = { .irq_mask = zpci_disable_irq, }; -static struct pci_hp_callback_ops *hotplug_ops; - static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); static DEFINE_SPINLOCK(zpci_domain_lock); @@ -828,10 +823,10 @@ int zpci_create_device(struct zpci_dev *zdev) mutex_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); - if (hotplug_ops) - hotplug_ops->create_slot(zdev); mutex_unlock(&zpci_list_lock); + zpci_init_slot(zdev); + return 0; out_disable: @@ -884,24 +879,7 @@ static void zpci_mem_exit(void) kmem_cache_destroy(zdev_fmb_cache); } -void zpci_register_hp_ops(struct pci_hp_callback_ops *ops) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = ops; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_register_hp_ops); - -void zpci_deregister_hp_ops(void) -{ - mutex_lock(&zpci_list_lock); - hotplug_ops = NULL; - mutex_unlock(&zpci_list_lock); -} -EXPORT_SYMBOL_GPL(zpci_deregister_hp_ops); - -unsigned int s390_pci_probe; -EXPORT_SYMBOL_GPL(s390_pci_probe); +static unsigned int s390_pci_probe; char * __init pcibios_setup(char *str) { @@ -960,4 +938,4 @@ out_mem: out: return rc; } -subsys_initcall(pci_base_init); +subsys_initcall_sync(pci_base_init); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index d85009de713d..0a648af89531 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -146,7 +146,7 @@ config HOTPLUG_PCI_SGI When in doubt, say N. config HOTPLUG_PCI_S390 - tristate "System z PCI Hotplug Support" + bool "System z PCI Hotplug Support" depends on S390 && 64BIT help Say Y here if you want to use the System z PCI Hotplug diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index ea3fa90d020a..0d9c12fbcb3a 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -148,7 +148,7 @@ static struct hotplug_slot_ops s390_hotplug_slot_ops = { .get_adapter_status = get_adapter_status, }; -static int init_pci_slot(struct zpci_dev *zdev) +int zpci_init_slot(struct zpci_dev *zdev) { struct hotplug_slot *hotplug_slot; struct hotplug_slot_info *info; @@ -202,7 +202,7 @@ error: return -ENOMEM; } -static void exit_pci_slot(struct zpci_dev *zdev) +void zpci_exit_slot(struct zpci_dev *zdev) { struct list_head *tmp, *n; struct slot *slot; @@ -215,60 +215,3 @@ static void exit_pci_slot(struct zpci_dev *zdev) pci_hp_deregister(slot->hotplug_slot); } } - -static struct pci_hp_callback_ops hp_ops = { - .create_slot = init_pci_slot, - .remove_slot = exit_pci_slot, -}; - -static void __init init_pci_slots(void) -{ - struct zpci_dev *zdev; - - /* - * Create a structure for each slot, and register that slot - * with the pci_hotplug subsystem. - */ - mutex_lock(&zpci_list_lock); - list_for_each_entry(zdev, &zpci_list, entry) { - init_pci_slot(zdev); - } - mutex_unlock(&zpci_list_lock); -} - -static void __exit exit_pci_slots(void) -{ - struct list_head *tmp, *n; - struct slot *slot; - - /* - * Unregister all of our slots with the pci_hotplug subsystem. - * Memory will be freed in release_slot() callback after slot's - * lifespan is finished. - */ - list_for_each_safe(tmp, n, &s390_hotplug_slot_list) { - slot = list_entry(tmp, struct slot, slot_list); - list_del(&slot->slot_list); - pci_hp_deregister(slot->hotplug_slot); - } -} - -static int __init pci_hotplug_s390_init(void) -{ - if (!s390_pci_probe) - return -EOPNOTSUPP; - - zpci_register_hp_ops(&hp_ops); - init_pci_slots(); - - return 0; -} - -static void __exit pci_hotplug_s390_exit(void) -{ - exit_pci_slots(); - zpci_deregister_hp_ops(); -} - -module_init(pci_hotplug_s390_init); -module_exit(pci_hotplug_s390_exit); From cb8091828757bbc9459ef59248f4a793e681f8cd Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:34:37 +0200 Subject: [PATCH 25/32] s390/pci: use claim_resource Use pci_claim_resource to find and request bus ressources in pcibios_add_device. Also move some (de)initialization stuff to pcibios_enable_device/pcibios_disable_device. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 63 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 56f8a1c4d9bf..61167b1209a3 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -541,8 +541,6 @@ static void zpci_map_resources(struct zpci_dev *zdev) continue; pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; - pr_debug("BAR%i: -> start: %Lx end: %Lx\n", - i, pdev->resource[i].start, pdev->resource[i].end); } } @@ -574,32 +572,6 @@ void zpci_free_device(struct zpci_dev *zdev) kfree(zdev); } -/* - * Too late for any s390 specific setup, since interrupts must be set up - * already which requires DMA setup too and the pci scan will access the - * config space, which only works if the function handle is enabled. - */ -int pcibios_enable_device(struct pci_dev *pdev, int mask) -{ - struct resource *res; - u16 cmd; - int i; - - pci_read_config_word(pdev, PCI_COMMAND, &cmd); - - for (i = 0; i < PCI_BAR_COUNT; i++) { - res = &pdev->resource[i]; - - if (res->flags & IORESOURCE_IO) - return -EINVAL; - - if (res->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - pci_write_config_word(pdev, PCI_COMMAND, cmd); - return 0; -} - int pcibios_add_platform_entries(struct pci_dev *pdev) { return zpci_sysfs_add_device(&pdev->dev); @@ -689,16 +661,49 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry) int pcibios_add_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + int i; + + zdev->pdev = pdev; + zpci_map_resources(zdev); + + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + if (res->parent || !res->flags) + continue; + pci_claim_resource(pdev, i); + } + + return 0; +} + +int pcibios_enable_device(struct pci_dev *pdev, int mask) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct resource *res; + u16 cmd; + int i; zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); zpci_map_resources(zdev); + pci_read_config_word(pdev, PCI_COMMAND, &cmd); + for (i = 0; i < PCI_BAR_COUNT; i++) { + res = &pdev->resource[i]; + + if (res->flags & IORESOURCE_IO) + return -EINVAL; + + if (res->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + pci_write_config_word(pdev, PCI_COMMAND, cmd); return 0; } -void pcibios_release_device(struct pci_dev *pdev) +void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); From 0ff70ec88ba61f72b05b365a21fbd8aa60436254 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:35:19 +0200 Subject: [PATCH 26/32] s390/pci: add recover sysfs knob Add an arch specific attribute to recover a pci function from an error state or config space blockage. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 4 ++-- arch/s390/pci/pci_sysfs.c | 27 +++++++++++++++++++++++++++ drivers/pci/hotplug/s390_pci_hpc.c | 2 -- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 61167b1209a3..b0ccd424308a 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -791,6 +791,8 @@ int zpci_enable_device(struct zpci_dev *zdev) rc = zpci_dma_init_device(zdev); if (rc) goto out_dma; + + zdev->state = ZPCI_FN_STATE_ONLINE; return 0; out_dma: @@ -819,8 +821,6 @@ int zpci_create_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) goto out_free; - - zdev->state = ZPCI_FN_STATE_ONLINE; } rc = zpci_scan_bus(zdev); if (rc) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index e99a2557f186..cf8a12ff733b 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -48,11 +48,38 @@ static ssize_t show_pfgid(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR(pfgid, S_IRUGO, show_pfgid, NULL); +static void recover_callback(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); + int ret; + + pci_stop_and_remove_bus_device(pdev); + ret = zpci_disable_device(zdev); + if (ret) + return; + + ret = zpci_enable_device(zdev); + if (ret) + return; + + pci_rescan_bus(zdev->bus); +} + +static ssize_t store_recover(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int rc = device_schedule_callback(dev, recover_callback); + return rc ? rc : count; +} +static DEVICE_ATTR(recover, S_IWUSR, NULL, store_recover); + static struct device_attribute *zpci_dev_attrs[] = { &dev_attr_function_id, &dev_attr_function_handle, &dev_attr_pchid, &dev_attr_pfgid, + &dev_attr_recover, NULL, }; diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index 0d9c12fbcb3a..66e505ca24ef 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -79,8 +79,6 @@ static int enable_slot(struct hotplug_slot *hotplug_slot) if (rc) goto out_deconfigure; - slot->zdev->state = ZPCI_FN_STATE_ONLINE; - pci_scan_slot(slot->zdev->bus, ZPCI_DEVFN); pci_bus_add_devices(slot->zdev->bus); From 77e844b9644026c11c5883144540155de39af767 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:36:49 +0200 Subject: [PATCH 27/32] s390/hibernate: add early resume function Some functions that do arch specific resume actions are called directly from swsusp_asm64.S . Before we add another function call provide a generic s390_early_resume function which can be used for this purpose. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cio.h | 1 + arch/s390/kernel/suspend.c | 9 +++++++++ arch/s390/kernel/swsusp_asm64.S | 7 ++----- drivers/s390/cio/css.h | 2 -- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index ffb898961c8d..d42625053c37 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -296,6 +296,7 @@ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, return 0; } +void channel_subsystem_reinit(void); extern void css_schedule_reprobe(void); extern void reipl_ccw_dev(struct ccw_dev_id *id); diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index c479d2f9605b..eebab9f83f1d 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -10,6 +10,8 @@ #include #include #include +#include +#include /* * References to section boundaries @@ -211,3 +213,10 @@ void restore_processor_state(void) __ctl_set_bit(0,28); local_mcck_enable(); } + +/* Called at the end of swsusp_arch_resume */ +void s390_early_resume(void) +{ + lgr_info_log(); + channel_subsystem_reinit(); +} diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index c487be4cfc81..6b09fdffbd2f 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -281,11 +281,8 @@ restore_registers: lghi %r2,0 brasl %r14,arch_set_page_states - /* Log potential guest relocation */ - brasl %r14,lgr_info_log - - /* Reinitialize the channel subsystem */ - brasl %r14,channel_subsystem_reinit + /* Call arch specific early resume code */ + brasl %r14,s390_early_resume /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index b1de60335238..29351321bad6 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -130,8 +130,6 @@ struct channel_subsystem { extern struct channel_subsystem *channel_subsystems[]; -void channel_subsystem_reinit(void); - /* Helper functions to build lists for the slow path. */ void css_schedule_eval(struct subchannel_id schid); void css_schedule_eval_all(void); From 1d57896638f080165165dd9fb4cf848220dfd853 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:37:28 +0200 Subject: [PATCH 28/32] s390/pci: split lpf List pci functions is used to query and iterate over pci functions. This function currently has 2 users - initial device discovery and rescan after a machine check. Instead of having a multipurpose function pass a callback which gets called for each pci function. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 4 +- arch/s390/pci/pci.c | 7 +-- arch/s390/pci/pci_clp.c | 113 ++++++++++++++++++++++-------------- arch/s390/pci/pci_event.c | 2 +- 4 files changed, 72 insertions(+), 54 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 64081f85ffdb..4b2bbc1fdbe0 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -135,7 +135,8 @@ int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); /* CLP */ -int clp_find_pci_devices(void); +int clp_scan_pci_devices(void); +int clp_rescan_pci_devices(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); @@ -163,7 +164,6 @@ static inline void zpci_exit_slot(struct zpci_dev *zdev) {} /* Helpers */ struct zpci_dev *get_zdev(struct pci_dev *); struct zpci_dev *get_zdev_by_fid(u32); -bool zpci_fid_present(u32); /* sysfs */ int zpci_sysfs_add_device(struct device *); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index b0ccd424308a..a7ed6685e7fb 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -99,11 +99,6 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) return zdev; } -bool zpci_fid_present(u32 fid) -{ - return (get_zdev_by_fid(fid) != NULL) ? true : false; -} - static struct zpci_dev *get_zdev_by_bus(struct pci_bus *bus) { return (bus && bus->sysdata) ? (struct zpci_dev *) bus->sysdata : NULL; @@ -926,7 +921,7 @@ static int __init pci_base_init(void) if (rc) goto out_dma; - rc = clp_find_pci_devices(); + rc = clp_scan_pci_devices(); if (rc) goto out_find; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 2e9539625d93..70db060b4d41 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -36,9 +36,9 @@ static inline u8 clp_instr(void *data) return cc; } -static void *clp_alloc_block(void) +static void *clp_alloc_block(gfp_t gfp_mask) { - return (void *) __get_free_pages(GFP_KERNEL, get_order(CLP_BLK_SIZE)); + return (void *) __get_free_pages(gfp_mask, get_order(CLP_BLK_SIZE)); } static void clp_free_block(void *ptr) @@ -70,7 +70,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) struct clp_req_rsp_query_pci_grp *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -113,7 +113,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) struct clp_req_rsp_query_pci *rrb; int rc; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -181,7 +181,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) struct clp_req_rsp_set_pci *rrb; int rc, retries = 1000; - rrb = clp_alloc_block(); + rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) return -ENOMEM; @@ -245,49 +245,12 @@ int clp_disable_fh(struct zpci_dev *zdev) return rc; } -static void clp_check_pcifn_entry(struct clp_fh_list_entry *entry) +static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, + void (*cb)(struct clp_fh_list_entry *entry)) { - int present, rc; - - if (!entry->vendor_id) - return; - - /* TODO: be a little bit more scalable */ - present = zpci_fid_present(entry->fid); - - if (present) - pr_debug("%s: device %x already present\n", __func__, entry->fid); - - /* skip already used functions */ - if (present && entry->config_state) - return; - - /* aev 306: function moved to stand-by state */ - if (present && !entry->config_state) { - /* - * The handle is already disabled, that means no iota/irq freeing via - * the firmware interfaces anymore. Need to free resources manually - * (DMA memory, debug, sysfs)... - */ - zpci_stop_device(get_zdev_by_fid(entry->fid)); - return; - } - - rc = clp_add_pci_device(entry->fid, entry->fh, entry->config_state); - if (rc) - pr_err("Failed to add fid: 0x%x\n", entry->fid); -} - -int clp_find_pci_devices(void) -{ - struct clp_req_rsp_list_pci *rrb; u64 resume_token = 0; int entries, i, rc; - rrb = clp_alloc_block(); - if (!rrb) - return -ENOMEM; - do { memset(rrb, 0, sizeof(*rrb)); rrb->request.hdr.len = sizeof(rrb->request); @@ -316,12 +279,72 @@ int clp_find_pci_devices(void) resume_token = rrb->response.resume_token; for (i = 0; i < entries; i++) - clp_check_pcifn_entry(&rrb->response.fh_list[i]); + cb(&rrb->response.fh_list[i]); } while (resume_token); pr_debug("Maximum number of supported PCI functions: %u\n", rrb->response.max_fn); out: + return rc; +} + +static void __clp_add(struct clp_fh_list_entry *entry) +{ + if (!entry->vendor_id) + return; + + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); +} + +static void __clp_rescan(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) { + clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + return; + } + + if (!entry->config_state) { + /* + * The handle is already disabled, that means no iota/irq freeing via + * the firmware interfaces anymore. Need to free resources manually + * (DMA memory, debug, sysfs)... + */ + zpci_stop_device(zdev); + } +} + +int clp_scan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_add); + + clp_free_block(rrb); + return rc; +} + +int clp_rescan_pci_devices(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_KERNEL); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_rescan); + clp_free_block(rrb); return rc; } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index ec62e3a0dc09..0aecaf954845 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -69,7 +69,7 @@ static void zpci_event_log_avail(struct zpci_ccdf_avail *ccdf) clp_add_pci_device(ccdf->fid, ccdf->fh, 0); break; case 0x0306: - clp_find_pci_devices(); + clp_rescan_pci_devices(); break; default: break; From d03abe5882cc4815bf98c0e01a1deafa4a5d6730 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:38:33 +0200 Subject: [PATCH 29/32] s390/pci: try harder to modify a function In rare situations a PCI function can report a busy condition when we issue the modify pci function command. A temporary busy condition can exceed 1 second but not 2 seconds. Increase the time until we report an error to 2 seconds. Also increase the time we sleep between the retries to reduce the load in this case. Suggested-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_clp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 70db060b4d41..3eaf63a6ecac 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -179,7 +179,7 @@ error: static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) { struct clp_req_rsp_set_pci *rrb; - int rc, retries = 1000; + int rc, retries = 100; rrb = clp_alloc_block(GFP_KERNEL); if (!rrb) @@ -199,7 +199,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) retries--; if (retries < 0) break; - msleep(1); + msleep(20); } } while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY); From 57b5918c33a0797930c3791fb602a8a9d46ef80c Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 19:40:01 +0200 Subject: [PATCH 30/32] s390/pci: update function handle after resume from hibernate Function handles may change while the system was in hibernation use list pci functions and update the function handles. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 4 ++++ arch/s390/kernel/suspend.c | 2 ++ arch/s390/pci/pci.c | 15 ++++++++++----- arch/s390/pci/pci_clp.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 4b2bbc1fdbe0..c290f13d1c47 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -6,6 +6,7 @@ /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 +#include #include #include #include @@ -137,6 +138,7 @@ int zpci_unregister_ioat(struct zpci_dev *, u8); /* CLP */ int clp_scan_pci_devices(void); int clp_rescan_pci_devices(void); +int clp_rescan_pci_devices_simple(void); int clp_add_pci_device(u32, u32, int); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); @@ -145,9 +147,11 @@ int clp_disable_fh(struct zpci_dev *); /* Error handling and recovery */ void zpci_event_error(void *); void zpci_event_availability(void *); +void zpci_rescan(void); #else /* CONFIG_PCI */ static inline void zpci_event_error(void *e) {} static inline void zpci_event_availability(void *e) {} +static inline void zpci_rescan(void) {} #endif /* CONFIG_PCI */ #ifdef CONFIG_HOTPLUG_PCI_S390 diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c index eebab9f83f1d..737bff38e3ee 100644 --- a/arch/s390/kernel/suspend.c +++ b/arch/s390/kernel/suspend.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * References to section boundaries @@ -219,4 +220,5 @@ void s390_early_resume(void) { lgr_info_log(); channel_subsystem_reinit(); + zpci_rescan(); } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index a7ed6685e7fb..f17a8343e360 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -46,7 +46,7 @@ /* list of all detected zpci devices */ static LIST_HEAD(zpci_list); -static DEFINE_MUTEX(zpci_list_lock); +static DEFINE_SPINLOCK(zpci_list_lock); static void zpci_enable_irq(struct irq_data *data); static void zpci_disable_irq(struct irq_data *data); @@ -88,14 +88,14 @@ struct zpci_dev *get_zdev_by_fid(u32 fid) { struct zpci_dev *tmp, *zdev = NULL; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_for_each_entry(tmp, &zpci_list, entry) { if (tmp->fid == fid) { zdev = tmp; break; } } - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); return zdev; } @@ -821,9 +821,9 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out_disable; - mutex_lock(&zpci_list_lock); + spin_lock(&zpci_list_lock); list_add_tail(&zdev->entry, &zpci_list); - mutex_unlock(&zpci_list_lock); + spin_unlock(&zpci_list_lock); zpci_init_slot(zdev); @@ -939,3 +939,8 @@ out: return rc; } subsys_initcall_sync(pci_base_init); + +void zpci_rescan(void) +{ + clp_rescan_pci_devices_simple(); +} diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 3eaf63a6ecac..475563c3d1e4 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -319,6 +319,20 @@ static void __clp_rescan(struct clp_fh_list_entry *entry) } } +static void __clp_update(struct clp_fh_list_entry *entry) +{ + struct zpci_dev *zdev; + + if (!entry->vendor_id) + return; + + zdev = get_zdev_by_fid(entry->fid); + if (!zdev) + return; + + zdev->fh = entry->fh; +} + int clp_scan_pci_devices(void) { struct clp_req_rsp_list_pci *rrb; @@ -348,3 +362,18 @@ int clp_rescan_pci_devices(void) clp_free_block(rrb); return rc; } + +int clp_rescan_pci_devices_simple(void) +{ + struct clp_req_rsp_list_pci *rrb; + int rc; + + rrb = clp_alloc_block(GFP_NOWAIT); + if (!rrb) + return -ENOMEM; + + rc = clp_list_pci(rrb, __clp_update); + + clp_free_block(rrb); + return rc; +} From eb072a7996467937a1582d0188ed082768902a5a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 20:31:06 +0200 Subject: [PATCH 31/32] s390/cio: fix unlocked access of global bitmap Access to the slow_subchannel_set has to be secured via the slow_subchannel_lock. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/css.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 4eb2a54e64f2..8c2cb87bccc5 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -546,7 +546,9 @@ static int slow_eval_unknown_fn(struct subchannel_id schid, void *data) case -ENOMEM: case -EIO: /* These should abort looping */ + spin_lock_irq(&slow_subchannel_lock); idset_sch_del_subseq(slow_subchannel_set, schid); + spin_unlock_irq(&slow_subchannel_lock); break; default: rc = 0; From 224593215525a79fe1acfffaafa528af9dc6f738 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 29 Aug 2013 20:31:50 +0200 Subject: [PATCH 32/32] s390/pci: use virtual memory for iommu bitmap The bitmap used to mark dma mappings can be quite large on systems with huge amounts of memory. Use virtual memory for this bitmap. Suggested-by: Alexander Schmidt Reviewed-by: Gerald Schaefer Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_dma.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2125310aa891..7e5573acb063 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -407,7 +408,6 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int zpci_dma_init_device(struct zpci_dev *zdev) { - unsigned int bitmap_order; int rc; spin_lock_init(&zdev->iommu_bitmap_lock); @@ -421,12 +421,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev) zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET; zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - bitmap_order = get_order(zdev->iommu_pages / 8); - pr_info("iommu_size: 0x%lx iommu_pages: 0x%lx bitmap_order: %i\n", - zdev->iommu_size, zdev->iommu_pages, bitmap_order); - - zdev->iommu_bitmap = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, - bitmap_order); + zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8); if (!zdev->iommu_bitmap) { rc = -ENOMEM; goto out_reg; @@ -451,8 +446,7 @@ void zpci_dma_exit_device(struct zpci_dev *zdev) { zpci_unregister_ioat(zdev, 0); dma_cleanup_tables(zdev); - free_pages((unsigned long) zdev->iommu_bitmap, - get_order(zdev->iommu_pages / 8)); + vfree(zdev->iommu_bitmap); zdev->iommu_bitmap = NULL; zdev->next_bit = 0; }