From 1c6e4b1811bffc6b0520310f6d8a6b4ec760fdbc Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 30 Mar 2015 16:46:05 -0700 Subject: [PATCH 1/8] s390: Use bool function return values of true/false not 1/0 Use the normal return values for bool functions Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 709955ddaa4d..9d395961e713 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -42,7 +42,7 @@ static inline int dma_supported(struct device *dev, u64 mask) static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) - return 0; + return false; return addr + size - 1 <= *dev->dma_mask; } From 054623105728b06852f077299e2bf1bf3d5f2b0b Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 1 Apr 2015 16:08:32 +0200 Subject: [PATCH 2/8] s390/bpf: Add s390x eBPF JIT compiler backend Replace 32 bit BPF JIT backend with new 64 bit eBPF backend. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- arch/s390/net/bpf_jit.S | 197 ++-- arch/s390/net/bpf_jit.h | 58 ++ arch/s390/net/bpf_jit_comp.c | 1834 +++++++++++++++++++++------------- 4 files changed, 1261 insertions(+), 830 deletions(-) create mode 100644 arch/s390/net/bpf_jit.h diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a5ced5c3c1e0..53a91425d92d 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -115,7 +115,7 @@ config S390 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK + select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z9_109_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S index ba44c9f55346..a1c917d881ec 100644 --- a/arch/s390/net/bpf_jit.S +++ b/arch/s390/net/bpf_jit.S @@ -1,134 +1,115 @@ /* * BPF Jit compiler for s390, help functions. * - * Copyright IBM Corp. 2012 + * Copyright IBM Corp. 2012,2015 * * Author(s): Martin Schwidefsky + * Michael Holzheu */ + #include +#include "bpf_jit.h" /* * Calling convention: - * registers %r2, %r6-%r8, %r10-%r11, %r13, %r15 are call saved - * %r2: skb pointer - * %r3: offset parameter - * %r5: BPF A accumulator - * %r8: return address - * %r9: save register for skb pointer - * %r10: skb->data - * %r11: skb->len - skb->data_len (headlen) - * %r12: BPF X accumulator + * registers %r7-%r10, %r11,%r13, and %r15 are call saved + * + * Input (64 bit): + * %r3 (%b2) = offset into skb data + * %r6 (%b5) = return address + * %r7 (%b6) = skb pointer + * %r12 = skb data pointer + * + * Output: + * %r14= %b0 = return value (read skb value) + * + * Work registers: %r2,%r4,%r5,%r14 * * skb_copy_bits takes 4 parameters: * %r2 = skb pointer * %r3 = offset into skb data * %r4 = pointer to temp buffer * %r5 = length to copy + * Return value in %r2: 0 = ok + * + * bpf_internal_load_pointer_neg_helper takes 3 parameters: + * %r2 = skb pointer + * %r3 = offset into data + * %r4 = length to copy + * Return value in %r2: Pointer to data */ -#define SKBDATA %r8 - /* A = *(u32 *) (skb->data+K+X) */ -ENTRY(sk_load_word_ind) - ar %r3,%r12 # offset += X - bmr %r8 # < 0 -> return with cc +#define SKF_MAX_NEG_OFF -0x200000 /* SKF_LL_OFF from filter.h */ - /* A = *(u32 *) (skb->data+K) */ -ENTRY(sk_load_word) - llgfr %r1,%r3 # extend offset - ahi %r3,4 # offset + 4 - clr %r11,%r3 # hlen <= offset + 4 ? - jl sk_load_word_slow - l %r5,0(%r1,%r10) # get word from skb - xr %r1,%r1 # set cc to zero - br %r8 +/* + * Load SIZE bytes from SKB + */ +#define sk_load_common(NAME, SIZE, LOAD) \ +ENTRY(sk_load_##NAME); \ + ltgr %r3,%r3; /* Is offset negative? */ \ + jl sk_load_##NAME##_slow_neg; \ +ENTRY(sk_load_##NAME##_pos); \ + aghi %r3,SIZE; /* Offset + SIZE */ \ + clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ + jh sk_load_##NAME##_slow; \ + LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ + b OFF_OK(%r6); /* Return */ \ + \ +sk_load_##NAME##_slow:; \ + lgr %r2,%r7; /* Arg1 = skb pointer */ \ + aghi %r3,-SIZE; /* Arg2 = offset */ \ + la %r4,STK_OFF_TMP(%r15); /* Arg3 = temp bufffer */ \ + lghi %r5,SIZE; /* Arg4 = size */ \ + brasl %r14,skb_copy_bits; /* Get data from skb */ \ + LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ + ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ + br %r6; /* Return */ -sk_load_word_slow: - lgr %r9,%r2 # save %r2 - lgr %r3,%r1 # offset - la %r4,160(%r15) # pointer to temp buffer - lghi %r5,4 # 4 bytes - brasl %r14,skb_copy_bits # get data from skb - l %r5,160(%r15) # load result from temp buffer - ltgr %r2,%r2 # set cc to (%r2 != 0) - lgr %r2,%r9 # restore %r2 - br %r8 +sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ +sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ - /* A = *(u16 *) (skb->data+K+X) */ -ENTRY(sk_load_half_ind) - ar %r3,%r12 # offset += X - bmr %r8 # < 0 -> return with cc - - /* A = *(u16 *) (skb->data+K) */ -ENTRY(sk_load_half) - llgfr %r1,%r3 # extend offset - ahi %r3,2 # offset + 2 - clr %r11,%r3 # hlen <= offset + 2 ? - jl sk_load_half_slow - llgh %r5,0(%r1,%r10) # get half from skb - xr %r1,%r1 # set cc to zero - br %r8 - -sk_load_half_slow: - lgr %r9,%r2 # save %r2 - lgr %r3,%r1 # offset - la %r4,162(%r15) # pointer to temp buffer - lghi %r5,2 # 2 bytes - brasl %r14,skb_copy_bits # get data from skb - xc 160(2,%r15),160(%r15) - l %r5,160(%r15) # load result from temp buffer - ltgr %r2,%r2 # set cc to (%r2 != 0) - lgr %r2,%r9 # restore %r2 - br %r8 - - /* A = *(u8 *) (skb->data+K+X) */ -ENTRY(sk_load_byte_ind) - ar %r3,%r12 # offset += X - bmr %r8 # < 0 -> return with cc - - /* A = *(u8 *) (skb->data+K) */ +/* + * Load 1 byte from SKB (optimized version) + */ + /* r14 = *(u8 *) (skb->data+offset) */ ENTRY(sk_load_byte) - llgfr %r1,%r3 # extend offset - clr %r11,%r3 # hlen < offset ? - jle sk_load_byte_slow - lhi %r5,0 - ic %r5,0(%r1,%r10) # get byte from skb - xr %r1,%r1 # set cc to zero - br %r8 + ltgr %r3,%r3 # Is offset negative? + jl sk_load_byte_slow_neg +ENTRY(sk_load_byte_pos) + clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? + jnl sk_load_byte_slow + llgc %r14,0(%r3,%r12) # Get byte from skb + b OFF_OK(%r6) # Return OK sk_load_byte_slow: - lgr %r9,%r2 # save %r2 - lgr %r3,%r1 # offset - la %r4,163(%r15) # pointer to temp buffer - lghi %r5,1 # 1 byte - brasl %r14,skb_copy_bits # get data from skb - xc 160(3,%r15),160(%r15) - l %r5,160(%r15) # load result from temp buffer - ltgr %r2,%r2 # set cc to (%r2 != 0) - lgr %r2,%r9 # restore %r2 - br %r8 + lgr %r2,%r7 # Arg1 = skb pointer + # Arg2 = offset + la %r4,STK_OFF_TMP(%r15) # Arg3 = pointer to temp buffer + lghi %r5,1 # Arg4 = size (1 byte) + brasl %r14,skb_copy_bits # Get data from skb + llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer + ltgr %r2,%r2 # Set cc to (%r2 != 0) + br %r6 # Return cc - /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ -ENTRY(sk_load_byte_msh) - llgfr %r1,%r3 # extend offset - clr %r11,%r3 # hlen < offset ? - jle sk_load_byte_msh_slow - lhi %r12,0 - ic %r12,0(%r1,%r10) # get byte from skb - nill %r12,0x0f - sll %r12,2 - xr %r1,%r1 # set cc to zero - br %r8 +#define sk_negative_common(NAME, SIZE, LOAD) \ +sk_load_##NAME##_slow_neg:; \ + cgfi %r3,SKF_MAX_NEG_OFF; \ + jl bpf_error; \ + lgr %r2,%r7; /* Arg1 = skb pointer */ \ + /* Arg2 = offset */ \ + lghi %r4,SIZE; /* Arg3 = size */ \ + brasl %r14,bpf_internal_load_pointer_neg_helper; \ + ltgr %r2,%r2; \ + jz bpf_error; \ + LOAD %r14,0(%r2); /* Get data from pointer */ \ + xr %r3,%r3; /* Set cc to zero */ \ + br %r6; /* Return cc */ -sk_load_byte_msh_slow: - lgr %r9,%r2 # save %r2 - lgr %r3,%r1 # offset - la %r4,163(%r15) # pointer to temp buffer - lghi %r5,1 # 1 byte - brasl %r14,skb_copy_bits # get data from skb - xc 160(3,%r15),160(%r15) - l %r12,160(%r15) # load result from temp buffer - nill %r12,0x0f - sll %r12,2 - ltgr %r2,%r2 # set cc to (%r2 != 0) - lgr %r2,%r9 # restore %r2 - br %r8 +sk_negative_common(word, 4, llgf) +sk_negative_common(half, 2, llgh) +sk_negative_common(byte, 1, llgc) + +bpf_error: +# force a return 0 from jit handler + ltgr %r15,%r15 # Set condition code + br %r6 diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h new file mode 100644 index 000000000000..ba8593a515ba --- /dev/null +++ b/arch/s390/net/bpf_jit.h @@ -0,0 +1,58 @@ +/* + * BPF Jit compiler defines + * + * Copyright IBM Corp. 2012,2015 + * + * Author(s): Martin Schwidefsky + * Michael Holzheu + */ + +#ifndef __ARCH_S390_NET_BPF_JIT_H +#define __ARCH_S390_NET_BPF_JIT_H + +#ifndef __ASSEMBLY__ + +#include +#include + +extern u8 sk_load_word_pos[], sk_load_half_pos[], sk_load_byte_pos[]; +extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; + +#endif /* __ASSEMBLY__ */ + +/* + * Stackframe layout (packed stack): + * + * ^ high + * +---------------+ | + * | old backchain | | + * +---------------+ | + * | r15 - r6 | | + * BFP -> +===============+ | + * | | | + * | BPF stack | | + * | | | + * +---------------+ | + * | 8 byte hlen | | + * R15+168 -> +---------------+ | + * | 4 byte align | | + * +---------------+ | + * | 4 byte temp | | + * | for bpf_jit.S | | + * R15+160 -> +---------------+ | + * | new backchain | | + * R15+152 -> +---------------+ | + * | + 152 byte SA | | + * R15 -> +---------------+ + low + * + * We get 160 bytes stack space from calling function, but only use + * 11 * 8 byte (old backchain + r15 - r6) for storing registers. + */ +#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8)) +#define STK_OFF_TMP 160 /* Offset of tmp buffer on stack */ +#define STK_OFF_HLEN 168 /* Offset of SKB header length on stack */ + +/* Offset to skip condition code check */ +#define OFF_OK 4 + +#endif /* __ARCH_S390_NET_BPF_JIT_H */ diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bbd1981cc150..7690dc8e1ab5 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1,817 +1,1209 @@ /* * BPF Jit compiler for s390. * - * Copyright IBM Corp. 2012 + * Minimum build requirements: + * + * - HAVE_MARCH_Z196_FEATURES: laal, laalg + * - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj + * - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf + * - PACK_STACK + * - 64BIT + * + * Copyright IBM Corp. 2012,2015 * * Author(s): Martin Schwidefsky + * Michael Holzheu */ + +#define KMSG_COMPONENT "bpf_jit" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include -#include #include #include #include -#include #include +#include "bpf_jit.h" -/* - * Conventions: - * %r2 = skb pointer - * %r3 = offset parameter - * %r4 = scratch register / length parameter - * %r5 = BPF A accumulator - * %r8 = return address - * %r9 = save register for skb pointer - * %r10 = skb->data - * %r11 = skb->len - skb->data_len (headlen) - * %r12 = BPF X accumulator - * %r13 = literal pool pointer - * 0(%r15) - 63(%r15) scratch memory array with BPF_MEMWORDS - */ int bpf_jit_enable __read_mostly; -/* - * assembly code in arch/x86/net/bpf_jit.S - */ -extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; - struct bpf_jit { - unsigned int seen; - u8 *start; - u8 *prg; - u8 *mid; - u8 *lit; - u8 *end; - u8 *base_ip; - u8 *ret0_ip; - u8 *exit_ip; - unsigned int off_load_word; - unsigned int off_load_half; - unsigned int off_load_byte; - unsigned int off_load_bmsh; - unsigned int off_load_iword; - unsigned int off_load_ihalf; - unsigned int off_load_ibyte; + u32 seen; /* Flags to remember seen eBPF instructions */ + u32 seen_reg[16]; /* Array to remember which registers are used */ + u32 *addrs; /* Array with relative instruction addresses */ + u8 *prg_buf; /* Start of program */ + int size; /* Size of program and literal pool */ + int size_prg; /* Size of program */ + int prg; /* Current position in program */ + int lit_start; /* Start of literal pool */ + int lit; /* Current position in literal pool */ + int base_ip; /* Base address for literal pool */ + int ret0_ip; /* Address of return 0 */ + int exit_ip; /* Address of exit */ }; #define BPF_SIZE_MAX 4096 /* Max size for program */ -#define SEEN_DATAREF 1 /* might call external helpers */ -#define SEEN_XREG 2 /* ebx is used */ -#define SEEN_MEM 4 /* use mem[] for temporary storage */ -#define SEEN_RET0 8 /* pc_ret0 points to a valid return 0 */ -#define SEEN_LITERAL 16 /* code uses literals */ -#define SEEN_LOAD_WORD 32 /* code uses sk_load_word */ -#define SEEN_LOAD_HALF 64 /* code uses sk_load_half */ -#define SEEN_LOAD_BYTE 128 /* code uses sk_load_byte */ -#define SEEN_LOAD_BMSH 256 /* code uses sk_load_byte_msh */ -#define SEEN_LOAD_IWORD 512 /* code uses sk_load_word_ind */ -#define SEEN_LOAD_IHALF 1024 /* code uses sk_load_half_ind */ -#define SEEN_LOAD_IBYTE 2048 /* code uses sk_load_byte_ind */ +#define SEEN_SKB 1 /* skb access */ +#define SEEN_MEM 2 /* use mem[] for temporary storage */ +#define SEEN_RET0 4 /* ret0_ip points to a valid return 0 */ +#define SEEN_LITERAL 8 /* code uses literals */ +#define SEEN_FUNC 16 /* calls C functions */ +#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) -#define EMIT2(op) \ -({ \ - if (jit->prg + 2 <= jit->mid) \ - *(u16 *) jit->prg = op; \ - jit->prg += 2; \ -}) +/* + * s390 registers + */ +#define REG_W0 (__MAX_BPF_REG+0) /* Work register 1 (even) */ +#define REG_W1 (__MAX_BPF_REG+1) /* Work register 2 (odd) */ +#define REG_SKB_DATA (__MAX_BPF_REG+2) /* SKB data register */ +#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */ +#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */ +#define REG_0 REG_W0 /* Register 0 */ +#define REG_2 BPF_REG_1 /* Register 2 */ +#define REG_14 BPF_REG_0 /* Register 14 */ -#define EMIT4(op) \ -({ \ - if (jit->prg + 4 <= jit->mid) \ - *(u32 *) jit->prg = op; \ - jit->prg += 4; \ -}) +/* + * Mapping of BPF registers to s390 registers + */ +static const int reg2hex[] = { + /* Return code */ + [BPF_REG_0] = 14, + /* Function parameters */ + [BPF_REG_1] = 2, + [BPF_REG_2] = 3, + [BPF_REG_3] = 4, + [BPF_REG_4] = 5, + [BPF_REG_5] = 6, + /* Call saved registers */ + [BPF_REG_6] = 7, + [BPF_REG_7] = 8, + [BPF_REG_8] = 9, + [BPF_REG_9] = 10, + /* BPF stack pointer */ + [BPF_REG_FP] = 13, + /* SKB data pointer */ + [REG_SKB_DATA] = 12, + /* Work registers for s390x backend */ + [REG_W0] = 0, + [REG_W1] = 1, + [REG_L] = 11, + [REG_15] = 15, +}; -#define EMIT4_DISP(op, disp) \ -({ \ - unsigned int __disp = (disp) & 0xfff; \ - EMIT4(op | __disp); \ -}) - -#define EMIT4_IMM(op, imm) \ -({ \ - unsigned int __imm = (imm) & 0xffff; \ - EMIT4(op | __imm); \ -}) - -#define EMIT4_PCREL(op, pcrel) \ -({ \ - long __pcrel = ((pcrel) >> 1) & 0xffff; \ - EMIT4(op | __pcrel); \ -}) - -#define EMIT6(op1, op2) \ -({ \ - if (jit->prg + 6 <= jit->mid) { \ - *(u32 *) jit->prg = op1; \ - *(u16 *) (jit->prg + 4) = op2; \ - } \ - jit->prg += 6; \ -}) - -#define EMIT6_DISP(op1, op2, disp) \ -({ \ - unsigned int __disp = (disp) & 0xfff; \ - EMIT6(op1 | __disp, op2); \ -}) - -#define EMIT6_IMM(op, imm) \ -({ \ - unsigned int __imm = (imm); \ - EMIT6(op | (__imm >> 16), __imm & 0xffff); \ -}) - -#define EMIT_CONST(val) \ -({ \ - unsigned int ret; \ - ret = (unsigned int) (jit->lit - jit->base_ip); \ - jit->seen |= SEEN_LITERAL; \ - if (jit->lit + 4 <= jit->end) \ - *(u32 *) jit->lit = val; \ - jit->lit += 4; \ - ret; \ -}) - -#define EMIT_FN_CONST(bit, fn) \ -({ \ - unsigned int ret; \ - ret = (unsigned int) (jit->lit - jit->base_ip); \ - if (jit->seen & bit) { \ - jit->seen |= SEEN_LITERAL; \ - if (jit->lit + 8 <= jit->end) \ - *(void **) jit->lit = fn; \ - jit->lit += 8; \ - } \ - ret; \ -}) - -static void bpf_jit_fill_hole(void *area, unsigned int size) +static inline u32 reg(u32 dst_reg, u32 src_reg) +{ + return reg2hex[dst_reg] << 4 | reg2hex[src_reg]; +} + +static inline u32 reg_high(u32 reg) +{ + return reg2hex[reg] << 4; +} + +static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) +{ + u32 r1 = reg2hex[b1]; + + if (!jit->seen_reg[r1] && r1 >= 6 && r1 <= 15) + jit->seen_reg[r1] = 1; +} + +#define REG_SET_SEEN(b1) \ +({ \ + reg_set_seen(jit, b1); \ +}) + +#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]] + +/* + * EMIT macros for code generation + */ + +#define _EMIT2(op) \ +({ \ + if (jit->prg_buf) \ + *(u16 *) (jit->prg_buf + jit->prg) = op; \ + jit->prg += 2; \ +}) + +#define EMIT2(op, b1, b2) \ +({ \ + _EMIT2(op | reg(b1, b2)); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ +}) + +#define _EMIT4(op) \ +({ \ + if (jit->prg_buf) \ + *(u32 *) (jit->prg_buf + jit->prg) = op; \ + jit->prg += 4; \ +}) + +#define EMIT4(op, b1, b2) \ +({ \ + _EMIT4(op | reg(b1, b2)); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ +}) + +#define EMIT4_RRF(op, b1, b2, b3) \ +({ \ + _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ + REG_SET_SEEN(b3); \ +}) + +#define _EMIT4_DISP(op, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + _EMIT4(op | __disp); \ +}) + +#define EMIT4_DISP(op, b1, b2, disp) \ +({ \ + _EMIT4_DISP(op | reg_high(b1) << 16 | \ + reg_high(b2) << 8, disp); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ +}) + +#define EMIT4_IMM(op, b1, imm) \ +({ \ + unsigned int __imm = (imm) & 0xffff; \ + _EMIT4(op | reg_high(b1) << 16 | __imm); \ + REG_SET_SEEN(b1); \ +}) + +#define EMIT4_PCREL(op, pcrel) \ +({ \ + long __pcrel = ((pcrel) >> 1) & 0xffff; \ + _EMIT4(op | __pcrel); \ +}) + +#define _EMIT6(op1, op2) \ +({ \ + if (jit->prg_buf) { \ + *(u32 *) (jit->prg_buf + jit->prg) = op1; \ + *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \ + } \ + jit->prg += 6; \ +}) + +#define _EMIT6_DISP(op1, op2, disp) \ +({ \ + unsigned int __disp = (disp) & 0xfff; \ + _EMIT6(op1 | __disp, op2); \ +}) + +#define EMIT6_DISP(op1, op2, b1, b2, b3, disp) \ +({ \ + _EMIT6_DISP(op1 | reg(b1, b2) << 16 | \ + reg_high(b3) << 8, op2, disp); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ + REG_SET_SEEN(b3); \ +}) + +#define _EMIT6_DISP_LH(op1, op2, disp) \ +({ \ + unsigned int __disp_h = ((u32)disp) & 0xff000; \ + unsigned int __disp_l = ((u32)disp) & 0x00fff; \ + _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \ +}) + +#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \ +({ \ + _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \ + reg_high(b3) << 8, op2, disp); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ + REG_SET_SEEN(b3); \ +}) + +#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \ +({ \ + /* Branch instruction needs 6 bytes */ \ + int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\ + _EMIT6(op1 | reg(b1, b2) << 16 | rel, op2 | mask); \ + REG_SET_SEEN(b1); \ + REG_SET_SEEN(b2); \ +}) + +#define _EMIT6_IMM(op, imm) \ +({ \ + unsigned int __imm = (imm); \ + _EMIT6(op | (__imm >> 16), __imm & 0xffff); \ +}) + +#define EMIT6_IMM(op, b1, imm) \ +({ \ + _EMIT6_IMM(op | reg_high(b1) << 16, imm); \ + REG_SET_SEEN(b1); \ +}) + +#define EMIT_CONST_U32(val) \ +({ \ + unsigned int ret; \ + ret = jit->lit - jit->base_ip; \ + jit->seen |= SEEN_LITERAL; \ + if (jit->prg_buf) \ + *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \ + jit->lit += 4; \ + ret; \ +}) + +#define EMIT_CONST_U64(val) \ +({ \ + unsigned int ret; \ + ret = jit->lit - jit->base_ip; \ + jit->seen |= SEEN_LITERAL; \ + if (jit->prg_buf) \ + *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \ + jit->lit += 8; \ + ret; \ +}) + +#define EMIT_ZERO(b1) \ +({ \ + /* llgfr %dst,%dst (zero extend to 64 bit) */ \ + EMIT4(0xb9160000, b1, b1); \ + REG_SET_SEEN(b1); \ +}) + +/* + * Fill whole space with illegal instructions + */ +static void jit_fill_hole(void *area, unsigned int size) { - /* Fill whole space with illegal instructions */ memset(area, 0, size); } +/* + * Save registers from "rs" (register start) to "re" (register end) on stack + */ +static void save_regs(struct bpf_jit *jit, u32 rs, u32 re) +{ + u32 off = 72 + (rs - 6) * 8; + + if (rs == re) + /* stg %rs,off(%r15) */ + _EMIT6(0xe300f000 | rs << 20 | off, 0x0024); + else + /* stmg %rs,%re,off(%r15) */ + _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off); +} + +/* + * Restore registers from "rs" (register start) to "re" (register end) on stack + */ +static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re) +{ + u32 off = 72 + (rs - 6) * 8; + + if (jit->seen & SEEN_STACK) + off += STK_OFF; + + if (rs == re) + /* lg %rs,off(%r15) */ + _EMIT6(0xe300f000 | rs << 20 | off, 0x0004); + else + /* lmg %rs,%re,off(%r15) */ + _EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off); +} + +/* + * Return first seen register (from start) + */ +static int get_start(struct bpf_jit *jit, int start) +{ + int i; + + for (i = start; i <= 15; i++) { + if (jit->seen_reg[i]) + return i; + } + return 0; +} + +/* + * Return last seen register (from start) (gap >= 2) + */ +static int get_end(struct bpf_jit *jit, int start) +{ + int i; + + for (i = start; i < 15; i++) { + if (!jit->seen_reg[i] && !jit->seen_reg[i + 1]) + return i - 1; + } + return jit->seen_reg[15] ? 15 : 14; +} + +#define REGS_SAVE 1 +#define REGS_RESTORE 0 +/* + * Save and restore clobbered registers (6-15) on stack. + * We save/restore registers in chunks with gap >= 2 registers. + */ +static void save_restore_regs(struct bpf_jit *jit, int op) +{ + + int re = 6, rs; + + do { + rs = get_start(jit, re); + if (!rs) + break; + re = get_end(jit, rs + 1); + if (op == REGS_SAVE) + save_regs(jit, rs, re); + else + restore_regs(jit, rs, re); + re++; + } while (re <= 15); +} + +/* + * Emit function prologue + * + * Save registers and create stack frame if necessary. + * See stack frame layout desription in "bpf_jit.h"! + */ static void bpf_jit_prologue(struct bpf_jit *jit) { - /* Save registers and create stack frame if necessary */ - if (jit->seen & SEEN_DATAREF) { - /* stmg %r8,%r15,88(%r15) */ - EMIT6(0xeb8ff058, 0x0024); - /* lgr %r14,%r15 */ - EMIT4(0xb90400ef); - /* aghi %r15, */ - EMIT4_IMM(0xa7fb0000, (jit->seen & SEEN_MEM) ? -112 : -80); - /* stg %r14,152(%r15) */ - EMIT6(0xe3e0f098, 0x0024); - } else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) - /* stmg %r12,%r13,120(%r15) */ - EMIT6(0xebcdf078, 0x0024); - else if (jit->seen & SEEN_XREG) - /* stg %r12,120(%r15) */ - EMIT6(0xe3c0f078, 0x0024); - else if (jit->seen & SEEN_LITERAL) - /* stg %r13,128(%r15) */ - EMIT6(0xe3d0f080, 0x0024); - + /* Save registers */ + save_restore_regs(jit, REGS_SAVE); /* Setup literal pool */ if (jit->seen & SEEN_LITERAL) { /* basr %r13,0 */ - EMIT2(0x0dd0); + EMIT2(0x0d00, REG_L, REG_0); jit->base_ip = jit->prg; } - jit->off_load_word = EMIT_FN_CONST(SEEN_LOAD_WORD, sk_load_word); - jit->off_load_half = EMIT_FN_CONST(SEEN_LOAD_HALF, sk_load_half); - jit->off_load_byte = EMIT_FN_CONST(SEEN_LOAD_BYTE, sk_load_byte); - jit->off_load_bmsh = EMIT_FN_CONST(SEEN_LOAD_BMSH, sk_load_byte_msh); - jit->off_load_iword = EMIT_FN_CONST(SEEN_LOAD_IWORD, sk_load_word_ind); - jit->off_load_ihalf = EMIT_FN_CONST(SEEN_LOAD_IHALF, sk_load_half_ind); - jit->off_load_ibyte = EMIT_FN_CONST(SEEN_LOAD_IBYTE, sk_load_byte_ind); - - /* Filter needs to access skb data */ - if (jit->seen & SEEN_DATAREF) { - /* l %r11,(%r2) */ - EMIT4_DISP(0x58b02000, offsetof(struct sk_buff, len)); - /* s %r11,(%r2) */ - EMIT4_DISP(0x5bb02000, offsetof(struct sk_buff, data_len)); - /* lg %r10,(%r2) */ - EMIT6_DISP(0xe3a02000, 0x0004, - offsetof(struct sk_buff, data)); + /* Setup stack and backchain */ + if (jit->seen & SEEN_STACK) { + /* lgr %bfp,%r15 (BPF frame pointer) */ + EMIT4(0xb9040000, BPF_REG_FP, REG_15); + /* aghi %r15,-STK_OFF */ + EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF); + if (jit->seen & SEEN_FUNC) + /* stg %bfp,152(%r15) (backchain) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0, + REG_15, 152); } + /* + * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S" + * we store the SKB header length on the stack and the SKB data + * pointer in REG_SKB_DATA. + */ + if (jit->seen & SEEN_SKB) { + /* Header length: llgf %w1,(%b1) */ + EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_1, + offsetof(struct sk_buff, len)); + /* s %w1,(%b1) */ + EMIT4_DISP(0x5b000000, REG_W1, BPF_REG_1, + offsetof(struct sk_buff, data_len)); + /* stg %w1,ST_OFF_HLEN(%r0,%r15) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, + STK_OFF_HLEN); + /* lg %skb_data,data_off(%b1) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, + BPF_REG_1, offsetof(struct sk_buff, data)); + } + /* BPF compatibility: clear A (%b7) and X (%b8) registers */ + if (REG_SEEN(BPF_REG_7)) + /* lghi %b7,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_7, 0); + if (REG_SEEN(BPF_REG_8)) + /* lghi %b8,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_8, 0); } +/* + * Function epilogue + */ static void bpf_jit_epilogue(struct bpf_jit *jit) { /* Return 0 */ if (jit->seen & SEEN_RET0) { jit->ret0_ip = jit->prg; - /* lghi %r2,0 */ - EMIT4(0xa7290000); + /* lghi %b0,0 */ + EMIT4_IMM(0xa7090000, BPF_REG_0, 0); } jit->exit_ip = jit->prg; + /* Load exit code: lgr %r2,%b0 */ + EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ - if (jit->seen & SEEN_DATAREF) - /* lmg %r8,%r15,(%r15) */ - EMIT6_DISP(0xeb8ff000, 0x0004, - (jit->seen & SEEN_MEM) ? 200 : 168); - else if ((jit->seen & SEEN_XREG) && (jit->seen & SEEN_LITERAL)) - /* lmg %r12,%r13,120(%r15) */ - EMIT6(0xebcdf078, 0x0004); - else if (jit->seen & SEEN_XREG) - /* lg %r12,120(%r15) */ - EMIT6(0xe3c0f078, 0x0004); - else if (jit->seen & SEEN_LITERAL) - /* lg %r13,128(%r15) */ - EMIT6(0xe3d0f080, 0x0004); + save_restore_regs(jit, REGS_RESTORE); /* br %r14 */ - EMIT2(0x07fe); + _EMIT2(0x07fe); } /* - * make sure we dont leak kernel information to user + * Compile one eBPF instruction into s390x code */ -static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter) +static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i) { - /* Clear temporary memory if (seen & SEEN_MEM) */ - if (jit->seen & SEEN_MEM) - /* xc 0(64,%r15),0(%r15) */ - EMIT6(0xd73ff000, 0xf000); - /* Clear X if (seen & SEEN_XREG) */ - if (jit->seen & SEEN_XREG) - /* lhi %r12,0 */ - EMIT4(0xa7c80000); - /* Clear A if the first register does not set it. */ - switch (filter[0].code) { - case BPF_LD | BPF_W | BPF_ABS: - case BPF_LD | BPF_H | BPF_ABS: - case BPF_LD | BPF_B | BPF_ABS: - case BPF_LD | BPF_W | BPF_LEN: - case BPF_LD | BPF_W | BPF_IND: - case BPF_LD | BPF_H | BPF_IND: - case BPF_LD | BPF_B | BPF_IND: - case BPF_LD | BPF_IMM: - case BPF_LD | BPF_MEM: - case BPF_MISC | BPF_TXA: - case BPF_RET | BPF_K: - /* first instruction sets A register */ + struct bpf_insn *insn = &fp->insnsi[i]; + int jmp_off, last, insn_count = 1; + unsigned int func_addr, mask; + u32 dst_reg = insn->dst_reg; + u32 src_reg = insn->src_reg; + u32 *addrs = jit->addrs; + s32 imm = insn->imm; + s16 off = insn->off; + + switch (insn->code) { + /* + * BPF_MOV + */ + case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */ + /* llgfr %dst,%src */ + EMIT4(0xb9160000, dst_reg, src_reg); + break; + case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ + /* lgr %dst,%src */ + EMIT4(0xb9040000, dst_reg, src_reg); + break; + case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */ + /* llilf %dst,imm */ + EMIT6_IMM(0xc00f0000, dst_reg, imm); + break; + case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */ + /* lgfi %dst,imm */ + EMIT6_IMM(0xc0010000, dst_reg, imm); + break; + /* + * BPF_LD 64 + */ + case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + { + /* 16 byte instruction that uses two 'struct bpf_insn' */ + u64 imm64; + + imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32; + /* lg %dst,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm64)); + insn_count = 2; break; - default: /* A = 0 */ - /* lhi %r5,0 */ - EMIT4(0xa7580000); } -} + /* + * BPF_ADD + */ + case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */ + /* ar %dst,%src */ + EMIT2(0x1a00, dst_reg, src_reg); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */ + /* agr %dst,%src */ + EMIT4(0xb9080000, dst_reg, src_reg); + break; + case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */ + if (!imm) + break; + /* alfi %dst,imm */ + EMIT6_IMM(0xc20b0000, dst_reg, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */ + if (!imm) + break; + /* agfi %dst,imm */ + EMIT6_IMM(0xc2080000, dst_reg, imm); + break; + /* + * BPF_SUB + */ + case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */ + /* sr %dst,%src */ + EMIT2(0x1b00, dst_reg, src_reg); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */ + /* sgr %dst,%src */ + EMIT4(0xb9090000, dst_reg, src_reg); + break; + case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */ + if (!imm) + break; + /* alfi %dst,-imm */ + EMIT6_IMM(0xc20b0000, dst_reg, -imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */ + if (!imm) + break; + /* agfi %dst,-imm */ + EMIT6_IMM(0xc2080000, dst_reg, -imm); + break; + /* + * BPF_MUL + */ + case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */ + /* msr %dst,%src */ + EMIT4(0xb2520000, dst_reg, src_reg); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */ + /* msgr %dst,%src */ + EMIT4(0xb90c0000, dst_reg, src_reg); + break; + case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */ + if (imm == 1) + break; + /* msfi %r5,imm */ + EMIT6_IMM(0xc2010000, dst_reg, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */ + if (imm == 1) + break; + /* msgfi %dst,imm */ + EMIT6_IMM(0xc2000000, dst_reg, imm); + break; + /* + * BPF_DIV / BPF_MOD + */ + case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */ + case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */ + { + int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; -static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter, - unsigned int *addrs, int i, int last) -{ - unsigned int K; - int offset; - unsigned int mask; - u16 code; - - K = filter->k; - code = bpf_anc_helper(filter); - - switch (code) { - case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ - jit->seen |= SEEN_XREG; - /* ar %r5,%r12 */ - EMIT2(0x1a5c); - break; - case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ - if (!K) - break; - if (K <= 16383) - /* ahi %r5, */ - EMIT4_IMM(0xa75a0000, K); - else if (test_facility(21)) - /* alfi %r5, */ - EMIT6_IMM(0xc25b0000, K); - else - /* a %r5,(%r13) */ - EMIT4_DISP(0x5a50d000, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ - jit->seen |= SEEN_XREG; - /* sr %r5,%r12 */ - EMIT2(0x1b5c); - break; - case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ - if (!K) - break; - if (K <= 16384) - /* ahi %r5,-K */ - EMIT4_IMM(0xa75a0000, -K); - else if (test_facility(21)) - /* alfi %r5,-K */ - EMIT6_IMM(0xc25b0000, -K); - else - /* s %r5,(%r13) */ - EMIT4_DISP(0x5b50d000, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ - jit->seen |= SEEN_XREG; - /* msr %r5,%r12 */ - EMIT4(0xb252005c); - break; - case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ - if (K <= 16383) - /* mhi %r5,K */ - EMIT4_IMM(0xa75c0000, K); - else if (test_facility(34)) - /* msfi %r5, */ - EMIT6_IMM(0xc2510000, K); - else - /* ms %r5,(%r13) */ - EMIT4_DISP(0x7150d000, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ - jit->seen |= SEEN_XREG | SEEN_RET0; - /* ltr %r12,%r12 */ - EMIT2(0x12cc); - /* jz */ - EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); - /* lhi %r4,0 */ - EMIT4(0xa7480000); - /* dlr %r4,%r12 */ - EMIT4(0xb997004c); - break; - case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */ - if (K == 1) - break; - /* lhi %r4,0 */ - EMIT4(0xa7480000); - /* dl %r4,(%r13) */ - EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ - jit->seen |= SEEN_XREG | SEEN_RET0; - /* ltr %r12,%r12 */ - EMIT2(0x12cc); - /* jz */ - EMIT4_PCREL(0xa7840000, (jit->ret0_ip - jit->prg)); - /* lhi %r4,0 */ - EMIT4(0xa7480000); - /* dlr %r4,%r12 */ - EMIT4(0xb997004c); - /* lr %r5,%r4 */ - EMIT2(0x1854); - break; - case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */ - if (K == 1) { - /* lhi %r5,0 */ - EMIT4(0xa7580000); - break; - } - /* lhi %r4,0 */ - EMIT4(0xa7480000); - /* dl %r4,(%r13) */ - EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K)); - /* lr %r5,%r4 */ - EMIT2(0x1854); - break; - case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ - jit->seen |= SEEN_XREG; - /* nr %r5,%r12 */ - EMIT2(0x145c); - break; - case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ - if (test_facility(21)) - /* nilf %r5, */ - EMIT6_IMM(0xc05b0000, K); - else - /* n %r5,(%r13) */ - EMIT4_DISP(0x5450d000, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ - jit->seen |= SEEN_XREG; - /* or %r5,%r12 */ - EMIT2(0x165c); - break; - case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ - if (test_facility(21)) - /* oilf %r5, */ - EMIT6_IMM(0xc05d0000, K); - else - /* o %r5,(%r13) */ - EMIT4_DISP(0x5650d000, EMIT_CONST(K)); - break; - case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */ - case BPF_ALU | BPF_XOR | BPF_X: - jit->seen |= SEEN_XREG; - /* xr %r5,%r12 */ - EMIT2(0x175c); - break; - case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */ - if (!K) - break; - /* x %r5,(%r13) */ - EMIT4_DISP(0x5750d000, EMIT_CONST(K)); - break; - case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */ - jit->seen |= SEEN_XREG; - /* sll %r5,0(%r12) */ - EMIT4(0x8950c000); - break; - case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ - if (K == 0) - break; - /* sll %r5,K */ - EMIT4_DISP(0x89500000, K); - break; - case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */ - jit->seen |= SEEN_XREG; - /* srl %r5,0(%r12) */ - EMIT4(0x8850c000); - break; - case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */ - if (K == 0) - break; - /* srl %r5,K */ - EMIT4_DISP(0x88500000, K); - break; - case BPF_ALU | BPF_NEG: /* A = -A */ - /* lcr %r5,%r5 */ - EMIT2(0x1355); - break; - case BPF_JMP | BPF_JA: /* ip += K */ - offset = addrs[i + K] + jit->start - jit->prg; - EMIT4_PCREL(0xa7f40000, offset); - break; - case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */ - mask = 0x200000; /* jh */ - goto kbranch; - case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */ - mask = 0xa00000; /* jhe */ - goto kbranch; - case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */ - mask = 0x800000; /* je */ -kbranch: /* Emit compare if the branch targets are different */ - if (filter->jt != filter->jf) { - if (test_facility(21)) - /* clfi %r5, */ - EMIT6_IMM(0xc25f0000, K); - else - /* cl %r5,(%r13) */ - EMIT4_DISP(0x5550d000, EMIT_CONST(K)); - } -branch: if (filter->jt == filter->jf) { - if (filter->jt == 0) - break; - /* j */ - offset = addrs[i + filter->jt] + jit->start - jit->prg; - EMIT4_PCREL(0xa7f40000, offset); - break; - } - if (filter->jt != 0) { - /* brc , */ - offset = addrs[i + filter->jt] + jit->start - jit->prg; - EMIT4_PCREL(0xa7040000 | mask, offset); - } - if (filter->jf != 0) { - /* brc , */ - offset = addrs[i + filter->jf] + jit->start - jit->prg; - EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset); - } - break; - case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */ - mask = 0x700000; /* jnz */ - /* Emit test if the branch targets are different */ - if (filter->jt != filter->jf) { - if (K > 65535) { - /* lr %r4,%r5 */ - EMIT2(0x1845); - /* n %r4,(%r13) */ - EMIT4_DISP(0x5440d000, EMIT_CONST(K)); - } else - /* tmll %r5,K */ - EMIT4_IMM(0xa7510000, K); - } - goto branch; - case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */ - mask = 0x200000; /* jh */ - goto xbranch; - case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */ - mask = 0xa00000; /* jhe */ - goto xbranch; - case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */ - mask = 0x800000; /* je */ -xbranch: /* Emit compare if the branch targets are different */ - if (filter->jt != filter->jf) { - jit->seen |= SEEN_XREG; - /* clr %r5,%r12 */ - EMIT2(0x155c); - } - goto branch; - case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */ - mask = 0x700000; /* jnz */ - /* Emit test if the branch targets are different */ - if (filter->jt != filter->jf) { - jit->seen |= SEEN_XREG; - /* lr %r4,%r5 */ - EMIT2(0x1845); - /* nr %r4,%r12 */ - EMIT2(0x144c); - } - goto branch; - case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD; - offset = jit->off_load_word; - goto load_abs; - case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF; - offset = jit->off_load_half; - goto load_abs; - case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE; - offset = jit->off_load_byte; -load_abs: if ((int) K < 0) - goto out; -call_fn: /* lg %r1,(%r13) */ - EMIT6_DISP(0xe310d000, 0x0004, offset); - /* l %r3,(%r13) */ - EMIT4_DISP(0x5830d000, EMIT_CONST(K)); - /* basr %r8,%r1 */ - EMIT2(0x0d81); - /* jnz */ - EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg)); - break; - case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD; - offset = jit->off_load_iword; - goto call_fn; - case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF; - offset = jit->off_load_ihalf; - goto call_fn; - case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */ - jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE; - offset = jit->off_load_ibyte; - goto call_fn; - case BPF_LDX | BPF_B | BPF_MSH: - /* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */ jit->seen |= SEEN_RET0; - if ((int) K < 0) { - /* j */ - EMIT4_PCREL(0xa7f40000, (jit->ret0_ip - jit->prg)); + /* ltr %src,%src (if src == 0 goto fail) */ + EMIT2(0x1200, src_reg, src_reg); + /* jz */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lhi %w0,0 */ + EMIT4_IMM(0xa7080000, REG_W0, 0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dlr %w0,%src */ + EMIT4(0xb9970000, REG_W0, src_reg); + /* llgfr %dst,%rc */ + EMIT4(0xb9160000, dst_reg, rc_reg); + break; + } + case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */ + case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */ + { + int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; + + jit->seen |= SEEN_RET0; + /* ltgr %src,%src (if src == 0 goto fail) */ + EMIT4(0xb9020000, src_reg, src_reg); + /* jz */ + EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* llgfr %dst,%src (u32 cast) */ + EMIT4(0xb9160000, dst_reg, src_reg); + /* dlgr %w0,%dst */ + EMIT4(0xb9870000, REG_W0, dst_reg); + /* lgr %dst,%rc */ + EMIT4(0xb9040000, dst_reg, rc_reg); + break; + } + case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */ + case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */ + { + int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; + + if (imm == 1) { + if (BPF_OP(insn->code) == BPF_MOD) + /* lhgi %dst,0 */ + EMIT4_IMM(0xa7090000, dst_reg, 0); break; } - jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH; - offset = jit->off_load_bmsh; - goto call_fn; - case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len)); + /* lhi %w0,0 */ + EMIT4_IMM(0xa7080000, REG_W0, 0); + /* lr %w1,%dst */ + EMIT2(0x1800, REG_W1, dst_reg); + /* dl %w0,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L, + EMIT_CONST_U32(imm)); + /* llgfr %dst,%rc */ + EMIT4(0xb9160000, dst_reg, rc_reg); break; - case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ - jit->seen |= SEEN_XREG; - /* l %r12,(%r2) */ - EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len)); + } + case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */ + case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */ + { + int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0; + + if (imm == 1) { + if (BPF_OP(insn->code) == BPF_MOD) + /* lhgi %dst,0 */ + EMIT4_IMM(0xa7090000, dst_reg, 0); + break; + } + /* lghi %w0,0 */ + EMIT4_IMM(0xa7090000, REG_W0, 0); + /* lgr %w1,%dst */ + EMIT4(0xb9040000, REG_W1, dst_reg); + /* dlg %w0,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L, + EMIT_CONST_U64((u32) imm)); + /* lgr %dst,%rc */ + EMIT4(0xb9040000, dst_reg, rc_reg); break; - case BPF_LD | BPF_IMM: /* A = K */ - if (K <= 16383) - /* lhi %r5,K */ - EMIT4_IMM(0xa7580000, K); - else if (test_facility(21)) - /* llilf %r5, */ - EMIT6_IMM(0xc05f0000, K); - else - /* l %r5,(%r13) */ - EMIT4_DISP(0x5850d000, EMIT_CONST(K)); + } + /* + * BPF_AND + */ + case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */ + /* nr %dst,%src */ + EMIT2(0x1400, dst_reg, src_reg); + EMIT_ZERO(dst_reg); break; - case BPF_LDX | BPF_IMM: /* X = K */ - jit->seen |= SEEN_XREG; - if (K <= 16383) - /* lhi %r12, */ - EMIT4_IMM(0xa7c80000, K); - else if (test_facility(21)) - /* llilf %r12, */ - EMIT6_IMM(0xc0cf0000, K); - else - /* l %r12,(%r13) */ - EMIT4_DISP(0x58c0d000, EMIT_CONST(K)); + case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ + /* ngr %dst,%src */ + EMIT4(0xb9800000, dst_reg, src_reg); break; - case BPF_LD | BPF_MEM: /* A = mem[K] */ - jit->seen |= SEEN_MEM; - /* l %r5,(%r15) */ - EMIT4_DISP(0x5850f000, - (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */ + /* nilf %dst,imm */ + EMIT6_IMM(0xc00b0000, dst_reg, imm); + EMIT_ZERO(dst_reg); break; - case BPF_LDX | BPF_MEM: /* X = mem[K] */ - jit->seen |= SEEN_XREG | SEEN_MEM; - /* l %r12,(%r15) */ - EMIT4_DISP(0x58c0f000, - (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ + /* ng %dst,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); break; - case BPF_MISC | BPF_TAX: /* X = A */ - jit->seen |= SEEN_XREG; - /* lr %r12,%r5 */ - EMIT2(0x18c5); + /* + * BPF_OR + */ + case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ + /* or %dst,%src */ + EMIT2(0x1600, dst_reg, src_reg); + EMIT_ZERO(dst_reg); break; - case BPF_MISC | BPF_TXA: /* A = X */ - jit->seen |= SEEN_XREG; - /* lr %r5,%r12 */ - EMIT2(0x185c); + case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ + /* ogr %dst,%src */ + EMIT4(0xb9810000, dst_reg, src_reg); break; - case BPF_RET | BPF_K: - if (K == 0) { - jit->seen |= SEEN_RET0; - if (last) - break; - /* j */ - EMIT4_PCREL(0xa7f40000, jit->ret0_ip - jit->prg); - } else { - if (K <= 16383) - /* lghi %r2,K */ - EMIT4_IMM(0xa7290000, K); - else - /* llgf %r2,(%r13) */ - EMIT6_DISP(0xe320d000, 0x0016, EMIT_CONST(K)); - /* j */ - if (last && !(jit->seen & SEEN_RET0)) - break; - EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */ + /* oilf %dst,imm */ + EMIT6_IMM(0xc00d0000, dst_reg, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */ + /* og %dst,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + break; + /* + * BPF_XOR + */ + case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */ + /* xr %dst,%src */ + EMIT2(0x1700, dst_reg, src_reg); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */ + /* xgr %dst,%src */ + EMIT4(0xb9820000, dst_reg, src_reg); + break; + case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */ + if (!imm) + break; + /* xilf %dst,imm */ + EMIT6_IMM(0xc0070000, dst_reg, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */ + /* xg %dst,(%l) */ + EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L, + EMIT_CONST_U64(imm)); + break; + /* + * BPF_LSH + */ + case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */ + /* sll %dst,0(%src) */ + EMIT4_DISP(0x89000000, dst_reg, src_reg, 0); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */ + /* sllg %dst,%dst,0(%src) */ + EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0); + break; + case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */ + if (imm == 0) + break; + /* sll %dst,imm(%r0) */ + EMIT4_DISP(0x89000000, dst_reg, REG_0, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */ + if (imm == 0) + break; + /* sllg %dst,%dst,imm(%r0) */ + EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm); + break; + /* + * BPF_RSH + */ + case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */ + /* srl %dst,0(%src) */ + EMIT4_DISP(0x88000000, dst_reg, src_reg, 0); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */ + /* srlg %dst,%dst,0(%src) */ + EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0); + break; + case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */ + if (imm == 0) + break; + /* srl %dst,imm(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, imm); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */ + if (imm == 0) + break; + /* srlg %dst,%dst,imm(%r0) */ + EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm); + break; + /* + * BPF_ARSH + */ + case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */ + /* srag %dst,%dst,0(%src) */ + EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0); + break; + case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */ + if (imm == 0) + break; + /* srag %dst,%dst,imm(%r0) */ + EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm); + break; + /* + * BPF_NEG + */ + case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */ + /* lcr %dst,%dst */ + EMIT2(0x1300, dst_reg, dst_reg); + EMIT_ZERO(dst_reg); + break; + case BPF_ALU64 | BPF_NEG: /* dst = -dst */ + /* lcgr %dst,%dst */ + EMIT4(0xb9130000, dst_reg, dst_reg); + break; + /* + * BPF_FROM_BE/LE + */ + case BPF_ALU | BPF_END | BPF_FROM_BE: + /* s390 is big endian, therefore only clear high order bytes */ + switch (imm) { + case 16: /* dst = (u16) cpu_to_be16(dst) */ + /* llghr %dst,%dst */ + EMIT4(0xb9850000, dst_reg, dst_reg); + break; + case 32: /* dst = (u32) cpu_to_be32(dst) */ + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + case 64: /* dst = (u64) cpu_to_be64(dst) */ + break; } break; - case BPF_RET | BPF_A: - /* llgfr %r2,%r5 */ - EMIT4(0xb9160025); + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: /* dst = (u16) cpu_to_le16(dst) */ + /* lrvr %dst,%dst */ + EMIT4(0xb91f0000, dst_reg, dst_reg); + /* srl %dst,16(%r0) */ + EMIT4_DISP(0x88000000, dst_reg, REG_0, 16); + /* llghr %dst,%dst */ + EMIT4(0xb9850000, dst_reg, dst_reg); + break; + case 32: /* dst = (u32) cpu_to_le32(dst) */ + /* lrvr %dst,%dst */ + EMIT4(0xb91f0000, dst_reg, dst_reg); + /* llgfr %dst,%dst */ + EMIT4(0xb9160000, dst_reg, dst_reg); + break; + case 64: /* dst = (u64) cpu_to_le64(dst) */ + /* lrvgr %dst,%dst */ + EMIT4(0xb90f0000, dst_reg, dst_reg); + break; + } + break; + /* + * BPF_ST(X) + */ + case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */ + /* stcy %src,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ + /* sthy %src,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ + /* sty %src,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ + /* stg %src,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ + /* lhi %w0,imm */ + EMIT4_IMM(0xa7080000, REG_W0, (u8) imm); + /* stcy %w0,off(dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ + /* lhi %w0,imm */ + EMIT4_IMM(0xa7080000, REG_W0, (u16) imm); + /* sthy %w0,off(dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ + /* llilf %w0,imm */ + EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm); + /* sty %w0,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ + /* lgfi %w0,imm */ + EMIT6_IMM(0xc0010000, REG_W0, imm); + /* stg %w0,off(%dst) */ + EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + /* + * BPF_STX XADD (atomic_add) + */ + case BPF_STX | BPF_XADD | BPF_W: /* *(u32 *)(dst + off) += src */ + /* laal %w0,%src,off(%dst) */ + EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W0, src_reg, + dst_reg, off); + jit->seen |= SEEN_MEM; + break; + case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */ + /* laalg %w0,%src,off(%dst) */ + EMIT6_DISP_LH(0xeb000000, 0x00ea, REG_W0, src_reg, + dst_reg, off); + jit->seen |= SEEN_MEM; + break; + /* + * BPF_LDX + */ + case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */ + /* llgc %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */ + /* llgh %dst,0(off,%src) */ + EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off); + jit->seen |= SEEN_MEM; + break; + case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */ + /* llgf %dst,off(%src) */ + jit->seen |= SEEN_MEM; + EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off); + break; + case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */ + /* lg %dst,0(off,%src) */ + jit->seen |= SEEN_MEM; + EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off); + break; + /* + * BPF_JMP / CALL + */ + case BPF_JMP | BPF_CALL: + { + /* + * b0 = (__bpf_call_base + imm)(b1, b2, b3, b4, b5) + */ + const u64 func = (u64)__bpf_call_base + imm; + + REG_SET_SEEN(BPF_REG_5); + jit->seen |= SEEN_FUNC; + /* lg %w1,(%l) */ + EMIT6_DISP(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, + EMIT_CONST_U64(func)); + /* basr %r14,%w1 */ + EMIT2(0x0d00, REG_14, REG_W1); + /* lgr %b0,%r2: load return value into %b0 */ + EMIT4(0xb9040000, BPF_REG_0, REG_2); + break; + } + case BPF_JMP | BPF_EXIT: /* return b0 */ + last = (i == fp->len - 1) ? 1 : 0; + if (last && !(jit->seen & SEEN_RET0)) + break; /* j */ EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); break; - case BPF_ST: /* mem[K] = A */ - jit->seen |= SEEN_MEM; - /* st %r5,(%r15) */ - EMIT4_DISP(0x5050f000, - (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); + /* + * Branch relative (number of skipped instructions) to offset on + * condition. + * + * Condition code to mask mapping: + * + * CC | Description | Mask + * ------------------------------ + * 0 | Operands equal | 8 + * 1 | First operand low | 4 + * 2 | First operand high | 2 + * 3 | Unused | 1 + * + * For s390x relative branches: ip = ip + off_bytes + * For BPF relative branches: insn = insn + off_insns + 1 + * + * For example for s390x with offset 0 we jump to the branch + * instruction itself (loop) and for BPF with offset 0 we + * branch to the instruction behind the branch. + */ + case BPF_JMP | BPF_JA: /* if (true) */ + mask = 0xf000; /* j */ + goto branch_oc; + case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */ + mask = 0x2000; /* jh */ + goto branch_ks; + case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */ + mask = 0xa000; /* jhe */ + goto branch_ks; + case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */ + mask = 0x2000; /* jh */ + goto branch_ku; + case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */ + mask = 0xa000; /* jhe */ + goto branch_ku; + case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */ + mask = 0x7000; /* jne */ + goto branch_ku; + case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */ + mask = 0x8000; /* je */ + goto branch_ku; + case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */ + mask = 0x7000; /* jnz */ + /* lgfi %w1,imm (load sign extend imm) */ + EMIT6_IMM(0xc0010000, REG_W1, imm); + /* ngr %w1,%dst */ + EMIT4(0xb9800000, REG_W1, dst_reg); + goto branch_oc; + + case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */ + mask = 0x2000; /* jh */ + goto branch_xs; + case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */ + mask = 0xa000; /* jhe */ + goto branch_xs; + case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */ + mask = 0x2000; /* jh */ + goto branch_xu; + case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */ + mask = 0xa000; /* jhe */ + goto branch_xu; + case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */ + mask = 0x7000; /* jne */ + goto branch_xu; + case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */ + mask = 0x8000; /* je */ + goto branch_xu; + case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */ + mask = 0x7000; /* jnz */ + /* ngrk %w1,%dst,%src */ + EMIT4_RRF(0xb9e40000, REG_W1, dst_reg, src_reg); + goto branch_oc; +branch_ks: + /* lgfi %w1,imm (load sign extend imm) */ + EMIT6_IMM(0xc0010000, REG_W1, imm); + /* cgrj %dst,%w1,mask,off */ + EMIT6_PCREL(0xec000000, 0x0064, dst_reg, REG_W1, i, off, mask); break; - case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */ - jit->seen |= SEEN_XREG | SEEN_MEM; - /* st %r12,(%r15) */ - EMIT4_DISP(0x50c0f000, - (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4); +branch_ku: + /* lgfi %w1,imm (load sign extend imm) */ + EMIT6_IMM(0xc0010000, REG_W1, imm); + /* clgrj %dst,%w1,mask,off */ + EMIT6_PCREL(0xec000000, 0x0065, dst_reg, REG_W1, i, off, mask); break; - case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - /* lhi %r5,0 */ - EMIT4(0xa7580000); - /* icm %r5,3,(%r2) */ - EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol)); +branch_xs: + /* cgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, 0x0064, dst_reg, src_reg, i, off, mask); break; - case BPF_ANC | SKF_AD_IFINDEX: /* if (!skb->dev) return 0; - * A = skb->dev->ifindex */ - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - jit->seen |= SEEN_RET0; - /* lg %r1,(%r2) */ - EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); - /* ltgr %r1,%r1 */ - EMIT4(0xb9020011); - /* jz */ - EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); - /* l %r5,(%r1) */ - EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex)); +branch_xu: + /* clgrj %dst,%src,mask,off */ + EMIT6_PCREL(0xec000000, 0x0065, dst_reg, src_reg, i, off, mask); break; - case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark)); +branch_oc: + /* brc mask,jmp_off (branch instruction needs 4 bytes) */ + jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4); + EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off); break; - case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); - /* lhi %r5,0 */ - EMIT4(0xa7580000); - /* icm %r5,3,(%r2) */ - EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping)); - break; - case BPF_ANC | SKF_AD_HATYPE: /* if (!skb->dev) return 0; - * A = skb->dev->type */ - BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - jit->seen |= SEEN_RET0; - /* lg %r1,(%r2) */ - EMIT6_DISP(0xe3102000, 0x0004, offsetof(struct sk_buff, dev)); - /* ltgr %r1,%r1 */ - EMIT4(0xb9020011); - /* jz */ - EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg); - /* lhi %r5,0 */ - EMIT4(0xa7580000); - /* icm %r5,3,(%r1) */ - EMIT4_DISP(0xbf531000, offsetof(struct net_device, type)); - break; - case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */ - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); - /* l %r5,(%r2) */ - EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash)); - break; - case BPF_ANC | SKF_AD_VLAN_TAG: - case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: - BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); - BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - /* lhi %r5,0 */ - EMIT4(0xa7580000); - /* icm %r5,3,(%r2) */ - EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci)); - if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { - /* nill %r5,0xefff */ - EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT); - } else { - /* nill %r5,0x1000 */ - EMIT4_IMM(0xa5570000, VLAN_TAG_PRESENT); - /* srl %r5,12 */ - EMIT4_DISP(0x88500000, 12); - } - break; - case BPF_ANC | SKF_AD_PKTTYPE: - /* lhi %r5,0 */ - EMIT4(0xa7580000); - /* ic %r5,(%r2) */ - EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET()); - /* srl %r5,5 */ - EMIT4_DISP(0x88500000, 5); - break; - case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */ -#ifdef CONFIG_SMP - /* l %r5, */ - EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr)); -#else - /* lhi %r5,0 */ - EMIT4(0xa7580000); -#endif + /* + * BPF_LD + */ + case BPF_LD | BPF_ABS | BPF_B: /* b0 = *(u8 *) (skb->data+imm) */ + case BPF_LD | BPF_IND | BPF_B: /* b0 = *(u8 *) (skb->data+imm+src) */ + if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) + func_addr = __pa(sk_load_byte_pos); + else + func_addr = __pa(sk_load_byte); + goto call_fn; + case BPF_LD | BPF_ABS | BPF_H: /* b0 = *(u16 *) (skb->data+imm) */ + case BPF_LD | BPF_IND | BPF_H: /* b0 = *(u16 *) (skb->data+imm+src) */ + if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) + func_addr = __pa(sk_load_half_pos); + else + func_addr = __pa(sk_load_half); + goto call_fn; + case BPF_LD | BPF_ABS | BPF_W: /* b0 = *(u32 *) (skb->data+imm) */ + case BPF_LD | BPF_IND | BPF_W: /* b0 = *(u32 *) (skb->data+imm+src) */ + if ((BPF_MODE(insn->code) == BPF_ABS) && (imm >= 0)) + func_addr = __pa(sk_load_word_pos); + else + func_addr = __pa(sk_load_word); + goto call_fn; +call_fn: + jit->seen |= SEEN_SKB | SEEN_RET0 | SEEN_FUNC; + REG_SET_SEEN(REG_14); /* Return address of possible func call */ + + /* + * Implicit input: + * BPF_REG_6 (R7) : skb pointer + * REG_SKB_DATA (R12): skb data pointer + * + * Calculated input: + * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb + * BPF_REG_5 (R6) : return address + * + * Output: + * BPF_REG_0 (R14): data read from skb + * + * Scratch registers (BPF_REG_1-5) + */ + + /* Call function: llilf %w1,func_addr */ + EMIT6_IMM(0xc00f0000, REG_W1, func_addr); + + /* Offset: lgfi %b2,imm */ + EMIT6_IMM(0xc0010000, BPF_REG_2, imm); + if (BPF_MODE(insn->code) == BPF_IND) + /* agfr %b2,%src (%src is s32 here) */ + EMIT4(0xb9180000, BPF_REG_2, src_reg); + + /* basr %b5,%w1 (%b5 is call saved) */ + EMIT2(0x0d00, BPF_REG_5, REG_W1); + + /* + * Note: For fast access we jump directly after the + * jnz instruction from bpf_jit.S + */ + /* jnz */ + EMIT4_PCREL(0xa7740000, jit->ret0_ip - jit->prg); break; default: /* too complex, give up */ - goto out; + pr_err("Unknown opcode %02x\n", insn->code); + return -1; } - addrs[i] = jit->prg - jit->start; - return 0; -out: - return -1; + return insn_count; } +/* + * Compile eBPF program into s390x code + */ +static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) +{ + int i, insn_count; + + jit->lit = jit->lit_start; + jit->prg = 0; + + bpf_jit_prologue(jit); + for (i = 0; i < fp->len; i += insn_count) { + insn_count = bpf_jit_insn(jit, fp, i); + if (insn_count < 0) + return -1; + jit->addrs[i + 1] = jit->prg; /* Next instruction address */ + } + bpf_jit_epilogue(jit); + + jit->lit_start = jit->prg; + jit->size = jit->lit; + jit->size_prg = jit->prg; + return 0; +} + +/* + * Classic BPF function stub. BPF programs will be converted into + * eBPF and then bpf_int_jit_compile() will be called. + */ void bpf_jit_compile(struct bpf_prog *fp) { - struct bpf_binary_header *header = NULL; - unsigned long size, prg_len, lit_len; - struct bpf_jit jit, cjit; - unsigned int *addrs; - int pass, i; +} + +/* + * Compile eBPF program "fp" + */ +void bpf_int_jit_compile(struct bpf_prog *fp) +{ + struct bpf_binary_header *header; + struct bpf_jit jit; + int pass; if (!bpf_jit_enable) return; - addrs = kcalloc(fp->len, sizeof(*addrs), GFP_KERNEL); - if (addrs == NULL) + memset(&jit, 0, sizeof(jit)); + jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); + if (jit.addrs == NULL) return; - memset(&jit, 0, sizeof(cjit)); - memset(&cjit, 0, sizeof(cjit)); - - for (pass = 0; pass < 10; pass++) { - jit.prg = jit.start; - jit.lit = jit.mid; - - bpf_jit_prologue(&jit); - bpf_jit_noleaks(&jit, fp->insns); - for (i = 0; i < fp->len; i++) { - if (bpf_jit_insn(&jit, fp->insns + i, addrs, i, - i == fp->len - 1)) - goto out; - } - bpf_jit_epilogue(&jit); - if (jit.start) { - WARN_ON(jit.prg > cjit.prg || jit.lit > cjit.lit); - if (memcmp(&jit, &cjit, sizeof(jit)) == 0) - break; - } else if (jit.prg == cjit.prg && jit.lit == cjit.lit) { - prg_len = jit.prg - jit.start; - lit_len = jit.lit - jit.mid; - size = prg_len + lit_len; - if (size >= BPF_SIZE_MAX) - goto out; - header = bpf_jit_binary_alloc(size, &jit.start, - 2, bpf_jit_fill_hole); - if (!header) - goto out; - jit.prg = jit.mid = jit.start + prg_len; - jit.lit = jit.end = jit.start + prg_len + lit_len; - jit.base_ip += (unsigned long) jit.start; - jit.exit_ip += (unsigned long) jit.start; - jit.ret0_ip += (unsigned long) jit.start; - } - cjit = jit; + /* + * Three initial passes: + * - 1/2: Determine clobbered registers + * - 3: Calculate program size and addrs arrray + */ + for (pass = 1; pass <= 3; pass++) { + if (bpf_jit_prog(&jit, fp)) + goto free_addrs; } + /* + * Final pass: Allocate and generate program + */ + if (jit.size >= BPF_SIZE_MAX) + goto free_addrs; + header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); + if (!header) + goto free_addrs; + if (bpf_jit_prog(&jit, fp)) + goto free_addrs; if (bpf_jit_enable > 1) { - bpf_jit_dump(fp->len, jit.end - jit.start, pass, jit.start); - if (jit.start) - print_fn_code(jit.start, jit.mid - jit.start); + bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf); + if (jit.prg_buf) + print_fn_code(jit.prg_buf, jit.size_prg); } - if (jit.start) { + if (jit.prg_buf) { set_memory_ro((unsigned long)header, header->pages); - fp->bpf_func = (void *) jit.start; + fp->bpf_func = (void *) jit.prg_buf; fp->jited = true; } -out: - kfree(addrs); +free_addrs: + kfree(jit.addrs); } +/* + * Free eBPF program + */ void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; From 8741ce6d114873e482b1ef298d6537d18ff393d3 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 1 Apr 2015 18:49:11 +0200 Subject: [PATCH 3/8] s390/mm: Fix memory hotplug for unaligned standby memory Commit 27356f54c8c3 ("mm/hotplug: verify hotplug memory range") introduced a check that makes add_memory() only accept section size aligned memory. Therefore on z/VM systems, where standby memory is not aligned, no standby memory is registered at all. Example: #cp def store 3504M standby 2336M 00: CP Q V STORE 00: STORAGE = 3504M MAX = 6G INC = 8M STANDBY = 2336M RESERVED = 0 For this setup the following error message is printed: Section-unaligned hotplug range: start 0xdb000000, size 0x92000000 So fix this and register aligned memory in "sclp_cmd.c". This means that for the corner cases where the standby memory is not aligned we loose some memory. In order to inform the user about the potential loss of standby memory, we add a new message for each added standby block and print how much of the standby memory is usable, for example: sclp_cmd.4336b4: Standby memory at 0x50000000 (256M of 256M usable) sclp_cmd.4336b4: Standby memory at 0xb0000000 (256M of 256M usable) sclp_cmd.4336b4: Standby memory at 0xdb000000 (2048M of 2336M usable) We also ensure that a potential memory block that contains both "assigned" and "standby" memory cannot be setup offline. Reviewed-by: Gerald Schaefer Reviewed-by: Heiko Carstens Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_cmd.c | 48 ++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 6e14999f9e8f..7be782116dab 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -315,10 +315,29 @@ static int sclp_mem_change_state(unsigned long start, unsigned long size, rc |= sclp_assign_storage(incr->rn); else sclp_unassign_storage(incr->rn); + if (rc == 0) + incr->standby = online ? 0 : 1; } return rc ? -EIO : 0; } +static bool contains_standby_increment(unsigned long start, unsigned long end) +{ + struct memory_increment *incr; + unsigned long istart; + + list_for_each_entry(incr, &sclp_mem_list, list) { + istart = rn2addr(incr->rn); + if (end - 1 < istart) + continue; + if (start > istart + sclp_rzm - 1) + continue; + if (incr->standby) + return true; + } + return false; +} + static int sclp_mem_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -334,8 +353,16 @@ static int sclp_mem_notifier(struct notifier_block *nb, for_each_clear_bit(id, sclp_storage_ids, sclp_max_storage_id + 1) sclp_attach_storage(id); switch (action) { - case MEM_ONLINE: case MEM_GOING_OFFLINE: + /* + * We do not allow to set memory blocks offline that contain + * standby memory. This is done to simplify the "memory online" + * case. + */ + if (contains_standby_increment(start, start + size)) + rc = -EPERM; + break; + case MEM_ONLINE: case MEM_CANCEL_OFFLINE: break; case MEM_GOING_ONLINE: @@ -361,6 +388,21 @@ static struct notifier_block sclp_mem_nb = { .notifier_call = sclp_mem_notifier, }; +static void __init align_to_block_size(unsigned long long *start, + unsigned long long *size) +{ + unsigned long long start_align, size_align, alignment; + + alignment = memory_block_size_bytes(); + start_align = roundup(*start, alignment); + size_align = rounddown(*start + *size, alignment) - start_align; + + pr_info("Standby memory at 0x%llx (%lluM of %lluM usable)\n", + *start, size_align >> 20, *size >> 20); + *start = start_align; + *size = size_align; +} + static void __init add_memory_merged(u16 rn) { static u16 first_rn, num; @@ -382,7 +424,9 @@ static void __init add_memory_merged(u16 rn) goto skip_add; if (memory_end_set && (start + size > memory_end)) size = memory_end - start; - add_memory(0, start, size); + align_to_block_size(&start, &size); + if (size) + add_memory(0, start, size); skip_add: first_rn = rn; num = 1; From f2608cd4a37479d11d0748f524a6a363ee38631f Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 2 Apr 2015 12:27:25 +0200 Subject: [PATCH 4/8] s390/dasd: fix inability to set a DASD device offline Fix ref counting for DASD devices leading to an inability to set a DASD device offline. Before a worker is scheduled the DASD device driver takes a reference to the device. If the worker was already scheduled this reference was never freed. Fix by giving the reference to the DASD device free when schedule_work() returns false. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 9 ++++++--- drivers/s390/block/dasd_eckd.c | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 26a51dc4278d..cd57857a1c84 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -579,7 +579,8 @@ void dasd_kick_device(struct dasd_device *device) { dasd_get_device(device); /* queue call to dasd_kick_device to the kernel event daemon. */ - schedule_work(&device->kick_work); + if (!schedule_work(&device->kick_work)) + dasd_put_device(device); } EXPORT_SYMBOL(dasd_kick_device); @@ -599,7 +600,8 @@ void dasd_reload_device(struct dasd_device *device) { dasd_get_device(device); /* queue call to dasd_reload_device to the kernel event daemon. */ - schedule_work(&device->reload_device); + if (!schedule_work(&device->reload_device)) + dasd_put_device(device); } EXPORT_SYMBOL(dasd_reload_device); @@ -619,7 +621,8 @@ void dasd_restore_device(struct dasd_device *device) { dasd_get_device(device); /* queue call to dasd_restore_device to the kernel event daemon. */ - schedule_work(&device->restore_device); + if (!schedule_work(&device->restore_device)) + dasd_put_device(device); } /* diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 49b48a887c66..6215f6455eb8 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1628,7 +1628,8 @@ static void dasd_eckd_kick_validate_server(struct dasd_device *device) return; } /* queue call to do_validate_server to the kernel event daemon. */ - schedule_work(&device->kick_validate); + if (!schedule_work(&device->kick_validate)) + dasd_put_device(device); } static u32 get_fcx_max_data(struct dasd_device *device) From a3147a7bc266df39b4f471ee7c4c9adcb56d29a6 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 2 Apr 2015 12:52:41 +0200 Subject: [PATCH 5/8] s390/dasd: fix unresumed device after suspend/resume The DASD device driver only has a limited amount of memory to build I/O requests. This memory was used by blocklayer requests leading to an inability to build needed internal requests to resume the device. Fix by preventing the DASD driver to fetch requests for a stopped device. Signed-off-by: Stefan Haberland Reference-ID: RQM 2520 Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index cd57857a1c84..a5ed35d0cbf3 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2527,6 +2527,11 @@ static void __dasd_process_request_queue(struct dasd_block *block) __blk_end_request_all(req, -EIO); return; } + + /* if device ist stopped do not fetch new requests */ + if (basedev->stopped) + return; + /* Now we try to fetch requests from the request queue */ while ((req = blk_peek_request(queue))) { if (basedev->features & DASD_FEATURE_READONLY && From df3044f1ef002c2269b11cb76a1b2bec629732b4 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 2 Apr 2015 13:18:39 +0200 Subject: [PATCH 6/8] s390/dasd: Fix unresumed device after suspend/resume having no paths The DASD device driver prevents I/O from being started on stopped devices. This also prevented channel paths to be verified and so the device was unable to be resumed. Fix by allowing path verification requests on stopped devices. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a5ed35d0cbf3..57fd66357b95 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -2166,18 +2166,22 @@ static int _dasd_sleep_on(struct dasd_ccw_req *maincqr, int interruptible) cqr->intrc = -ENOLINK; continue; } - /* Don't try to start requests if device is stopped */ - if (interruptible) { - rc = wait_event_interruptible( - generic_waitq, !(device->stopped)); - if (rc == -ERESTARTSYS) { - cqr->status = DASD_CQR_FAILED; - maincqr->intrc = rc; - continue; - } - } else - wait_event(generic_waitq, !(device->stopped)); - + /* + * Don't try to start requests if device is stopped + * except path verification requests + */ + if (!test_bit(DASD_CQR_VERIFY_PATH, &cqr->flags)) { + if (interruptible) { + rc = wait_event_interruptible( + generic_waitq, !(device->stopped)); + if (rc == -ERESTARTSYS) { + cqr->status = DASD_CQR_FAILED; + maincqr->intrc = rc; + continue; + } + } else + wait_event(generic_waitq, !(device->stopped)); + } if (!cqr->callback) cqr->callback = dasd_wakeup_cb; From 6001018ae8c659e624351d2e73b1272bacd68d6a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 10 Apr 2015 14:33:08 +0200 Subject: [PATCH 7/8] s390/pci: extract software counters from fmb The software counters are not a part of the function measurement block. Also we do not check for zdev->fmb != NULL when using these counters (function measurement can be toggled at runtime). Just move the software counters to struct zpci_dev. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 8 ++++---- arch/s390/pci/pci.c | 5 +++++ arch/s390/pci/pci_debug.c | 21 +++++++++++++++------ arch/s390/pci/pci_dma.c | 8 ++++---- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index ef803c202d42..d318e38dcb83 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -44,10 +44,6 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; - /* software counters */ - atomic64_t allocated_pages; - atomic64_t mapped_pages; - atomic64_t unmapped_pages; } __packed __aligned(16); enum zpci_state { @@ -111,6 +107,10 @@ struct zpci_dev { /* Function measurement block */ struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ + /* software counters */ + atomic64_t allocated_pages; + atomic64_t mapped_pages; + atomic64_t unmapped_pages; enum pci_bus_speed max_bus_speed; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 98336200c7b2..281893864e1b 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -190,6 +190,11 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) return -ENOMEM; WARN_ON((u64) zdev->fmb & 0xf); + /* reset software counters */ + atomic64_set(&zdev->allocated_pages, 0); + atomic64_set(&zdev->mapped_pages, 0); + atomic64_set(&zdev->unmapped_pages, 0); + args.fmb_addr = virt_to_phys(zdev->fmb); return mod_pci(zdev, ZPCI_MOD_FC_SET_MEASURE, 0, &args); } diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 3229a2e570df..97db1a411d54 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -31,12 +31,25 @@ static char *pci_perf_names[] = { "Refresh operations", "DMA read bytes", "DMA write bytes", - /* software counters */ +}; + +static char *pci_sw_names[] = { "Allocated pages", "Mapped pages", "Unmapped pages", }; +static void pci_sw_counter_show(struct seq_file *m) +{ + struct zpci_dev *zdev = m->private; + atomic64_t *counter = &zdev->allocated_pages; + int i; + + for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) + seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], + atomic64_read(counter)); +} + static int pci_perf_show(struct seq_file *m, void *v) { struct zpci_dev *zdev = m->private; @@ -63,12 +76,8 @@ static int pci_perf_show(struct seq_file *m, void *v) for (i = 4; i < 6; i++) seq_printf(m, "%26s:\t%llu\n", pci_perf_names[i], *(stat + i)); - /* software counters */ - for (i = 6; i < ARRAY_SIZE(pci_perf_names); i++) - seq_printf(m, "%26s:\t%llu\n", - pci_perf_names[i], - atomic64_read((atomic64_t *) (stat + i))); + pci_sw_counter_show(m); return 0; } diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4cbb29a4d615..6fd8d5836138 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -300,7 +300,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, flags |= ZPCI_TABLE_PROTECTED; if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) { - atomic64_add(nr_pages, &zdev->fmb->mapped_pages); + atomic64_add(nr_pages, &zdev->mapped_pages); return dma_addr + (offset & ~PAGE_MASK); } @@ -328,7 +328,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, zpci_err_hex(&dma_addr, sizeof(dma_addr)); } - atomic64_add(npages, &zdev->fmb->unmapped_pages); + atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; dma_free_iommu(zdev, iommu_page_index, npages); } @@ -357,7 +357,7 @@ static void *s390_dma_alloc(struct device *dev, size_t size, return NULL; } - atomic64_add(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); if (dma_handle) *dma_handle = map; return (void *) pa; @@ -370,7 +370,7 @@ static void s390_dma_free(struct device *dev, size_t size, struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); size = PAGE_ALIGN(size); - atomic64_sub(size / PAGE_SIZE, &zdev->fmb->allocated_pages); + atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long) pa, get_order(size)); } From 80ed156a3d12819a8c839f40f5b6996c4aa117a5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 10 Apr 2015 14:34:33 +0200 Subject: [PATCH 8/8] s390/pci: add locking for fmb access Function measurement can be toggled at runtime. Make sure that all access to the fmb is protected via a mutex. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 2 ++ arch/s390/pci/pci.c | 1 + arch/s390/pci/pci_debug.c | 15 +++++++++------ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index d318e38dcb83..a648338c434a 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -7,6 +7,7 @@ #define PCI_BAR_COUNT 6 #include +#include #include #include #include @@ -76,6 +77,7 @@ struct zpci_dev { u8 pft; /* pci function type */ u16 domain; + struct mutex lock; u8 pfip[CLP_PFIP_NR_SEGMENTS]; /* pci function internal path */ u32 uid; /* user defined id */ u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 281893864e1b..598f023cf8a6 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -827,6 +827,7 @@ int zpci_create_device(struct zpci_dev *zdev) if (rc) goto out; + mutex_init(&zdev->lock); if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { rc = zpci_enable_device(zdev); if (rc) diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 97db1a411d54..3fc9b4d90b59 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -58,8 +58,12 @@ static int pci_perf_show(struct seq_file *m, void *v) if (!zdev) return 0; - if (!zdev->fmb) + + mutex_lock(&zdev->lock); + if (!zdev->fmb) { + mutex_unlock(&zdev->lock); return seq_printf(m, "FMB statistics disabled\n"); + } /* header */ seq_printf(m, "FMB @ %p\n", zdev->fmb); @@ -78,6 +82,7 @@ static int pci_perf_show(struct seq_file *m, void *v) pci_perf_names[i], *(stat + i)); pci_sw_counter_show(m); + mutex_unlock(&zdev->lock); return 0; } @@ -95,19 +100,17 @@ static ssize_t pci_perf_seq_write(struct file *file, const char __user *ubuf, if (rc) return rc; + mutex_lock(&zdev->lock); switch (val) { case 0: rc = zpci_fmb_disable_device(zdev); - if (rc) - return rc; break; case 1: rc = zpci_fmb_enable_device(zdev); - if (rc) - return rc; break; } - return count; + mutex_unlock(&zdev->lock); + return rc ? rc : count; } static int pci_perf_seq_open(struct inode *inode, struct file *filp)