Commit e4b8b3f33fcaa0ed6e6b5482a606091d8cd20beb

Authored by Jan Glauber
Committed by Martin Schwidefsky
1 parent 0d804b2073

s390: add support for runtime instrumentation

Allow user-space threads to use runtime instrumentation (RI). To enable RI
for a thread there is a new s390-specific system call, sys_s390_runtime_instr,
that takes a command (start or stop) and a real-time signal number as
parameters. If the RI facility is available, the start command sets up a
control block for the calling thread with the appropriate permissions for the
thread to modify the control block.

The user-space thread can then use the store and modify RI instructions to
alter the control block and start/stop the instrumentation via RION/RIOFF.

If the user-specified program buffer runs full, RI triggers an external
interrupt. The external interrupt is translated to a real-time signal that
is delivered to the thread that enabled RI on that CPU. The number of
the real-time signal is the number specified in the RI system call, so
user space can select any available real-time signal number in case the
application itself uses real-time signals for other purposes.

The kernel saves the RI control blocks on task switch only if the running
thread was enabled for RI. Therefore, the performance impact on task switch
should be negligible if RI is not used.

RI is only enabled for user-space mode and is disabled for the supervisor
state.

Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

Showing 14 changed files with 300 additions and 3 deletions Side-by-side Diff

arch/s390/include/asm/cpu_mf.h
... ... @@ -21,11 +21,15 @@
21 21 #define CPU_MF_INT_SF_LSDA (1 << 22) /* loss of sample data alert */
22 22 #define CPU_MF_INT_CF_CACA (1 << 7) /* counter auth. change alert */
23 23 #define CPU_MF_INT_CF_LCDA (1 << 6) /* loss of counter data alert */
  24 +#define CPU_MF_INT_RI_HALTED (1 << 5) /* run-time instr. halted */
  25 +#define CPU_MF_INT_RI_BUF_FULL (1 << 4) /* run-time instr. program
  26 + buffer full */
24 27  
25 28 #define CPU_MF_INT_CF_MASK (CPU_MF_INT_CF_CACA|CPU_MF_INT_CF_LCDA)
26 29 #define CPU_MF_INT_SF_MASK (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE| \
27 30 CPU_MF_INT_SF_PRA|CPU_MF_INT_SF_SACA| \
28 31 CPU_MF_INT_SF_LSDA)
  32 +#define CPU_MF_INT_RI_MASK (CPU_MF_INT_RI_HALTED|CPU_MF_INT_RI_BUF_FULL)
29 33  
30 34 /* CPU measurement facility support */
31 35 static inline int cpum_cf_avail(void)
arch/s390/include/asm/irq.h
... ... @@ -19,6 +19,7 @@
19 19 EXTINT_IUC,
20 20 EXTINT_CMS,
21 21 EXTINT_CMC,
  22 + EXTINT_CMR,
22 23 IOINT_CIO,
23 24 IOINT_QAI,
24 25 IOINT_DAS,
arch/s390/include/asm/processor.h
... ... @@ -17,6 +17,7 @@
17 17 #include <asm/page.h>
18 18 #include <asm/ptrace.h>
19 19 #include <asm/setup.h>
  20 +#include <asm/runtime_instr.h>
20 21  
21 22 /*
22 23 * Default implementation of macro that returns current
... ... @@ -78,6 +79,9 @@
78 79 /* pfault_wait is used to block the process on a pfault event */
79 80 unsigned long pfault_wait;
80 81 struct list_head list;
  82 + /* cpu runtime instrumentation */
  83 + struct runtime_instr_cb *ri_cb;
  84 + int ri_signum;
81 85 };
82 86  
83 87 typedef struct thread_struct thread_struct;
arch/s390/include/asm/ptrace.h
... ... @@ -235,6 +235,7 @@
235 235 #define PSW_MASK_ASC 0x0000C000UL
236 236 #define PSW_MASK_CC 0x00003000UL
237 237 #define PSW_MASK_PM 0x00000F00UL
  238 +#define PSW_MASK_RI 0x00000000UL
238 239 #define PSW_MASK_EA 0x00000000UL
239 240 #define PSW_MASK_BA 0x00000000UL
240 241  
241 242  
... ... @@ -264,10 +265,11 @@
264 265 #define PSW_MASK_ASC 0x0000C00000000000UL
265 266 #define PSW_MASK_CC 0x0000300000000000UL
266 267 #define PSW_MASK_PM 0x00000F0000000000UL
  268 +#define PSW_MASK_RI 0x0000008000000000UL
267 269 #define PSW_MASK_EA 0x0000000100000000UL
268 270 #define PSW_MASK_BA 0x0000000080000000UL
269 271  
270   -#define PSW_MASK_USER 0x00003F0180000000UL
  272 +#define PSW_MASK_USER 0x00003F8180000000UL
271 273  
272 274 #define PSW_ADDR_AMODE 0x0000000000000000UL
273 275 #define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
arch/s390/include/asm/runtime_instr.h
  1 +#ifndef _RUNTIME_INSTR_H
  2 +#define _RUNTIME_INSTR_H
  3 +
  4 +#define S390_RUNTIME_INSTR_START 0x1
  5 +#define S390_RUNTIME_INSTR_STOP 0x2
  6 +
  7 +struct runtime_instr_cb {
  8 + __u64 buf_current;
  9 + __u64 buf_origin;
  10 + __u64 buf_limit;
  11 +
  12 + __u32 valid : 1;
  13 + __u32 pstate : 1;
  14 + __u32 pstate_set_buf : 1;
  15 + __u32 home_space : 1;
  16 + __u32 altered : 1;
  17 + __u32 : 3;
  18 + __u32 pstate_sample : 1;
  19 + __u32 sstate_sample : 1;
  20 + __u32 pstate_collect : 1;
  21 + __u32 sstate_collect : 1;
  22 + __u32 : 1;
  23 + __u32 halted_int : 1;
  24 + __u32 int_requested : 1;
  25 + __u32 buffer_full_int : 1;
  26 + __u32 key : 4;
  27 + __u32 : 9;
  28 + __u32 rgs : 3;
  29 +
  30 + __u32 mode : 4;
  31 + __u32 next : 1;
  32 + __u32 mae : 1;
  33 + __u32 : 2;
  34 + __u32 call_type_br : 1;
  35 + __u32 return_type_br : 1;
  36 + __u32 other_type_br : 1;
  37 + __u32 bc_other_type : 1;
  38 + __u32 emit : 1;
  39 + __u32 tx_abort : 1;
  40 + __u32 : 2;
  41 + __u32 bp_xn : 1;
  42 + __u32 bp_xt : 1;
  43 + __u32 bp_ti : 1;
  44 + __u32 bp_ni : 1;
  45 + __u32 suppr_y : 1;
  46 + __u32 suppr_z : 1;
  47 +
  48 + __u32 dc_miss_extra : 1;
  49 + __u32 lat_lev_ignore : 1;
  50 + __u32 ic_lat_lev : 4;
  51 + __u32 dc_lat_lev : 4;
  52 +
  53 + __u64 reserved1;
  54 + __u64 scaling_factor;
  55 + __u64 rsic;
  56 + __u64 reserved2;
  57 +} __packed __aligned(8);
  58 +
  59 +extern struct runtime_instr_cb runtime_instr_empty_cb;
  60 +
  61 +static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
  62 +{
  63 + asm volatile(".insn rsy,0xeb0000000060,0,0,%0" /* LRIC */
  64 + : : "Q" (*cb));
  65 +}
  66 +
  67 +static inline void store_runtime_instr_cb(struct runtime_instr_cb *cb)
  68 +{
  69 + asm volatile(".insn rsy,0xeb0000000061,0,0,%0" /* STRIC */
  70 + : "=Q" (*cb) : : "cc");
  71 +}
  72 +
  73 +static inline void save_ri_cb(struct runtime_instr_cb *cb_prev)
  74 +{
  75 +#ifdef CONFIG_64BIT
  76 + if (cb_prev)
  77 + store_runtime_instr_cb(cb_prev);
  78 +#endif
  79 +}
  80 +
  81 +static inline void restore_ri_cb(struct runtime_instr_cb *cb_next,
  82 + struct runtime_instr_cb *cb_prev)
  83 +{
  84 +#ifdef CONFIG_64BIT
  85 + if (cb_next)
  86 + load_runtime_instr_cb(cb_next);
  87 + else if (cb_prev)
  88 + load_runtime_instr_cb(&runtime_instr_empty_cb);
  89 +#endif
  90 +}
  91 +
  92 +#ifdef CONFIG_64BIT
  93 +extern void exit_thread_runtime_instr(void);
  94 +#else
  95 +static inline void exit_thread_runtime_instr(void) { }
  96 +#endif
  97 +
  98 +#endif /* _RUNTIME_INSTR_H */
arch/s390/include/asm/switch_to.h
... ... @@ -80,10 +80,12 @@
80 80 if (prev->mm) { \
81 81 save_fp_regs(&prev->thread.fp_regs); \
82 82 save_access_regs(&prev->thread.acrs[0]); \
  83 + save_ri_cb(prev->thread.ri_cb); \
83 84 } \
84 85 if (next->mm) { \
85 86 restore_fp_regs(&next->thread.fp_regs); \
86 87 restore_access_regs(&next->thread.acrs[0]); \
  88 + restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
87 89 update_per_regs(next); \
88 90 } \
89 91 prev = __switch_to(prev,next); \
arch/s390/include/asm/unistd.h
... ... @@ -277,7 +277,8 @@
277 277 #define __NR_setns 339
278 278 #define __NR_process_vm_readv 340
279 279 #define __NR_process_vm_writev 341
280   -#define NR_syscalls 342
  280 +#define __NR_s390_runtime_instr 342
  281 +#define NR_syscalls 343
281 282  
282 283 /*
283 284 * There are some system calls that are not present on 64 bit, some
arch/s390/kernel/Makefile
... ... @@ -49,7 +49,7 @@
49 49 obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
50 50 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
51 51 obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o
52   -obj-$(CONFIG_64BIT) += cache.o
  52 +obj-$(CONFIG_64BIT) += runtime_instr.o cache.o
53 53  
54 54 # Kexec part
55 55 S390_KEXEC_OBJS := machine_kexec.o
arch/s390/kernel/compat_wrapper.S
... ... @@ -1646,4 +1646,9 @@
1646 1646 llgf %r0,164(%r15) # unsigned long
1647 1647 stg %r0,160(%r15)
1648 1648 jg compat_sys_process_vm_writev
  1649 +
  1650 +ENTRY(sys_s390_runtime_instr_wrapper)
  1651 + lgfr %r2,%r2 # int
  1652 + lgfr %r3,%r3 # int
  1653 + jg sys_s390_runtime_instr
arch/s390/kernel/dis.c
... ... @@ -315,6 +315,8 @@
315 315 LONG_INSN_POPCNT,
316 316 LONG_INSN_RISBHG,
317 317 LONG_INSN_RISBLG,
  318 + LONG_INSN_RINEXT,
  319 + LONG_INSN_RIEMIT,
318 320 };
319 321  
320 322 static char *long_insn_name[] = {
... ... @@ -330,6 +332,8 @@
330 332 [LONG_INSN_POPCNT] = "popcnt",
331 333 [LONG_INSN_RISBHG] = "risbhg",
332 334 [LONG_INSN_RISBLG] = "risblk",
  335 + [LONG_INSN_RINEXT] = "rinext",
  336 + [LONG_INSN_RIEMIT] = "riemit",
333 337 };
334 338  
335 339 static struct insn opcode[] = {
... ... @@ -582,6 +586,17 @@
582 586 { "", 0, INSTR_INVALID }
583 587 };
584 588  
  589 +static struct insn opcode_aa[] = {
  590 +#ifdef CONFIG_64BIT
  591 + { { 0, LONG_INSN_RINEXT }, 0x00, INSTR_RI_RI },
  592 + { "rion", 0x01, INSTR_RI_RI },
  593 + { "tric", 0x02, INSTR_RI_RI },
  594 + { "rioff", 0x03, INSTR_RI_RI },
  595 + { { 0, LONG_INSN_RIEMIT }, 0x04, INSTR_RI_RI },
  596 +#endif
  597 + { "", 0, INSTR_INVALID }
  598 +};
  599 +
585 600 static struct insn opcode_b2[] = {
586 601 #ifdef CONFIG_64BIT
587 602 { "sske", 0x2b, INSTR_RRF_M0RR },
... ... @@ -1210,6 +1225,9 @@
1210 1225 { "cliy", 0x55, INSTR_SIY_URD },
1211 1226 { "oiy", 0x56, INSTR_SIY_URD },
1212 1227 { "xiy", 0x57, INSTR_SIY_URD },
  1228 + { "lric", 0x60, INSTR_RSY_RDRM },
  1229 + { "stric", 0x61, INSTR_RSY_RDRM },
  1230 + { "mric", 0x62, INSTR_RSY_RDRM },
1213 1231 { "icmh", 0x80, INSTR_RSE_RURD },
1214 1232 { "icmh", 0x80, INSTR_RSY_RURD },
1215 1233 { "icmy", 0x81, INSTR_RSY_RURD },
... ... @@ -1407,6 +1425,9 @@
1407 1425 break;
1408 1426 case 0xa7:
1409 1427 table = opcode_a7;
  1428 + break;
  1429 + case 0xaa:
  1430 + table = opcode_aa;
1410 1431 break;
1411 1432 case 0xb2:
1412 1433 table = opcode_b2;
arch/s390/kernel/irq.c
... ... @@ -44,6 +44,7 @@
44 44 {.name = "IUC", .desc = "[EXT] IUCV" },
45 45 {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling" },
46 46 {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter" },
  47 + {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI" },
47 48 {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt" },
48 49 {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt" },
49 50 {.name = "DAS", .desc = "[I/O] DASD" },
arch/s390/kernel/process.c
... ... @@ -30,6 +30,7 @@
30 30 #include <asm/nmi.h>
31 31 #include <asm/smp.h>
32 32 #include <asm/switch_to.h>
  33 +#include <asm/runtime_instr.h>
33 34 #include "entry.h"
34 35  
35 36 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
... ... @@ -132,6 +133,7 @@
132 133 */
133 134 void exit_thread(void)
134 135 {
  136 + exit_thread_runtime_instr();
135 137 }
136 138  
137 139 void flush_thread(void)
... ... @@ -169,6 +171,11 @@
169 171  
170 172 /* Save access registers to new thread structure. */
171 173 save_access_regs(&p->thread.acrs[0]);
  174 +
  175 + /* Don't copy runtime instrumentation info */
  176 + p->thread.ri_cb = NULL;
  177 + p->thread.ri_signum = 0;
  178 + frame->childregs.psw.mask &= ~PSW_MASK_RI;
172 179  
173 180 #ifndef CONFIG_64BIT
174 181 /*
arch/s390/kernel/runtime_instr.c
  1 +/*
  2 + * Copyright IBM Corp. 2012
  3 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com>
  4 + */
  5 +
  6 +#include <linux/kernel.h>
  7 +#include <linux/syscalls.h>
  8 +#include <linux/signal.h>
  9 +#include <linux/mm.h>
  10 +#include <linux/slab.h>
  11 +#include <linux/init.h>
  12 +#include <linux/errno.h>
  13 +#include <linux/kernel_stat.h>
  14 +#include <asm/runtime_instr.h>
  15 +#include <asm/cpu_mf.h>
  16 +#include <asm/irq.h>
  17 +
  18 +/* empty control block to disable RI by loading it */
  19 +struct runtime_instr_cb runtime_instr_empty_cb;
  20 +
  21 +static int runtime_instr_avail(void)
  22 +{
  23 + return test_facility(64);
  24 +}
  25 +
  26 +static void disable_runtime_instr(void)
  27 +{
  28 + struct pt_regs *regs = task_pt_regs(current);
  29 +
  30 + load_runtime_instr_cb(&runtime_instr_empty_cb);
  31 +
  32 + /*
  33 + * Make sure the RI bit is deleted from the PSW. If the user did not
  34 + * switch off RI before the system call the process will get a
  35 + * specification exception otherwise.
  36 + */
  37 + regs->psw.mask &= ~PSW_MASK_RI;
  38 +}
  39 +
  40 +static void init_runtime_instr_cb(struct runtime_instr_cb *cb)
  41 +{
  42 + cb->buf_limit = 0xfff;
  43 + if (addressing_mode == HOME_SPACE_MODE)
  44 + cb->home_space = 1;
  45 + cb->int_requested = 1;
  46 + cb->pstate = 1;
  47 + cb->pstate_set_buf = 1;
  48 + cb->pstate_sample = 1;
  49 + cb->pstate_collect = 1;
  50 + cb->key = PAGE_DEFAULT_KEY;
  51 + cb->valid = 1;
  52 +}
  53 +
  54 +void exit_thread_runtime_instr(void)
  55 +{
  56 + struct task_struct *task = current;
  57 +
  58 + if (!task->thread.ri_cb)
  59 + return;
  60 + disable_runtime_instr();
  61 + kfree(task->thread.ri_cb);
  62 + task->thread.ri_signum = 0;
  63 + task->thread.ri_cb = NULL;
  64 +}
  65 +
  66 +static void runtime_instr_int_handler(struct ext_code ext_code,
  67 + unsigned int param32, unsigned long param64)
  68 +{
  69 + struct siginfo info;
  70 +
  71 + if (!(param32 & CPU_MF_INT_RI_MASK))
  72 + return;
  73 +
  74 + kstat_cpu(smp_processor_id()).irqs[EXTINT_CMR]++;
  75 +
  76 + if (!current->thread.ri_cb)
  77 + return;
  78 + if (current->thread.ri_signum < SIGRTMIN ||
  79 + current->thread.ri_signum > SIGRTMAX) {
  80 + WARN_ON_ONCE(1);
  81 + return;
  82 + }
  83 +
  84 + memset(&info, 0, sizeof(info));
  85 + info.si_signo = current->thread.ri_signum;
  86 + info.si_code = SI_QUEUE;
  87 + if (param32 & CPU_MF_INT_RI_BUF_FULL)
  88 + info.si_int = ENOBUFS;
  89 + else if (param32 & CPU_MF_INT_RI_HALTED)
  90 + info.si_int = ECANCELED;
  91 + else
  92 + return; /* unknown reason */
  93 +
  94 + send_sig_info(current->thread.ri_signum, &info, current);
  95 +}
  96 +
  97 +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum)
  98 +{
  99 + struct runtime_instr_cb *cb;
  100 +
  101 + if (!runtime_instr_avail())
  102 + return -EOPNOTSUPP;
  103 +
  104 + if (command == S390_RUNTIME_INSTR_STOP) {
  105 + preempt_disable();
  106 + exit_thread_runtime_instr();
  107 + preempt_enable();
  108 + return 0;
  109 + }
  110 +
  111 + if (command != S390_RUNTIME_INSTR_START ||
  112 + (signum < SIGRTMIN || signum > SIGRTMAX))
  113 + return -EINVAL;
  114 +
  115 + if (!current->thread.ri_cb) {
  116 + cb = kzalloc(sizeof(*cb), GFP_KERNEL);
  117 + if (!cb)
  118 + return -ENOMEM;
  119 + } else {
  120 + cb = current->thread.ri_cb;
  121 + memset(cb, 0, sizeof(*cb));
  122 + }
  123 +
  124 + init_runtime_instr_cb(cb);
  125 + current->thread.ri_signum = signum;
  126 +
  127 + /* now load the control block to make it available */
  128 + preempt_disable();
  129 + current->thread.ri_cb = cb;
  130 + load_runtime_instr_cb(cb);
  131 + preempt_enable();
  132 + return 0;
  133 +}
  134 +
  135 +static int __init runtime_instr_init(void)
  136 +{
  137 + int rc;
  138 +
  139 + if (!runtime_instr_avail())
  140 + return 0;
  141 +
  142 + measurement_alert_subclass_register();
  143 + rc = register_external_interrupt(0x1407, runtime_instr_int_handler);
  144 + if (rc)
  145 + measurement_alert_subclass_unregister();
  146 + else
  147 + pr_info("Runtime instrumentation facility initialized\n");
  148 + return rc;
  149 +}
  150 +device_initcall(runtime_instr_init);
arch/s390/kernel/syscalls.S
... ... @@ -350,4 +350,5 @@
350 350 SYSCALL(sys_setns,sys_setns,sys_setns_wrapper)
351 351 SYSCALL(sys_process_vm_readv,sys_process_vm_readv,compat_sys_process_vm_readv_wrapper) /* 340 */
352 352 SYSCALL(sys_process_vm_writev,sys_process_vm_writev,compat_sys_process_vm_writev_wrapper)
  353 +SYSCALL(sys_ni_syscall,sys_s390_runtime_instr,sys_s390_runtime_instr_wrapper)