source: src/linux/universal/linux-4.9/arch/x86/events/intel/lbr.c @ 31885

1#include <linux/perf_event.h>
2#include <linux/types.h>
3
4#include <asm/perf_event.h>
5#include <asm/msr.h>
6#include <asm/insn.h>
7
8#include "../perf_event.h"
9
10enum {
11        LBR_FORMAT_32           = 0x00,
12        LBR_FORMAT_LIP          = 0x01,
13        LBR_FORMAT_EIP          = 0x02,
14        LBR_FORMAT_EIP_FLAGS    = 0x03,
15        LBR_FORMAT_EIP_FLAGS2   = 0x04,
16        LBR_FORMAT_INFO         = 0x05,
17        LBR_FORMAT_TIME         = 0x06,
18        LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_TIME,
19};
20
21static enum {
22        LBR_EIP_FLAGS           = 1,
23        LBR_TSX                 = 2,
24} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
25        [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
26        [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
27};
28
29/*
30 * Intel LBR_SELECT bits
31 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
32 *
33 * Hardware branch filter (not available on all CPUs)
34 */
35#define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
36#define LBR_USER_BIT            1 /* do not capture at ring > 0 */
37#define LBR_JCC_BIT             2 /* do not capture conditional branches */
38#define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
39#define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
40#define LBR_RETURN_BIT          5 /* do not capture near returns */
41#define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
42#define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
43#define LBR_FAR_BIT             8 /* do not capture far branches */
44#define LBR_CALL_STACK_BIT      9 /* enable call stack */
45
46/*
47 * The following bit only exists in Linux; we mask it out before writing
48 * it to the actual MSR, but it helps the perf constraint code understand
49 * that this is a separate configuration.
50 */
51#define LBR_NO_INFO_BIT        63 /* don't read LBR_INFO. */
52
53#define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
54#define LBR_USER        (1 << LBR_USER_BIT)
55#define LBR_JCC         (1 << LBR_JCC_BIT)
56#define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
57#define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
58#define LBR_RETURN      (1 << LBR_RETURN_BIT)
59#define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
60#define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
61#define LBR_FAR         (1 << LBR_FAR_BIT)
62#define LBR_CALL_STACK  (1 << LBR_CALL_STACK_BIT)
63#define LBR_NO_INFO     (1ULL << LBR_NO_INFO_BIT)
64
65#define LBR_PLM (LBR_KERNEL | LBR_USER)
66
67#define LBR_SEL_MASK    0x3ff   /* valid bits in LBR_SELECT */
68#define LBR_NOT_SUPP    -1      /* LBR filter not supported */
69#define LBR_IGN         0       /* ignored */
70
71#define LBR_ANY          \
72        (LBR_JCC        |\
73         LBR_REL_CALL   |\
74         LBR_IND_CALL   |\
75         LBR_RETURN     |\
76         LBR_REL_JMP    |\
77         LBR_IND_JMP    |\
78         LBR_FAR)
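/*
 * Worked example of the composite masks defined above (values shown for
 * illustration only):
 *
 *   LBR_PLM = LBR_KERNEL | LBR_USER                          = 0x003
 *   LBR_ANY = JCC | REL_CALL | IND_CALL | RETURN |
 *             REL_JMP | IND_JMP | FAR                        = 0x1fc
 *
 * Only bits covered by LBR_SEL_MASK (0x3ff) ever reach the hardware;
 * __intel_pmu_lbr_enable() below masks the configuration with
 * x86_pmu.lbr_sel_mask before writing MSR_LBR_SELECT, which is also how
 * the Linux-only LBR_NO_INFO bit is stripped.
 */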
79
80#define LBR_FROM_FLAG_MISPRED   BIT_ULL(63)
81#define LBR_FROM_FLAG_IN_TX     BIT_ULL(62)
82#define LBR_FROM_FLAG_ABORT     BIT_ULL(61)
83
84#define LBR_FROM_SIGNEXT_2MSB   (BIT_ULL(60) | BIT_ULL(59))
85
86/*
87 * x86 control flow change classification
88 * x86 control flow changes include branches, interrupts, traps, and faults
89 */
90enum {
91        X86_BR_NONE             = 0,      /* unknown */
92
93        X86_BR_USER             = 1 << 0, /* branch target is user */
94        X86_BR_KERNEL           = 1 << 1, /* branch target is kernel */
95
96        X86_BR_CALL             = 1 << 2, /* call */
97        X86_BR_RET              = 1 << 3, /* return */
98        X86_BR_SYSCALL          = 1 << 4, /* syscall */
99        X86_BR_SYSRET           = 1 << 5, /* syscall return */
100        X86_BR_INT              = 1 << 6, /* sw interrupt */
101        X86_BR_IRET             = 1 << 7, /* return from interrupt */
102        X86_BR_JCC              = 1 << 8, /* conditional */
103        X86_BR_JMP              = 1 << 9, /* jump */
104        X86_BR_IRQ              = 1 << 10,/* hw interrupt or trap or fault */
105        X86_BR_IND_CALL         = 1 << 11,/* indirect calls */
106        X86_BR_ABORT            = 1 << 12,/* transaction abort */
107        X86_BR_IN_TX            = 1 << 13,/* in transaction */
108        X86_BR_NO_TX            = 1 << 14,/* not in transaction */
109        X86_BR_ZERO_CALL        = 1 << 15,/* zero length call */
110        X86_BR_CALL_STACK       = 1 << 16,/* call stack */
111        X86_BR_IND_JMP          = 1 << 17,/* indirect jump */
112};
113
114#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
115#define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
116
117#define X86_BR_ANY       \
118        (X86_BR_CALL    |\
119         X86_BR_RET     |\
120         X86_BR_SYSCALL |\
121         X86_BR_SYSRET  |\
122         X86_BR_INT     |\
123         X86_BR_IRET    |\
124         X86_BR_JCC     |\
125         X86_BR_JMP      |\
126         X86_BR_IRQ      |\
127         X86_BR_ABORT    |\
128         X86_BR_IND_CALL |\
129         X86_BR_IND_JMP  |\
130         X86_BR_ZERO_CALL)
131
132#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
133
134#define X86_BR_ANY_CALL          \
135        (X86_BR_CALL            |\
136         X86_BR_IND_CALL        |\
137         X86_BR_ZERO_CALL       |\
138         X86_BR_SYSCALL         |\
139         X86_BR_IRQ             |\
140         X86_BR_INT)
141
142static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
143
144/*
145 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
146 * otherwise it becomes nearly impossible to get a reliable stack.
147 */
148
149static void __intel_pmu_lbr_enable(bool pmi)
150{
151        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
152        u64 debugctl, lbr_select = 0, orig_debugctl;
153
154        /*
155         * No need to unfreeze manually, as v4 can do that as part
156         * of the GLOBAL_STATUS ack.
157         */
158        if (pmi && x86_pmu.version >= 4)
159                return;
160
161        /*
162         * No need to reprogram LBR_SELECT in a PMI, as it
163         * did not change.
164         */
165        if (cpuc->lbr_sel)
166                lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
167        if (!pmi && cpuc->lbr_sel)
168                wrmsrl(MSR_LBR_SELECT, lbr_select);
169
170        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
171        orig_debugctl = debugctl;
172        debugctl |= DEBUGCTLMSR_LBR;
173        /*
174         * LBR callstack does not work well with FREEZE_LBRS_ON_PMI.
175         * If FREEZE_LBRS_ON_PMI is set, PMI near call/return instructions
176         * may cause superfluous increase/decrease of LBR_TOS.
177         */
178        if (!(lbr_select & LBR_CALL_STACK))
179                debugctl |= DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
180        if (orig_debugctl != debugctl)
181                wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
182}
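/*
 * Illustrative DEBUGCTL values for the two cases handled above, assuming
 * the usual IA32_DEBUGCTL layout (DEBUGCTLMSR_LBR is bit 0,
 * DEBUGCTLMSR_FREEZE_LBRS_ON_PMI is bit 11):
 *
 *   regular LBR filter:   debugctl |= 0x001 | 0x800  (LBR + freeze on PMI)
 *   LBR call stack:       debugctl |= 0x001          (freeze bit left clear)
 */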
183
184static void __intel_pmu_lbr_disable(void)
185{
186        u64 debugctl;
187
188        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
189        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
190        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
191}
192
193static void intel_pmu_lbr_reset_32(void)
194{
195        int i;
196
197        for (i = 0; i < x86_pmu.lbr_nr; i++)
198                wrmsrl(x86_pmu.lbr_from + i, 0);
199}
200
201static void intel_pmu_lbr_reset_64(void)
202{
203        int i;
204
205        for (i = 0; i < x86_pmu.lbr_nr; i++) {
206                wrmsrl(x86_pmu.lbr_from + i, 0);
207                wrmsrl(x86_pmu.lbr_to   + i, 0);
208                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
209                        wrmsrl(MSR_LBR_INFO_0 + i, 0);
210        }
211}
212
213void intel_pmu_lbr_reset(void)
214{
215        if (!x86_pmu.lbr_nr)
216                return;
217
218        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
219                intel_pmu_lbr_reset_32();
220        else
221                intel_pmu_lbr_reset_64();
222}
223
224/*
225 * TOS = most recently recorded branch
226 */
227static inline u64 intel_pmu_lbr_tos(void)
228{
229        u64 tos;
230
231        rdmsrl(x86_pmu.lbr_tos, tos);
232        return tos;
233}
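/*
 * Example of how the TOS value is used (illustration only): with
 * x86_pmu.lbr_nr = 16 the ring mask is 0xf.  If TOS reads back as 5,
 * the most recent branch lives in MSR pair 5, the previous one in 4,
 * and walking "(tos - i) & mask" for i = 0..15 visits
 * 5, 4, 3, 2, 1, 0, 15, 14, ... so older entries wrap around the ring.
 */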
234
235enum {
236        LBR_NONE,
237        LBR_VALID,
238};
239
240/*
241 * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
242 * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
243 * TSX is not supported they have no consistent behavior:
244 *
245 *   - For wrmsr(), bits 61:62 are considered part of the sign extension.
246 *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
247 *     part of the sign extension.
248 *
249 * Therefore, if:
250 *
251 *   1) LBR has TSX format
252 *   2) CPU has no TSX support enabled
253 *
254 * ... then any value passed to wrmsr() must be sign extended to 63 bits and any
255 * value from rdmsr() must be converted to have a 61-bit sign extension,
256 * ignoring the TSX flags.
257 */
258static inline bool lbr_from_signext_quirk_needed(void)
259{
260        int lbr_format = x86_pmu.intel_cap.lbr_format;
261        bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
262                           boot_cpu_has(X86_FEATURE_RTM);
263
264        return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
265}
266
267DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
268
269/* If quirk is enabled, ensure sign extension is 63 bits: */
270inline u64 lbr_from_signext_quirk_wr(u64 val)
271{
272        if (static_branch_unlikely(&lbr_from_quirk_key)) {
273                /*
274                 * Sign extend into bits 61:62 while preserving bit 63.
275                 *
276                 * Quirk is enabled when TSX is disabled. Therefore TSX bits
277                 * in val are always OFF and must be changed to be sign
278                 * extension bits. Since bits 59:60 are guaranteed to be
279                 * part of the sign extension bits, we can just copy them
280                 * to 61:62.
281                 */
282                val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
283        }
284        return val;
285}
286
287/*
288 * If quirk is needed, ensure sign extension is 61 bits:
289 */
290u64 lbr_from_signext_quirk_rd(u64 val)
291{
292        if (static_branch_unlikely(&lbr_from_quirk_key)) {
293                /*
294                 * Quirk is on when TSX is not enabled. Therefore TSX
295                 * flags must be read as OFF.
296                 */
297                val &= ~(LBR_FROM_FLAG_IN_TX | LBR_FROM_FLAG_ABORT);
298        }
299        return val;
300}
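/*
 * Worked example of the quirk (values shown for illustration only): on a
 * CPU without TSX but with a TSX-aware LBR format, a branch from the
 * kernel address 0xffffffff81000000 is captured with bits 61:62 forced
 * to 0, i.e. the MSR reads back as 0x1fffffff81000000 (assuming MISPRED,
 * bit 63, is clear).  Writing that value back on context restore would
 * not be properly sign extended, so lbr_from_signext_quirk_wr() copies
 * bits 59:60 (0x1800000000000000) up into bits 61:62, yielding
 * 0x7fffffff81000000 before the wrmsr().
 */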
301
302static inline void wrlbr_from(unsigned int idx, u64 val)
303{
304        val = lbr_from_signext_quirk_wr(val);
305        wrmsrl(x86_pmu.lbr_from + idx, val);
306}
307
308static inline void wrlbr_to(unsigned int idx, u64 val)
309{
310        wrmsrl(x86_pmu.lbr_to + idx, val);
311}
312
313static inline u64 rdlbr_from(unsigned int idx)
314{
315        u64 val;
316
317        rdmsrl(x86_pmu.lbr_from + idx, val);
318
319        return lbr_from_signext_quirk_rd(val);
320}
321
322static inline u64 rdlbr_to(unsigned int idx)
323{
324        u64 val;
325
326        rdmsrl(x86_pmu.lbr_to + idx, val);
327
328        return val;
329}
330
331static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
332{
333        int i;
334        unsigned lbr_idx, mask;
335        u64 tos;
336
337        if (task_ctx->lbr_callstack_users == 0 ||
338            task_ctx->lbr_stack_state == LBR_NONE) {
339                intel_pmu_lbr_reset();
340                return;
341        }
342
343        mask = x86_pmu.lbr_nr - 1;
344        tos = task_ctx->tos;
345        for (i = 0; i < tos; i++) {
346                lbr_idx = (tos - i) & mask;
347                wrlbr_from(lbr_idx, task_ctx->lbr_from[i]);
348                wrlbr_to  (lbr_idx, task_ctx->lbr_to[i]);
349
350                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
351                        wrmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
352        }
353        wrmsrl(x86_pmu.lbr_tos, tos);
354        task_ctx->lbr_stack_state = LBR_NONE;
355}
356
357static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
358{
359        unsigned lbr_idx, mask;
360        u64 tos;
361        int i;
362
363        if (task_ctx->lbr_callstack_users == 0) {
364                task_ctx->lbr_stack_state = LBR_NONE;
365                return;
366        }
367
368        mask = x86_pmu.lbr_nr - 1;
369        tos = intel_pmu_lbr_tos();
370        for (i = 0; i < tos; i++) {
371                lbr_idx = (tos - i) & mask;
372                task_ctx->lbr_from[i] = rdlbr_from(lbr_idx);
373                task_ctx->lbr_to[i]   = rdlbr_to(lbr_idx);
374                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
375                        rdmsrl(MSR_LBR_INFO_0 + lbr_idx, task_ctx->lbr_info[i]);
376        }
377        task_ctx->tos = tos;
378        task_ctx->lbr_stack_state = LBR_VALID;
379}
380
381void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
382{
383        struct x86_perf_task_context *task_ctx;
384
385        /*
386         * If LBR callstack feature is enabled and the stack was saved when
387         * the task was scheduled out, restore the stack. Otherwise flush
388         * the LBR stack.
389         */
390        task_ctx = ctx ? ctx->task_ctx_data : NULL;
391        if (task_ctx) {
392                if (sched_in)
393                        __intel_pmu_lbr_restore(task_ctx);
394                else
395                        __intel_pmu_lbr_save(task_ctx);
396                return;
397        }
398
399        /*
400         * Since a context switch can flip the address space and LBR entries
401         * are not tagged with an identifier, we need to wipe the LBR, even for
402         * per-cpu events. You simply cannot resolve the branches from the old
403         * address space.
404         */
405        if (sched_in)
406                intel_pmu_lbr_reset();
407}
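/*
 * Resulting flow for an LBR call-stack event, sketched for illustration:
 *
 *   schedule out -> __intel_pmu_lbr_save() snapshots the LBR stack into
 *                   task_ctx and marks it LBR_VALID
 *   schedule in  -> __intel_pmu_lbr_restore() replays the snapshot and
 *                   marks it LBR_NONE again
 *
 * Everything else (no task_ctx, no callstack users, stale state) falls
 * back to intel_pmu_lbr_reset() on schedule in.
 */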
408
409static inline bool branch_user_callstack(unsigned br_sel)
410{
411        return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
412}
413
414void intel_pmu_lbr_add(struct perf_event *event)
415{
416        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
417        struct x86_perf_task_context *task_ctx;
418
419        if (!x86_pmu.lbr_nr)
420                return;
421
422        cpuc->br_sel = event->hw.branch_reg.reg;
423
424        if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
425                task_ctx = event->ctx->task_ctx_data;
426                task_ctx->lbr_callstack_users++;
427        }
428
429        /*
430         * Request pmu::sched_task() callback, which will fire inside the
431         * regular perf event scheduling, so that call will:
432         *
433         *  - restore or wipe; when LBR-callstack,
434         *  - wipe; otherwise,
435         *
436         * when this is from __perf_event_task_sched_in().
437         *
438         * However, if this is from perf_install_in_context(), no such callback
439         * will follow and we'll need to reset the LBR here if this is the
440         * first LBR event.
441         *
442         * The problem is, we cannot tell these cases apart... but we can
443         * exclude the biggest chunk of cases by looking at
444         * event->total_time_running. An event that has accrued runtime cannot
445         * be 'new'. Conversely, a new event can get installed through the
446         * context switch path for the first time.
447         */
448        perf_sched_cb_inc(event->ctx->pmu);
449        if (!cpuc->lbr_users++ && !event->total_time_running)
450                intel_pmu_lbr_reset();
451}
452
453void intel_pmu_lbr_del(struct perf_event *event)
454{
455        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
456        struct x86_perf_task_context *task_ctx;
457
458        if (!x86_pmu.lbr_nr)
459                return;
460
461        if (branch_user_callstack(cpuc->br_sel) &&
462            event->ctx->task_ctx_data) {
463                task_ctx = event->ctx->task_ctx_data;
464                task_ctx->lbr_callstack_users--;
465        }
466
467        cpuc->lbr_users--;
468        WARN_ON_ONCE(cpuc->lbr_users < 0);
469        perf_sched_cb_dec(event->ctx->pmu);
470}
471
472void intel_pmu_lbr_enable_all(bool pmi)
473{
474        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
475
476        if (cpuc->lbr_users)
477                __intel_pmu_lbr_enable(pmi);
478}
479
480void intel_pmu_lbr_disable_all(void)
481{
482        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
483
484        if (cpuc->lbr_users)
485                __intel_pmu_lbr_disable();
486}
487
488static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
489{
490        unsigned long mask = x86_pmu.lbr_nr - 1;
491        u64 tos = intel_pmu_lbr_tos();
492        int i;
493
494        for (i = 0; i < x86_pmu.lbr_nr; i++) {
495                unsigned long lbr_idx = (tos - i) & mask;
496                union {
497                        struct {
498                                u32 from;
499                                u32 to;
500                        };
501                        u64     lbr;
502                } msr_lastbranch;
503
504                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
505
506                cpuc->lbr_entries[i].from       = msr_lastbranch.from;
507                cpuc->lbr_entries[i].to         = msr_lastbranch.to;
508                cpuc->lbr_entries[i].mispred    = 0;
509                cpuc->lbr_entries[i].predicted  = 0;
510                cpuc->lbr_entries[i].in_tx      = 0;
511                cpuc->lbr_entries[i].abort      = 0;
512                cpuc->lbr_entries[i].cycles     = 0;
513                cpuc->lbr_entries[i].reserved   = 0;
514        }
515        cpuc->lbr_stack.nr = i;
516}
517
518/*
519 * Due to the lack of segmentation in Linux, the effective address (offset)
520 * is the same as the linear address, allowing us to merge the LIP and EIP
521 * LBR formats.
522 */
523static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
524{
525        bool need_info = false;
526        unsigned long mask = x86_pmu.lbr_nr - 1;
527        int lbr_format = x86_pmu.intel_cap.lbr_format;
528        u64 tos = intel_pmu_lbr_tos();
529        int i;
530        int out = 0;
531        int num = x86_pmu.lbr_nr;
532
533        if (cpuc->lbr_sel) {
534                need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
535                if (cpuc->lbr_sel->config & LBR_CALL_STACK)
536                        num = tos;
537        }
538
539        for (i = 0; i < num; i++) {
540                unsigned long lbr_idx = (tos - i) & mask;
541                u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
542                int skip = 0;
543                u16 cycles = 0;
544                int lbr_flags = lbr_desc[lbr_format];
545
546                from = rdlbr_from(lbr_idx);
547                to   = rdlbr_to(lbr_idx);
548
549                if (lbr_format == LBR_FORMAT_INFO && need_info) {
550                        u64 info;
551
552                        rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
553                        mis = !!(info & LBR_INFO_MISPRED);
554                        pred = !mis;
555                        in_tx = !!(info & LBR_INFO_IN_TX);
556                        abort = !!(info & LBR_INFO_ABORT);
557                        cycles = (info & LBR_INFO_CYCLES);
558                }
559
560                if (lbr_format == LBR_FORMAT_TIME) {
561                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
562                        pred = !mis;
563                        skip = 1;
564                        cycles = ((to >> 48) & LBR_INFO_CYCLES);
565
566                        to = (u64)((((s64)to) << 16) >> 16);
567                }
568
569                if (lbr_flags & LBR_EIP_FLAGS) {
570                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
571                        pred = !mis;
572                        skip = 1;
573                }
574                if (lbr_flags & LBR_TSX) {
575                        in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
576                        abort = !!(from & LBR_FROM_FLAG_ABORT);
577                        skip = 3;
578                }
579                from = (u64)((((s64)from) << skip) >> skip);
580
581                /*
582                 * Some CPUs report duplicated abort records,
583                 * with the second entry not having an abort bit set.
584                 * Skip them here. This loop runs backwards,
585                 * so we need to undo the previous record.
586                 * If the abort just happened outside the window,
587                 * the extra entry cannot be removed.
588                 */
589                if (abort && x86_pmu.lbr_double_abort && out > 0)
590                        out--;
591
592                cpuc->lbr_entries[out].from      = from;
593                cpuc->lbr_entries[out].to        = to;
594                cpuc->lbr_entries[out].mispred   = mis;
595                cpuc->lbr_entries[out].predicted = pred;
596                cpuc->lbr_entries[out].in_tx     = in_tx;
597                cpuc->lbr_entries[out].abort     = abort;
598                cpuc->lbr_entries[out].cycles    = cycles;
599                cpuc->lbr_entries[out].reserved  = 0;
600                out++;
601        }
602        cpuc->lbr_stack.nr = out;
603}
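/*
 * Worked example for the LBR_EIP_FLAGS case above (illustrative values):
 * a raw from value of 0x80007ffff7a12345 has bit 63 set, so the branch
 * is flagged as mispredicted, and the flag is then stripped by the
 * "(s64)from << 1 >> 1" sign shift, leaving from = 0x00007ffff7a12345.
 * With LBR_TSX formats the same trick strips three bits (63:61) instead.
 */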
604
605void intel_pmu_lbr_read(void)
606{
607        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
608
609        if (!cpuc->lbr_users)
610                return;
611
612        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
613                intel_pmu_lbr_read_32(cpuc);
614        else
615                intel_pmu_lbr_read_64(cpuc);
616
617        intel_pmu_lbr_filter(cpuc);
618}
619
620/*
621 * SW filter is used:
622 * - in case there is no HW filter
623 * - in case the HW filter has errata or limitations
624 */
625static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
626{
627        u64 br_type = event->attr.branch_sample_type;
628        int mask = 0;
629
630        if (br_type & PERF_SAMPLE_BRANCH_USER)
631                mask |= X86_BR_USER;
632
633        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
634                mask |= X86_BR_KERNEL;
635
636        /* we ignore BRANCH_HV here */
637
638        if (br_type & PERF_SAMPLE_BRANCH_ANY)
639                mask |= X86_BR_ANY;
640
641        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
642                mask |= X86_BR_ANY_CALL;
643
644        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
645                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
646
647        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
648                mask |= X86_BR_IND_CALL;
649
650        if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
651                mask |= X86_BR_ABORT;
652
653        if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
654                mask |= X86_BR_IN_TX;
655
656        if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
657                mask |= X86_BR_NO_TX;
658
659        if (br_type & PERF_SAMPLE_BRANCH_COND)
660                mask |= X86_BR_JCC;
661
662        if (br_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
663                if (!x86_pmu_has_lbr_callstack())
664                        return -EOPNOTSUPP;
665                if (mask & ~(X86_BR_USER | X86_BR_KERNEL))
666                        return -EINVAL;
667                mask |= X86_BR_CALL | X86_BR_IND_CALL | X86_BR_RET |
668                        X86_BR_CALL_STACK;
669        }
670
671        if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
672                mask |= X86_BR_IND_JMP;
673
674        if (br_type & PERF_SAMPLE_BRANCH_CALL)
675                mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
676        /*
677         * Stash the actual user request into reg; it may
678         * be used by fixup code for some CPUs.
679         */
680        event->hw.branch_reg.reg = mask;
681        return 0;
682}
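/*
 * Example of the mapping performed above (illustration only): a request
 * for PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY_RETURN ends up
 * with
 *
 *   event->hw.branch_reg.reg = X86_BR_USER | X86_BR_RET |
 *                              X86_BR_IRET | X86_BR_SYSRET
 *
 * which is what intel_pmu_lbr_filter() later matches each decoded
 * branch against.
 */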
683
684/*
685 * Set up the HW LBR filter.
686 * Used only when available; it may not be enough to disambiguate
687 * all branches and may need the help of the SW filter.
688 */
689static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
690{
691        struct hw_perf_event_extra *reg;
692        u64 br_type = event->attr.branch_sample_type;
693        u64 mask = 0, v;
694        int i;
695
696        for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
697                if (!(br_type & (1ULL << i)))
698                        continue;
699
700                v = x86_pmu.lbr_sel_map[i];
701                if (v == LBR_NOT_SUPP)
702                        return -EOPNOTSUPP;
703
704                if (v != LBR_IGN)
705                        mask |= v;
706        }
707
708        reg = &event->hw.branch_reg;
709        reg->idx = EXTRA_REG_LBR;
710
711        /*
712         * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate
713         * in suppress mode, so LBR_SELECT should be set to
714         * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK).
715         * But the 10th bit, LBR_CALL_STACK, does not operate
716         * in suppress mode.
717         */
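        /*
         * For example (illustrative values): PERF_SAMPLE_BRANCH_USER |
         * PERF_SAMPLE_BRANCH_ANY_CALL with an snb/hsw style map gives
         * mask = LBR_USER | LBR_REL_CALL | LBR_IND_CALL | LBR_FAR = 0x11a,
         * so reg->config = 0x11a ^ 0x1ff = 0x0e5, i.e. the "do not
         * capture" bits for ring 0, conditional branches, returns and
         * plain jumps are set, while ring 3 calls and far branches
         * remain captured.
         */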
718        reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK);
719
720        if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
721            (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
722            (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
723                reg->config |= LBR_NO_INFO;
724
725        return 0;
726}
727
728int intel_pmu_setup_lbr_filter(struct perf_event *event)
729{
730        int ret = 0;
731
732        /*
733         * no LBR on this PMU
734         */
735        if (!x86_pmu.lbr_nr)
736                return -EOPNOTSUPP;
737
738        /*
739         * setup SW LBR filter
740         */
741        ret = intel_pmu_setup_sw_lbr_filter(event);
742        if (ret)
743                return ret;
744
745        /*
746         * setup HW LBR filter, if any
747         */
748        if (x86_pmu.lbr_sel_map)
749                ret = intel_pmu_setup_hw_lbr_filter(event);
750
751        return ret;
752}
753
754/*
755 * Return the type of control flow change at address "from".
756 * The instruction is not necessarily a branch (in case of an interrupt).
757 *
758 * The branch type returned also includes the priv level of the
759 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
760 *
761 * If a branch type is unknown OR the instruction cannot be
762 * decoded (e.g., text page not present), then X86_BR_NONE is
763 * returned.
764 */
765static int branch_type(unsigned long from, unsigned long to, int abort)
766{
767        struct insn insn;
768        void *addr;
769        int bytes_read, bytes_left;
770        int ret = X86_BR_NONE;
771        int ext, to_plm, from_plm;
772        u8 buf[MAX_INSN_SIZE];
773        int is64 = 0;
774
775        to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
776        from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
777
778        /*
779         * may be zero if the LBR did not fill up after a reset by the time
780         * we get a PMU interrupt
781         */
782        if (from == 0 || to == 0)
783                return X86_BR_NONE;
784
785        if (abort)
786                return X86_BR_ABORT | to_plm;
787
788        if (from_plm == X86_BR_USER) {
789                /*
790                 * can happen if measuring at the user level only
791                 * and we interrupt in a kernel thread, e.g., idle.
792                 */
793                if (!current->mm)
794                        return X86_BR_NONE;
795
796                /* may fail if text not present */
797                bytes_left = copy_from_user_nmi(buf, (void __user *)from,
798                                                MAX_INSN_SIZE);
799                bytes_read = MAX_INSN_SIZE - bytes_left;
800                if (!bytes_read)
801                        return X86_BR_NONE;
802
803                addr = buf;
804        } else {
805                /*
806                 * The LBR logs any address in the IP, even if the IP just
807                 * faulted. This means userspace can control the from address.
808          * Ensure we don't blindly read any address by validating it is
809                 * a known text address.
810                 */
811                if (kernel_text_address(from)) {
812                        addr = (void *)from;
813                        /*
814                         * Assume we can get the maximum possible size
815                         * when grabbing kernel data.  This is not
816                         * _strictly_ true since we could possibly be
817                         * executing up next to a memory hole, but
818                         * it is very unlikely to be a problem.
819                         */
820                        bytes_read = MAX_INSN_SIZE;
821                } else {
822                        return X86_BR_NONE;
823                }
824        }
825
826        /*
827         * decoder needs to know the ABI especially
828         * on 64-bit systems running 32-bit apps
829         */
830#ifdef CONFIG_X86_64
831        is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
832#endif
833        insn_init(&insn, addr, bytes_read, is64);
834        insn_get_opcode(&insn);
835        if (!insn.opcode.got)
836                return X86_BR_ABORT;
837
838        switch (insn.opcode.bytes[0]) {
839        case 0xf:
840                switch (insn.opcode.bytes[1]) {
841                case 0x05: /* syscall */
842                case 0x34: /* sysenter */
843                        ret = X86_BR_SYSCALL;
844                        break;
845                case 0x07: /* sysret */
846                case 0x35: /* sysexit */
847                        ret = X86_BR_SYSRET;
848                        break;
849                case 0x80 ... 0x8f: /* conditional */
850                        ret = X86_BR_JCC;
851                        break;
852                default:
853                        ret = X86_BR_NONE;
854                }
855                break;
856        case 0x70 ... 0x7f: /* conditional */
857                ret = X86_BR_JCC;
858                break;
859        case 0xc2: /* near ret */
860        case 0xc3: /* near ret */
861        case 0xca: /* far ret */
862        case 0xcb: /* far ret */
863                ret = X86_BR_RET;
864                break;
865        case 0xcf: /* iret */
866                ret = X86_BR_IRET;
867                break;
868        case 0xcc ... 0xce: /* int */
869                ret = X86_BR_INT;
870                break;
871        case 0xe8: /* call near rel */
872                insn_get_immediate(&insn);
873                if (insn.immediate1.value == 0) {
874                        /* zero length call */
875                        ret = X86_BR_ZERO_CALL;
876                        break;
877                }
878        case 0x9a: /* call far absolute */
879                ret = X86_BR_CALL;
880                break;
881        case 0xe0 ... 0xe3: /* loop jmp */
882                ret = X86_BR_JCC;
883                break;
884        case 0xe9 ... 0xeb: /* jmp */
885                ret = X86_BR_JMP;
886                break;
887        case 0xff: /* call near absolute, call far absolute ind */
888                insn_get_modrm(&insn);
889                ext = (insn.modrm.bytes[0] >> 3) & 0x7;
890                switch (ext) {
891                case 2: /* near ind call */
892                case 3: /* far ind call */
893                        ret = X86_BR_IND_CALL;
894                        break;
895                case 4:
896                case 5:
897                        ret = X86_BR_IND_JMP;
898                        break;
899                }
900                break;
901        default:
902                ret = X86_BR_NONE;
903        }
904        /*
905         * interrupts, traps, faults (and thus ring transitions) may
906         * occur on any instruction. Thus, to classify them correctly,
907         * we need to first look at the from and to priv levels. If they
908         * are different and to is in the kernel, then it indicates
909         * a ring transition. If the from instruction is not a ring
910         * transition instr (syscall, sysenter, int), then it means
911         * it was an irq, trap or fault.
912         *
913         * We have no way of detecting kernel-to-kernel faults.
914         */
915        if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
916            && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
917                ret = X86_BR_IRQ;
918
919        /*
920         * The branch priv level is determined by the target, as
921         * is done by HW when LBR_SELECT is implemented.
922         */
923        if (ret != X86_BR_NONE)
924                ret |= to_plm;
925
926        return ret;
927}
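/*
 * Examples of the resulting classification (illustration only):
 *
 *   - a user-space "call rel32" with a non-zero displacement, landing in
 *     user space:                          X86_BR_CALL | X86_BR_USER
 *   - a hardware interrupt taken while running in user space (the "from"
 *     instruction is not syscall/sysenter/int):
 *                                          X86_BR_IRQ | X86_BR_KERNEL
 *   - an undecodable or unknown from address:   X86_BR_NONE
 */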
928
929/*
930 * Implement the actual branch filter based on user demand.
931 * Hardware may not exactly satisfy that request, thus we
932 * need to inspect opcodes. Mismatched branches are
933 * discarded. Therefore, the number of branches returned
934 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
935 */
936static void
937intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
938{
939        u64 from, to;
940        int br_sel = cpuc->br_sel;
941        int i, j, type;
942        bool compress = false;
943
944        /* if sampling all branches, then nothing to filter */
945        if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
946                return;
947
948        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
949
950                from = cpuc->lbr_entries[i].from;
951                to = cpuc->lbr_entries[i].to;
952
953                type = branch_type(from, to, cpuc->lbr_entries[i].abort);
954                if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
955                        if (cpuc->lbr_entries[i].in_tx)
956                                type |= X86_BR_IN_TX;
957                        else
958                                type |= X86_BR_NO_TX;
959                }
960
961                /* if type does not correspond, then discard */
962                if (type == X86_BR_NONE || (br_sel & type) != type) {
963                        cpuc->lbr_entries[i].from = 0;
964                        compress = true;
965                }
966        }
967
968        if (!compress)
969                return;
970
971        /* remove all entries with from=0 */
972        for (i = 0; i < cpuc->lbr_stack.nr; ) {
973                if (!cpuc->lbr_entries[i].from) {
974                        j = i;
975                        while (++j < cpuc->lbr_stack.nr)
976                                cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
977                        cpuc->lbr_stack.nr--;
978                        if (!cpuc->lbr_entries[i].from)
979                                continue;
980                }
981                i++;
982        }
983}
984
985/*
986 * Map interface branch filters onto LBR filters
987 */
988static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
989        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
990        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
991        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
992        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
993        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_REL_JMP
994                                                | LBR_IND_JMP | LBR_FAR,
995        /*
996         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
997         */
998        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
999         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
1000        /*
1001         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
1002         */
1003        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
1004        [PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
1005        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
1006};
1007
1008static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1009        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
1010        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
1011        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
1012        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1013        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1014        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
1015                                                | LBR_FAR,
1016        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
1017        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
1018        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
1019        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
1020};
1021
1022static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
1023        [PERF_SAMPLE_BRANCH_ANY_SHIFT]          = LBR_ANY,
1024        [PERF_SAMPLE_BRANCH_USER_SHIFT]         = LBR_USER,
1025        [PERF_SAMPLE_BRANCH_KERNEL_SHIFT]       = LBR_KERNEL,
1026        [PERF_SAMPLE_BRANCH_HV_SHIFT]           = LBR_IGN,
1027        [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]   = LBR_RETURN | LBR_FAR,
1028        [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]     = LBR_REL_CALL | LBR_IND_CALL
1029                                                | LBR_FAR,
1030        [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]     = LBR_IND_CALL,
1031        [PERF_SAMPLE_BRANCH_COND_SHIFT]         = LBR_JCC,
1032        [PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]   = LBR_REL_CALL | LBR_IND_CALL
1033                                                | LBR_RETURN | LBR_CALL_STACK,
1034        [PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]     = LBR_IND_JMP,
1035        [PERF_SAMPLE_BRANCH_CALL_SHIFT]         = LBR_REL_CALL,
1036};
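/*
 * For reference, these tables are what ultimately back a user request
 * such as (hypothetical invocation, shown for illustration):
 *
 *   perf record -j any_call,u -e cycles -- ./workload
 *
 * The perf tool translates "any_call,u" into branch_sample_type =
 * PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_USER, which
 * intel_pmu_setup_hw_lbr_filter() maps through the table for the
 * running CPU into an LBR_SELECT value.
 */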
1037
1038/* core */
1039void __init intel_pmu_lbr_init_core(void)
1040{
1041        x86_pmu.lbr_nr     = 4;
1042        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1043        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1044        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1045
1046        /*
1047         * SW branch filter usage:
1048         * - compensate for lack of HW filter
1049         */
1050}
1051
1052/* nehalem/westmere */
1053void __init intel_pmu_lbr_init_nhm(void)
1054{
1055        x86_pmu.lbr_nr     = 16;
1056        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1057        x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1058        x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1059
1060        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1061        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1062
1063        /*
1064         * SW branch filter usage:
1065         * - work around the LBR_SEL errata (see above)
1066         * - support syscall, sysret capture.
1067         *   That requires LBR_FAR but that means far
1068         *   jmps need to be filtered out
1069         */
1070}
1071
1072/* sandy bridge */
1073void __init intel_pmu_lbr_init_snb(void)
1074{
1075        x86_pmu.lbr_nr   = 16;
1076        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1077        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1078        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1079
1080        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1081        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1082
1083        /*
1084         * SW branch filter usage:
1085         * - support syscall, sysret capture.
1086         *   That requires LBR_FAR but that means far
1087         *   jmps need to be filtered out
1088         */
1089}
1090
1091/* haswell */
1092void intel_pmu_lbr_init_hsw(void)
1093{
1094        x86_pmu.lbr_nr   = 16;
1095        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1096        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1097        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1098
1099        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1100        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1101
1102        if (lbr_from_signext_quirk_needed())
1103                static_branch_enable(&lbr_from_quirk_key);
1104}
1105
1106/* skylake */
1107__init void intel_pmu_lbr_init_skl(void)
1108{
1109        x86_pmu.lbr_nr   = 32;
1110        x86_pmu.lbr_tos  = MSR_LBR_TOS;
1111        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
1112        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
1113
1114        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1115        x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
1116
1117        /*
1118         * SW branch filter usage:
1119         * - support syscall, sysret capture.
1120         *   That requires LBR_FAR but that means far
1121         *   jmps need to be filtered out
1122         */
1123}
1124
1125/* atom */
1126void __init intel_pmu_lbr_init_atom(void)
1127{
1128        /*
1129         * Only models starting at stepping 10 seem
1130         * to have an operational LBR which can freeze
1131         * on a PMU interrupt
1132         */
1133        if (boot_cpu_data.x86_model == 28
1134            && boot_cpu_data.x86_mask < 10) {
1135                pr_cont("LBR disabled due to erratum");
1136                return;
1137        }
1138
1139        x86_pmu.lbr_nr     = 8;
1140        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1141        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1142        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1143
1144        /*
1145         * SW branch filter usage:
1146         * - compensate for lack of HW filter
1147         */
1148}
1149
1150/* slm */
1151void __init intel_pmu_lbr_init_slm(void)
1152{
1153        x86_pmu.lbr_nr     = 8;
1154        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1155        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
1156        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
1157
1158        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1159        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
1160
1161        /*
1162         * SW branch filter usage:
1163         * - compensate for lack of HW filter
1164         */
1165        pr_cont("8-deep LBR, ");
1166}
1167
1168/* Knights Landing */
1169void intel_pmu_lbr_init_knl(void)
1170{
1171        x86_pmu.lbr_nr     = 8;
1172        x86_pmu.lbr_tos    = MSR_LBR_TOS;
1173        x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
1174        x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
1175
1176        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
1177        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
1178}