1 /*        $NetBSD: dtrace_isa.c,v 1.10 2018/08/16 14:14:51 christos Exp $       */
2 
3 /*
4  * CDDL HEADER START
5  *
6  * The contents of this file are subject to the terms of the
7  * Common Development and Distribution License, Version 1.0 only
8  * (the "License").  You may not use this file except in compliance
9  * with the License.
10  *
11  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
12  * or http://www.opensolaris.org/os/licensing.
13  * See the License for the specific language governing permissions
14  * and limitations under the License.
15  *
16  * When distributing Covered Code, include this CDDL HEADER in each
17  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
18  * If applicable, add the following below this CDDL HEADER, with the
19  * fields enclosed by brackets "[]" replaced with your own identifying
20  * information: Portions Copyright [yyyy] [name of copyright owner]
21  *
22  * CDDL HEADER END
23  *
24  * $FreeBSD: head/sys/cddl/dev/dtrace/amd64/dtrace_isa.c 298171 2016-04-17 23:08:47Z markj $
25  */
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 #include <sys/cdefs.h>
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 
36 #include <machine/frame.h>
37 #include <machine/reg.h>
38 
39 #include <machine/vmparam.h>
40 
41 #include "regset.h"
42 
/*
 * Unchecked fetch primitives.  They are only declared here; the
 * dtrace_fuwordN() wrappers later in this file validate the address
 * before handing it to the matching _nocheck routine.
 */
uint8_t dtrace_fuword8_nocheck(void *uaddr);
uint16_t dtrace_fuword16_nocheck(void *uaddr);
uint32_t dtrace_fuword32_nocheck(void *uaddr);
uint64_t dtrace_fuword64_nocheck(void *uaddr);

/* True when the address, viewed as a signed integer, is negative. */
#define INKERNEL(va) ((intptr_t)(va) < 0) /* XXX horror */

/*
 * Frame record used by the stack walkers below: a link to the next
 * frame record followed by a return address.
 */
struct amd64_frame {
	struct amd64_frame	*f_frame;
	uintptr_t		 f_retaddr;
};

typedef unsigned long vm_offset_t;

/* Cap on the number of passes dtrace_getustack_common() makes. */
int	dtrace_ustackdepth_max = 2048;
58 
59 void
dtrace_getpcstack(pc_t * pcstack,int pcstack_limit,int aframes,uint32_t * intrpc)60 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
61     uint32_t *intrpc)
62 {
63           int depth = 0;
64           register_t rbp;
65           struct amd64_frame *frame;
66           vm_offset_t callpc;
67           pc_t caller = (pc_t) solaris_cpu[cpu_number()].cpu_dtrace_caller;
68 
69           if (intrpc != 0)
70                     pcstack[depth++] = (pc_t) intrpc;
71 
72           aframes++;
73 
74           __asm __volatile("movq %%rbp,%0" : "=r" (rbp));
75 
76           frame = (struct amd64_frame *)rbp;
77           while (depth < pcstack_limit) {
78                     if (!INKERNEL((long) frame))
79                               break;
80 
81                     callpc = frame->f_retaddr;
82 
83                     if (!INKERNEL(callpc))
84                               break;
85 
86                     if (aframes > 0) {
87                               aframes--;
88                               if ((aframes == 0) && (caller != 0)) {
89                                         pcstack[depth++] = caller;
90                               }
91                     }
92                     else {
93                               pcstack[depth++] = callpc;
94                     }
95 
96                     if (frame->f_frame <= frame ||
97                         (vm_offset_t)frame->f_frame >=
98                         (vm_offset_t)rbp + KSTACK_SIZE)
99                               break;
100                     frame = frame->f_frame;
101           }
102 
103           for (; depth < pcstack_limit; depth++) {
104                     pcstack[depth] = 0;
105           }
106 }
107 
108 static int
dtrace_getustack_common(uint64_t * pcstack,int pcstack_limit,uintptr_t pc,uintptr_t sp)109 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t pc,
110     uintptr_t sp)
111 {
112           uintptr_t oldsp;
113           volatile uint16_t *flags =
114               (volatile uint16_t *)&cpu_core[cpu_number()].cpuc_dtrace_flags;
115           int ret = 0;
116 
117           ASSERT(pcstack == NULL || pcstack_limit > 0);
118           ASSERT(dtrace_ustackdepth_max > 0);
119 
120           while (pc != 0) {
121                     /*
122                      * We limit the number of times we can go around this
123                      * loop to account for a circular stack.
124                      */
125                     if (ret++ >= dtrace_ustackdepth_max) {
126                               *flags |= CPU_DTRACE_BADSTACK;
127                               cpu_core[cpu_number()].cpuc_dtrace_illval = sp;
128                               break;
129                     }
130 
131                     if (pcstack != NULL) {
132                               *pcstack++ = (uint64_t)pc;
133                               pcstack_limit--;
134                               if (pcstack_limit <= 0)
135                                         break;
136                     }
137 
138                     if (sp == 0)
139                               break;
140 
141                     oldsp = sp;
142 
143                     pc = dtrace_fuword64((void *)(sp +
144                               offsetof(struct amd64_frame, f_retaddr)));
145                     sp = dtrace_fuword64((void *)sp);
146 
147                     if (sp == oldsp) {
148                               *flags |= CPU_DTRACE_BADSTACK;
149                               cpu_core[cpu_number()].cpuc_dtrace_illval = sp;
150                               break;
151                     }
152 
153                     /*
154                      * This is totally bogus:  if we faulted, we're going to clear
155                      * the fault and break.  This is to deal with the apparently
156                      * broken Java stacks on x86.
157                      */
158                     if (*flags & CPU_DTRACE_FAULT) {
159                               *flags &= ~CPU_DTRACE_FAULT;
160                               break;
161                     }
162           }
163 
164           return (ret);
165 }
166 
167 void
dtrace_getupcstack(uint64_t * pcstack,int pcstack_limit)168 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
169 {
170           proc_t *p = curproc;
171           struct trapframe *tf;
172           uintptr_t pc, sp, fp;
173           volatile uint16_t *flags =
174               (volatile uint16_t *)&cpu_core[cpu_number()].cpuc_dtrace_flags;
175           int n;
176 
177           if (*flags & CPU_DTRACE_FAULT)
178                     return;
179 
180           if (pcstack_limit <= 0)
181                     return;
182 
183           /*
184            * If there's no user context we still need to zero the stack.
185            */
186           if (p == NULL || (tf = curlwp->l_md.md_regs) == NULL)
187                     goto zero;
188 
189           *pcstack++ = (uint64_t)p->p_pid;
190           pcstack_limit--;
191 
192           if (pcstack_limit <= 0)
193                     return;
194 
195           pc = tf->tf_rip;
196           fp = tf->tf_rbp;
197           sp = tf->tf_rsp;
198 
199           if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
200                     /*
201                      * In an entry probe.  The frame pointer has not yet been
202                      * pushed (that happens in the function prologue).  The
203                      * best approach is to add the current pc as a missing top
204                      * of stack and back the pc up to the caller, which is stored
205                      * at the current stack pointer address since the call
206                      * instruction puts it there right before the branch.
207                      */
208 
209                     *pcstack++ = (uint64_t)pc;
210                     pcstack_limit--;
211                     if (pcstack_limit <= 0)
212                               return;
213 
214                     pc = dtrace_fuword64((void *) sp);
215           }
216 
217           n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp);
218           ASSERT(n >= 0);
219           ASSERT(n <= pcstack_limit);
220 
221           pcstack += n;
222           pcstack_limit -= n;
223 
224 zero:
225           while (pcstack_limit-- > 0)
226                     *pcstack++ = 0;
227 }
228 
229 int
dtrace_getustackdepth(void)230 dtrace_getustackdepth(void)
231 {
232           proc_t *p = curproc;
233           struct trapframe *tf;
234           uintptr_t pc, fp, sp;
235           int n = 0;
236 
237           if (p == NULL || (tf = curlwp->l_md.md_regs) == NULL)
238                     return (0);
239 
240           if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
241                     return (-1);
242 
243           pc = tf->tf_rip;
244           fp = tf->tf_rbp;
245           sp = tf->tf_rsp;
246 
247           if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
248                     /*
249                      * In an entry probe.  The frame pointer has not yet been
250                      * pushed (that happens in the function prologue).  The
251                      * best approach is to add the current pc as a missing top
252                      * of stack and back the pc up to the caller, which is stored
253                      * at the current stack pointer address since the call
254                      * instruction puts it there right before the branch.
255                      */
256 
257                     pc = dtrace_fuword64((void *) sp);
258                     n++;
259           }
260 
261           n += dtrace_getustack_common(NULL, 0, pc, fp);
262 
263           return (n);
264 }
265 
266 void
dtrace_getufpstack(uint64_t * pcstack,uint64_t * fpstack,int pcstack_limit)267 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
268 {
269           proc_t *p = curproc;
270           struct trapframe *tf;
271           uintptr_t pc, sp, fp;
272           volatile uint16_t *flags =
273               (volatile uint16_t *)&cpu_core[cpu_number()].cpuc_dtrace_flags;
274 #ifdef notyet       /* XXX signal stack */
275           uintptr_t oldcontext;
276           size_t s1, s2;
277 #endif
278 
279           if (*flags & CPU_DTRACE_FAULT)
280                     return;
281 
282           if (pcstack_limit <= 0)
283                     return;
284 
285           /*
286            * If there's no user context we still need to zero the stack.
287            */
288           if (p == NULL || (tf = curlwp->l_md.md_regs) == NULL)
289                     goto zero;
290 
291           *pcstack++ = (uint64_t)p->p_pid;
292           pcstack_limit--;
293 
294           if (pcstack_limit <= 0)
295                     return;
296 
297           pc = tf->tf_rip;
298           sp = tf->tf_rsp;
299           fp = tf->tf_rbp;
300 
301 #ifdef notyet /* XXX signal stack */
302           oldcontext = lwp->lwp_oldcontext;
303           s1 = sizeof (struct xframe) + 2 * sizeof (long);
304           s2 = s1 + sizeof (siginfo_t);
305 #endif
306 
307           if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
308                     *pcstack++ = (uint64_t)pc;
309                     *fpstack++ = 0;
310                     pcstack_limit--;
311                     if (pcstack_limit <= 0)
312                               return;
313 
314                     pc = dtrace_fuword64((void *)sp);
315           }
316 
317           while (pc != 0) {
318                     *pcstack++ = (uint64_t)pc;
319                     *fpstack++ = fp;
320                     pcstack_limit--;
321                     if (pcstack_limit <= 0)
322                               break;
323 
324                     if (fp == 0)
325                               break;
326 
327 #ifdef notyet /* XXX signal stack */
328                     if (oldcontext == sp + s1 || oldcontext == sp + s2) {
329                               ucontext_t *ucp = (ucontext_t *)oldcontext;
330                               greg_t *gregs = ucp->uc_mcontext.gregs;
331 
332                               sp = dtrace_fulword(&gregs[REG_FP]);
333                               pc = dtrace_fulword(&gregs[REG_PC]);
334 
335                               oldcontext = dtrace_fulword(&ucp->uc_link);
336                     } else
337 #endif /* XXX */
338                     {
339                               pc = dtrace_fuword64((void *)(fp +
340                                         offsetof(struct amd64_frame, f_retaddr)));
341                               fp = dtrace_fuword64((void *)fp);
342                     }
343 
344                     /*
345                      * This is totally bogus:  if we faulted, we're going to clear
346                      * the fault and break.  This is to deal with the apparently
347                      * broken Java stacks on x86.
348                      */
349                     if (*flags & CPU_DTRACE_FAULT) {
350                               *flags &= ~CPU_DTRACE_FAULT;
351                               break;
352                     }
353           }
354 
355 zero:
356           while (pcstack_limit-- > 0)
357                     *pcstack++ = 0;
358 }
359 
360 /*ARGSUSED*/
361 uint64_t
dtrace_getarg(int arg,int aframes)362 dtrace_getarg(int arg, int aframes)
363 {
364           uintptr_t val;
365           struct amd64_frame *fp = (struct amd64_frame *)dtrace_getfp();
366           uintptr_t *stack;
367           int i;
368 
369           /*
370            * A total of 6 arguments are passed via registers; any argument with
371            * index of 5 or lower is therefore in a register.
372            */
373           int inreg = 5;
374 
375           for (i = 1; i <= aframes; i++) {
376                     fp = fp->f_frame;
377 
378                     if (P2ROUNDUP(fp->f_retaddr, 16) ==
379                         (long)dtrace_invop_callsite) {
380                               /*
381                                * In the case of amd64, we will use the pointer to the
382                                * regs structure that was pushed when we took the
383                                * trap.  To get this structure, we must increment
384                                * beyond the frame structure, and then again beyond
385                                * the calling RIP stored in dtrace_invop().  If the
386                                * argument that we're seeking is passed on the stack,
387                                * we'll pull the true stack pointer out of the saved
388                                * registers and decrement our argument by the number
389                                * of arguments passed in registers; if the argument
390                                * we're seeking is passed in registers, we can just
391                                * load it directly.
392                                */
393                               struct trapframe *tf = (struct trapframe *)&fp[1];
394 
395                               if (arg <= inreg) {
396                                         switch (arg) {
397                                         case 0:
398                                                   stack = (uintptr_t *)&tf->tf_rdi;
399                                                   break;
400                                         case 1:
401                                                   stack = (uintptr_t *)&tf->tf_rsi;
402                                                   break;
403                                         case 2:
404                                                   stack = (uintptr_t *)&tf->tf_rdx;
405                                                   break;
406                                         case 3:
407                                                   stack = (uintptr_t *)&tf->tf_rcx;
408                                                   break;
409                                         case 4:
410                                                   stack = (uintptr_t *)&tf->tf_r8;
411                                                   break;
412                                         case 5:
413                                                   stack = (uintptr_t *)&tf->tf_r9;
414                                                   break;
415                                         default:
416                                                   KASSERT(0);
417                                                   stack = NULL;
418                                                   break;
419                                         }
420                                         arg = 0;
421                               } else {
422                                         stack = (uintptr_t *)(tf->tf_rsp);
423                                         arg -= inreg;
424                               }
425                               goto load;
426                     }
427 
428           }
429 
430           /*
431            * We know that we did not come through a trap to get into
432            * dtrace_probe() -- the provider simply called dtrace_probe()
433            * directly.  As this is the case, we need to shift the argument
434            * that we're looking for:  the probe ID is the first argument to
435            * dtrace_probe(), so the argument n will actually be found where
436            * one would expect to find argument (n + 1).
437            */
438           arg++;
439 
440           if (arg <= inreg) {
441                     /*
442                      * This shouldn't happen.  If the argument is passed in a
443                      * register then it should have been, well, passed in a
444                      * register...
445                      */
446                     DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
447                     return (0);
448           }
449 
450           arg -= (inreg + 1);
451           stack = (uintptr_t *)&fp[1];
452 
453 load:
454           DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
455           val = stack[arg];
456           DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
457 
458           return (val);
459           return (0);
460 }
461 
462 int
dtrace_getstackdepth(int aframes)463 dtrace_getstackdepth(int aframes)
464 {
465           int depth = 0;
466           struct amd64_frame *frame;
467           vm_offset_t rbp;
468 
469           aframes++;
470           rbp = dtrace_getfp();
471           frame = (struct amd64_frame *)rbp;
472           depth++;
473           for(;;) {
474                     if (!INKERNEL((long) frame))
475                               break;
476                     if (!INKERNEL((long) frame->f_frame))
477                               break;
478                     depth++;
479                     if (frame->f_frame <= frame ||
480                         (vm_offset_t)frame->f_frame >=
481                         (vm_offset_t)rbp + KSTACK_SIZE)
482                               break;
483                     frame = frame->f_frame;
484           }
485           if (depth < aframes)
486                     return 0;
487           else
488                     return depth - aframes;
489 }
490 
491 ulong_t
dtrace_getreg(struct trapframe * rp,uint_t reg)492 dtrace_getreg(struct trapframe *rp, uint_t reg)
493 {
494           int regmap[] = {
495                     REG_GS,             /* GS */
496                     REG_FS,             /* FS */
497                     REG_ES,             /* ES */
498                     REG_DS,             /* DS */
499                     REG_RDI,  /* EDI */
500                     REG_RSI,  /* ESI */
501                     REG_RBP,  /* EBP */
502                     REG_RSP,  /* ESP */
503                     REG_RBX,  /* EBX */
504                     REG_RDX,  /* EDX */
505                     REG_RCX,  /* ECX */
506                     REG_RAX,  /* EAX */
507                     REG_TRAPNO,         /* TRAPNO */
508                     REG_ERR,  /* ERR */
509                     REG_RIP,  /* EIP */
510                     REG_CS,             /* CS */
511                     REG_RFL,  /* EFL */
512                     REG_RSP,  /* UESP */
513                     REG_SS              /* SS */
514           };
515 
516           if (reg <= SS) {
517                     if (reg >= sizeof (regmap) / sizeof (int)) {
518                               DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
519                               return (0);
520                     }
521 
522                     reg = regmap[reg];
523           } else {
524                     reg -= SS + 1;
525           }
526 
527           switch (reg) {
528           case REG_RDI:
529                     return (rp->tf_rdi);
530           case REG_RSI:
531                     return (rp->tf_rsi);
532           case REG_RDX:
533                     return (rp->tf_rdx);
534           case REG_RCX:
535                     return (rp->tf_rcx);
536           case REG_R8:
537                     return (rp->tf_r8);
538           case REG_R9:
539                     return (rp->tf_r9);
540           case REG_RAX:
541                     return (rp->tf_rax);
542           case REG_RBX:
543                     return (rp->tf_rbx);
544           case REG_RBP:
545                     return (rp->tf_rbp);
546           case REG_R10:
547                     return (rp->tf_r10);
548           case REG_R11:
549                     return (rp->tf_r11);
550           case REG_R12:
551                     return (rp->tf_r12);
552           case REG_R13:
553                     return (rp->tf_r13);
554           case REG_R14:
555                     return (rp->tf_r14);
556           case REG_R15:
557                     return (rp->tf_r15);
558           case REG_DS:
559                     return (rp->tf_ds);
560           case REG_ES:
561                     return (rp->tf_es);
562           case REG_FS:
563                     return (rp->tf_fs);
564           case REG_GS:
565                     return (rp->tf_gs);
566           case REG_TRAPNO:
567                     return (rp->tf_trapno);
568           case REG_ERR:
569                     return (rp->tf_err);
570           case REG_RIP:
571                     return (rp->tf_rip);
572           case REG_CS:
573                     return (rp->tf_cs);
574           case REG_SS:
575                     return (rp->tf_ss);
576           case REG_RFL:
577                     return (rp->tf_rflags);
578           case REG_RSP:
579                     return (rp->tf_rsp);
580           default:
581                     DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
582                     return (0);
583           }
584 }
585 
586 static int
dtrace_copycheck(uintptr_t uaddr,uintptr_t kaddr,size_t size)587 dtrace_copycheck(uintptr_t uaddr, uintptr_t kaddr, size_t size)
588 {
589           ASSERT(INKERNEL(kaddr) && kaddr + size >= kaddr);
590 
591           if (uaddr + size > VM_MAXUSER_ADDRESS || uaddr + size < uaddr) {
592                     DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
593                     cpu_core[cpu_number()].cpuc_dtrace_illval = uaddr;
594                     return (0);
595           }
596 
597           return (1);
598 }
599 
/*
 * Copy size bytes from user address uaddr to kernel address kaddr via
 * dtrace_copy(), provided dtrace_copycheck() accepts the range.  The
 * flags argument is not used here.
 */
void
dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(uaddr, kaddr, size);
}
607 
/*
 * Copy size bytes from kernel address kaddr to user address uaddr via
 * dtrace_copy(), provided dtrace_copycheck() accepts the range.  The
 * flags argument is not used here.
 */
void
dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copy(kaddr, uaddr, size);
}
615 
/*
 * String variant of dtrace_copyin(): once dtrace_copycheck() accepts the
 * range, hands uaddr, kaddr, size and flags to dtrace_copystr().
 */
void
dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(uaddr, kaddr, size, flags);
}
623 
/*
 * String variant of dtrace_copyout(): once dtrace_copycheck() accepts the
 * range, hands kaddr, uaddr, size and flags to dtrace_copystr().
 */
void
dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
    volatile uint16_t *flags)
{
	if (!dtrace_copycheck(uaddr, kaddr, size))
		return;

	dtrace_copystr(kaddr, uaddr, size, flags);
}
631 
632 uint8_t
dtrace_fuword8(void * uaddr)633 dtrace_fuword8(void *uaddr)
634 {
635           if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
636                     DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
637                     cpu_core[cpu_number()].cpuc_dtrace_illval = (uintptr_t)uaddr;
638                     return (0);
639           }
640           return (dtrace_fuword8_nocheck(uaddr));
641 }
642 
643 uint16_t
dtrace_fuword16(void * uaddr)644 dtrace_fuword16(void *uaddr)
645 {
646           if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
647                     DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
648                     cpu_core[cpu_number()].cpuc_dtrace_illval = (uintptr_t)uaddr;
649                     return (0);
650           }
651           return (dtrace_fuword16_nocheck(uaddr));
652 }
653 
654 uint32_t
dtrace_fuword32(void * uaddr)655 dtrace_fuword32(void *uaddr)
656 {
657           if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
658                     DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
659                     cpu_core[cpu_number()].cpuc_dtrace_illval = (uintptr_t)uaddr;
660                     return (0);
661           }
662           return (dtrace_fuword32_nocheck(uaddr));
663 }
664 
665 uint64_t
dtrace_fuword64(void * uaddr)666 dtrace_fuword64(void *uaddr)
667 {
668           if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
669                     DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
670                     cpu_core[cpu_number()].cpuc_dtrace_illval = (uintptr_t)uaddr;
671                     return (0);
672           }
673           return (dtrace_fuword64_nocheck(uaddr));
674 }
675