commit da9bd35afc2269529b029dd22815e04362e89e5b
Author: Dave Anderson <anderson@redhat.com>
Date: Wed Oct 11 11:17:30 2017 -0400
Fix for the "runq" command on Linux 4.14 and later kernels that
contain commit cd9e61ed1eebbcd5dfad59475d41ec58d9b64b6a, titled
"rbtree: cache leftmost node internally". Without the patch,
the command fails with the error message "runq: invalid structure
member offset: cfs_rq_rb_leftmost".
(anderson@redhat.com)
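For reference, a minimal sketch of the kernel-side layout change that this
fix accommodates (field names from upstream kernel commit cd9e61ed1eeb):

    /* Linux 4.14+: the leftmost node is cached inside the root itself */
    struct rb_root_cached {
            struct rb_root rb_root;
            struct rb_node *rb_leftmost;
    };

    struct cfs_rq {
            /* ... */
            struct rb_root_cached tasks_timeline;  /* formerly an rb_root
                                                      plus a separate
                                                      rb_leftmost member */
            /* ... */
    };

Since rb_leftmost now lives inside tasks_timeline, the patch synthesizes the
old cfs_rq_rb_leftmost offset as OFFSET(cfs_rq_tasks_timeline) plus the
offset of rb_leftmost within rb_root_cached, as seen in the hunk below.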
diff --git a/task.c b/task.c
index 88706bf..2b12af0 100644
--- a/task.c
+++ b/task.c
@@ -8765,10 +8765,15 @@ cfs_rq_offset_init(void)
MEMBER_OFFSET_INIT(sched_rt_entity_my_q, "sched_rt_entity",
"my_q");
MEMBER_OFFSET_INIT(sched_entity_on_rq, "sched_entity", "on_rq");
- MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
- MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
MEMBER_OFFSET_INIT(cfs_rq_tasks_timeline, "cfs_rq",
"tasks_timeline");
+ MEMBER_OFFSET_INIT(cfs_rq_rb_leftmost, "cfs_rq", "rb_leftmost");
+ if (INVALID_MEMBER(cfs_rq_rb_leftmost) &&
+ VALID_MEMBER(cfs_rq_tasks_timeline) &&
+ MEMBER_EXISTS("rb_root_cached", "rb_leftmost"))
+ ASSIGN_OFFSET(cfs_rq_rb_leftmost) = OFFSET(cfs_rq_tasks_timeline) +
+ MEMBER_OFFSET("rb_root_cached", "rb_leftmost");
+ MEMBER_OFFSET_INIT(cfs_rq_nr_running, "cfs_rq", "nr_running");
MEMBER_OFFSET_INIT(cfs_rq_curr, "cfs_rq", "curr");
MEMBER_OFFSET_INIT(rt_rq_active, "rt_rq", "active");
MEMBER_OFFSET_INIT(task_struct_run_list, "task_struct",
commit 9e5255af26233e7ef051ebdd8bdccbd15d0d9256
Author: Dave Anderson <anderson@redhat.com>
Date: Wed Oct 11 16:11:34 2017 -0400
Fix to prevent a useless message during session initialization.
Without the patch, if the highest possible node bit in the
node_states[N_ONLINE] multi-word bitmask is set, then a message
such as "crash: next_online_node: 256 is too large!" will be
displayed.
(anderson@redhat.com)
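A worked example of the boundary condition, assuming BITS_PER_LONG is 64 and
a hypothetical four-word online-node bitmask (node_online_map_len == 4):

    #include <stdio.h>

    #define BITS_PER_LONG 64

    /* nodes 0-255 fit in four 64-bit words, so the successor candidate
     * of node 255 is 256, which lands exactly one word past the map */
    static int past_map(int first, int map_len)
    {
            return (first / BITS_PER_LONG) >= map_len;
    }

    int main(void)
    {
            printf("%d\n", past_map(256, 4));  /* 1: normal termination */
            return 0;
    }

Hitting the end of the map is the expected "no more nodes" condition, so the
patch returns -1 quietly instead of issuing the error message.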
diff --git a/memory.c b/memory.c
index 8efe0b2..9c9a40d 100644
--- a/memory.c
+++ b/memory.c
@@ -17200,10 +17200,8 @@ next_online_node(int first)
int i, j, node;
ulong mask, *maskptr;
- if ((first/BITS_PER_LONG) >= vt->node_online_map_len) {
- error(INFO, "next_online_node: %d is too large!\n", first);
+ if ((first/BITS_PER_LONG) >= vt->node_online_map_len)
return -1;
- }
maskptr = (ulong *)vt->node_online_map;
for (i = node = 0; i < vt->node_online_map_len; i++, maskptr++) {
commit 2b93c036edf2a5cc21a06a14f377cd9b365f858a
Author: Dave Anderson <anderson@redhat.com>
Date: Tue Oct 17 15:40:17 2017 -0400
Additional fixes for the ARM64 "bt" command for Linux 4.14 kernels.
The patch corrects the contents of in-kernel exception frame register
dumps, and properly transitions the backtrace from the IRQ stack
to the process stack.
(takahiro.akashi@linaro.org)
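Much of the "+ 16" arithmetic in this patch stems from a v4.14 kernel change
that appended a 16-byte frame record to the arm64 pt_regs. A rough,
illustrative stand-in (the real structure has additional fields):

    #include <stdint.h>
    #include <stdio.h>

    struct pt_regs_v414 {
            uint64_t regs[31];
            uint64_t sp, pc, pstate;
            uint64_t orig_x0, syscallno;
            uint64_t stackframe[2];  /* embedded {fp, lr} frame record */
    };

    int main(void)
    {
            uint64_t fp = 0xffff000008003f90ULL;  /* made-up saved fp */

            /* the in-kernel fp points at the embedded record, so the
             * enclosing pt_regs starts 16 bytes short of a full struct */
            uint64_t eframe = fp - sizeof(struct pt_regs_v414) + 16;
            printf("exception frame at %#llx\n",
                (unsigned long long)eframe);
            return 0;
    }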
diff --git a/arm64.c b/arm64.c
index 20c5d34..c75669b 100644
--- a/arm64.c
+++ b/arm64.c
@@ -72,6 +72,7 @@ static void arm64_cmd_mach(void);
static void arm64_display_machine_stats(void);
static int arm64_get_smp_cpus(void);
static void arm64_clear_machdep_cache(void);
+static int arm64_on_process_stack(struct bt_info *, ulong);
static int arm64_in_alternate_stack(int, ulong);
static int arm64_on_irq_stack(int, ulong);
static void arm64_set_irq_stack(struct bt_info *);
@@ -1333,34 +1334,64 @@ arm64_irq_stack_init(void)
int i;
struct syment *sp;
struct gnu_request request, *req;
- req = &request;
struct machine_specific *ms = machdep->machspec;
+ ulong p;
+ req = &request;
- if (!symbol_exists("irq_stack") ||
- !(sp = per_cpu_symbol_search("irq_stack")) ||
- !get_symbol_type("irq_stack", NULL, req) ||
- (req->typecode != TYPE_CODE_ARRAY) ||
- (req->target_typecode != TYPE_CODE_INT))
- return;
-
- if (CRASHDEBUG(1)) {
- fprintf(fp, "irq_stack: \n");
- fprintf(fp, " type: %s\n",
- (req->typecode == TYPE_CODE_ARRAY) ? "TYPE_CODE_ARRAY" : "other");
- fprintf(fp, " target_typecode: %s\n",
- req->target_typecode == TYPE_CODE_INT ? "TYPE_CODE_INT" : "other");
- fprintf(fp, " target_length: %ld\n", req->target_length);
- fprintf(fp, " length: %ld\n", req->length);
- }
-
- ms->irq_stack_size = req->length;
- if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
- error(FATAL, "cannot malloc irq_stack addresses\n");
+ if (symbol_exists("irq_stack") &&
+ (sp = per_cpu_symbol_search("irq_stack")) &&
+ get_symbol_type("irq_stack", NULL, req)) {
+ /* before v4.14 or CONFIG_VMAP_STACK disabled */
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "irq_stack: \n");
+ fprintf(fp, " type: %s\n",
+ (req->typecode == TYPE_CODE_ARRAY) ?
+ "TYPE_CODE_ARRAY" : "other");
+ fprintf(fp, " target_typecode: %s\n",
+ req->target_typecode == TYPE_CODE_INT ?
+ "TYPE_CODE_INT" : "other");
+ fprintf(fp, " target_length: %ld\n",
+ req->target_length);
+ fprintf(fp, " length: %ld\n", req->length);
+ }
+
+ if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
+ error(FATAL, "cannot malloc irq_stack addresses\n");
+ ms->irq_stack_size = req->length;
+ machdep->flags |= IRQ_STACKS;
- for (i = 0; i < kt->cpus; i++)
- ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value;
+ for (i = 0; i < kt->cpus; i++)
+ ms->irq_stacks[i] = kt->__per_cpu_offset[i] + sp->value;
+ } else if (symbol_exists("irq_stack_ptr") &&
+ (sp = per_cpu_symbol_search("irq_stack_ptr")) &&
+ get_symbol_type("irq_stack_ptr", NULL, req)) {
+ /* v4.14 and later with CONFIG_VMAP_STACK enabled */
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "irq_stack_ptr: \n");
+ fprintf(fp, " type: %x, %s\n",
+ (int)req->typecode,
+ (req->typecode == TYPE_CODE_PTR) ?
+ "TYPE_CODE_PTR" : "other");
+ fprintf(fp, " target_typecode: %x, %s\n",
+ (int)req->target_typecode,
+ req->target_typecode == TYPE_CODE_INT ?
+ "TYPE_CODE_INT" : "other");
+ fprintf(fp, " target_length: %ld\n",
+ req->target_length);
+ fprintf(fp, " length: %ld\n", req->length);
+ }
+
+ if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
+ error(FATAL, "cannot malloc irq_stack addresses\n");
+ ms->irq_stack_size = 16384;
+ machdep->flags |= IRQ_STACKS;
- machdep->flags |= IRQ_STACKS;
+ for (i = 0; i < kt->cpus; i++) {
+ p = kt->__per_cpu_offset[i] + sp->value;
+ readmem(p, KVADDR, &(ms->irq_stacks[i]), sizeof(ulong),
+ "IRQ stack pointer", RETURN_ON_ERROR);
+ }
+ }
}
/*
@@ -1750,11 +1781,20 @@ arm64_display_full_frame(struct bt_info *bt, ulong sp)
if (bt->frameptr == sp)
return;
- if (!INSTACK(sp, bt) || !INSTACK(bt->frameptr, bt)) {
- if (sp == 0)
- sp = bt->stacktop - USER_EFRAME_OFFSET;
- else
- return;
+ if (INSTACK(bt->frameptr, bt)) {
+ if (INSTACK(sp, bt)) {
+ ; /* normal case */
+ } else {
+ if (sp == 0)
+ /* interrupt in user mode */
+ sp = bt->stacktop - USER_EFRAME_OFFSET;
+ else
+ /* interrupt in kernel mode */
+ sp = bt->stacktop;
+ }
+ } else {
+ /* IRQ exception frame */
+ return;
}
words = (sp - bt->frameptr) / sizeof(ulong);
@@ -1860,6 +1900,9 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
if ((frame->fp == 0) && (frame->pc == 0))
return FALSE;
+ if (!(machdep->flags & IRQ_STACKS))
+ return TRUE;
+
/*
* The kernel's manner of determining the end of the IRQ stack:
*
@@ -1872,7 +1915,25 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
* irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
* orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack)
*/
- if (machdep->flags & IRQ_STACKS) {
+ if (machdep->flags & UNW_4_14) {
+ if ((bt->flags & BT_IRQSTACK) &&
+ !arm64_on_irq_stack(bt->tc->processor, frame->fp)) {
+ if (arm64_on_process_stack(bt, frame->fp)) {
+ arm64_set_process_stack(bt);
+
+ frame->sp = frame->fp - SIZE(pt_regs) + 16;
+ /* for switch_stack */
+ /* fp still points to irq stack */
+ bt->bptr = fp;
+ /* for display_full_frame */
+ /* sp points to process stack */
+ bt->frameptr = frame->sp;
+ } else {
+ /* irq -> user */
+ return FALSE;
+ }
+ }
+ } else { /* !UNW_4_14 */
ms = machdep->machspec;
irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16;
@@ -1896,7 +1957,7 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
return FALSE;
}
}
- }
+ } /* UNW_4_14 */
return TRUE;
}
@@ -2086,10 +2147,17 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
* We are on process stack. Just add a faked frame
*/
- if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp))
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs);
- else {
+ if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) {
+ if (MEMBER_EXISTS("pt_regs", "stackframe")) {
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs) - 16;
+ frame->fp = ext_frame.fp;
+ } else {
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs);
+ frame->fp = frame->sp;
+ }
+ } else {
/*
* FIXME: very exceptional case
* We are already back on process stack, but
@@ -2109,10 +2177,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
* Really ugly
*/
frame->sp = frame->fp + 0x20;
+ frame->fp = frame->sp;
fprintf(ofp, " (Next exception frame might be wrong)\n");
}
- frame->fp = frame->sp;
} else {
/* We are on IRQ stack */
@@ -2122,9 +2190,15 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
if (ext_frame.fp != irq_stack_ptr) {
/* (2) Just add a faked frame */
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs);
- frame->fp = frame->sp;
+ if (MEMBER_EXISTS("pt_regs", "stackframe")) {
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs);
+ frame->fp = ext_frame.fp;
+ } else {
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs) - 16;
+ frame->fp = frame->sp;
+ }
} else {
/*
* (3)
@@ -2303,12 +2377,17 @@ arm64_back_trace_cmd(struct bt_info *bt)
if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) {
if (!(bt->flags & BT_IRQSTACK) ||
- (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop)))
- exception_frame = stackframe.fp - SIZE(pt_regs);
+ (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) {
+ if (MEMBER_EXISTS("pt_regs", "stackframe"))
+ /* v4.14 or later */
+ exception_frame = stackframe.fp - SIZE(pt_regs) + 16;
+ else
+ exception_frame = stackframe.fp - SIZE(pt_regs);
+ }
}
if ((bt->flags & BT_IRQSTACK) &&
- !arm64_on_irq_stack(bt->tc->processor, stackframe.sp)) {
+ !arm64_on_irq_stack(bt->tc->processor, stackframe.fp)) {
bt->flags &= ~BT_IRQSTACK;
if (arm64_switch_stack(bt, &stackframe, ofp) == USER_MODE)
break;
@@ -2424,6 +2503,8 @@ user_space:
* otherwise show an exception frame.
* Since exception entry code doesn't have a real
* stackframe, we fake a dummy frame here.
+ * Note: Since we have a real stack frame in pt_regs,
+ * we no longer need a dummy frame on v4.14 or later.
*/
if (!arm64_in_exp_entry(stackframe.pc))
continue;
@@ -2669,7 +2750,9 @@ arm64_switch_stack(struct bt_info *bt, struct arm64_stackframe *frame, FILE *ofp
if (frame->fp == 0)
return USER_MODE;
- arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+ if (!(machdep->flags & UNW_4_14))
+ arm64_print_exception_frame(bt, frame->sp, KERNEL_MODE, ofp);
+
return KERNEL_MODE;
}
@@ -3363,6 +3446,20 @@ arm64_clear_machdep_cache(void) {
}
static int
+arm64_on_process_stack(struct bt_info *bt, ulong stkptr)
+{
+ ulong stackbase, stacktop;
+
+ stackbase = GET_STACKBASE(bt->task);
+ stacktop = GET_STACKTOP(bt->task);
+
+ if ((stkptr >= stackbase) && (stkptr < stacktop))
+ return TRUE;
+
+ return FALSE;
+}
+
+static int
arm64_on_irq_stack(int cpu, ulong stkptr)
{
return arm64_in_alternate_stack(cpu, stkptr);
commit 30950ba8885fb39a1ed7b071cdb225e3ec38e7b3
Author: Dave Anderson <anderson@redhat.com>
Date: Tue Oct 17 16:20:19 2017 -0400
Implemented a new "search -T" option, which is identical to the
"search -t" option, except that the search is restricted to the
kernel stacks of active tasks.
(atomlin@redhat.com)
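A hypothetical session, for contrast:

    crash> search -t deadbeef
    (scans the kernel stack pages of every task)

    crash> search -T deadbeef
    (scans only the kernel stacks of the active task on each cpu)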
diff --git a/help.c b/help.c
index 2d80202..a9aab37 100644
--- a/help.c
+++ b/help.c
@@ -2862,7 +2862,7 @@ NULL
char *help_search[] = {
"search",
"search memory",
-"[-s start] [ -[kKV] | -u | -p | -t ] [-e end | -l length] [-m mask]\n"
+"[-s start] [ -[kKV] | -u | -p | -t | -T ] [-e end | -l length] [-m mask]\n"
" [-x count] -[cwh] [value | (expression) | symbol | string] ...",
" This command searches for a given value within a range of user virtual, kernel",
" virtual, or physical memory space. If no end nor length value is entered, ",
@@ -2893,6 +2893,7 @@ char *help_search[] = {
" -t Search only the kernel stack pages of every task. If one or more",
" matches are found in a task's kernel stack, precede the output",
" with a task-identifying header.",
+" -T Same as -t, except only the active task(s) are considered.",
" -e end Stop the search at this hexadecimal user or kernel virtual",
" address, kernel symbol, or physical address. The end address",
" must be appropriate for the memory type specified.",
diff --git a/memory.c b/memory.c
index 9c9a40d..fb534e8 100644
--- a/memory.c
+++ b/memory.c
@@ -13882,7 +13882,7 @@ cmd_search(void)
ulong value, mask, len;
ulong uvaddr_start, uvaddr_end;
ulong kvaddr_start, kvaddr_end, range_end;
- int sflag, Kflag, Vflag, pflag, tflag;
+ int sflag, Kflag, Vflag, pflag, Tflag, tflag;
struct searchinfo searchinfo;
struct syment *sp;
struct node_table *nt;
@@ -13896,7 +13896,7 @@ cmd_search(void)
context = max = 0;
start = end = 0;
- value = mask = sflag = pflag = Kflag = Vflag = memtype = len = tflag = 0;
+ value = mask = sflag = pflag = Kflag = Vflag = memtype = len = Tflag = tflag = 0;
kvaddr_start = kvaddr_end = 0;
uvaddr_start = UNINITIALIZED;
uvaddr_end = COMMON_VADDR_SPACE() ? (ulong)(-1) : machdep->kvbase;
@@ -13933,7 +13933,7 @@ cmd_search(void)
searchinfo.mode = SEARCH_ULONG; /* default search */
- while ((c = getopt(argcnt, args, "tl:ukKVps:e:v:m:hwcx:")) != EOF) {
+ while ((c = getopt(argcnt, args, "Ttl:ukKVps:e:v:m:hwcx:")) != EOF) {
switch(c)
{
case 'u':
@@ -14038,12 +14038,19 @@ cmd_search(void)
context = dtoi(optarg, FAULT_ON_ERROR, NULL);
break;
+ case 'T':
case 't':
if (XEN_HYPER_MODE())
error(FATAL,
- "-t option is not applicable to the "
- "Xen hypervisor\n");
- tflag++;
+ "-%c option is not applicable to the "
+ "Xen hypervisor\n", c);
+ if (c == 'T')
+ Tflag++;
+ else if (c == 't')
+ tflag++;
+ if (tflag && Tflag)
+ error(FATAL,
+ "-t and -T options are mutually exclusive\n");
break;
default:
@@ -14052,10 +14059,11 @@ cmd_search(void)
}
}
- if (tflag && (memtype || start || end || len))
+ if ((tflag || Tflag) && (memtype || start || end || len))
error(FATAL,
- "-t option cannot be used with other "
- "memory-selection options\n");
+ "-%c option cannot be used with other "
+ "memory-selection options\n",
+ tflag ? 't' : 'T');
if (XEN_HYPER_MODE()) {
memtype = KVADDR;
@@ -14328,10 +14336,12 @@ cmd_search(void)
break;
}
- if (tflag) {
+ if (tflag || Tflag) {
searchinfo.tasks_found = 0;
tc = FIRST_CONTEXT();
for (i = 0; i < RUNNING_TASKS(); i++, tc++) {
+ if (Tflag && !is_task_active(tc->task))
+ continue;
searchinfo.vaddr_start = GET_STACKBASE(tc->task);
searchinfo.vaddr_end = GET_STACKTOP(tc->task);
searchinfo.task_context = tc;
commit 090bf28907782549ba980c588979372061764aa7
Author: Dave Anderson <anderson@redhat.com>
Date: Fri Oct 20 14:23:36 2017 -0400
Removal of the ARM64 "bt -o" option for Linux 4.14 and later kernels,
along with several cleanups/readability improvements.
(takahiro.akashi@linaro.org)
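The exception-frame offsets that this patch caches, written out (a sketch;
the 16-byte term is the embedded stackframe record present on v4.14+):

                          user exception frame    kernel exception frame
    v4.14+ (stackframe):  SIZE(pt_regs)           SIZE(pt_regs) - 16
    pre-4.14:             SIZE(pt_regs) + 16      SIZE(pt_regs)

In both cases a kernel-mode exception frame is then located at
fp - KERN_EFRAME_OFFSET, which is what the simplified hunks below use.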
diff --git a/arm64.c b/arm64.c
index c75669b..7904f65 100644
--- a/arm64.c
+++ b/arm64.c
@@ -612,6 +612,7 @@ arm64_dump_machdep_table(ulong arg)
fprintf(fp, " exp_entry2_end: %lx\n", ms->exp_entry2_end);
fprintf(fp, " panic_task_regs: %lx\n", (ulong)ms->panic_task_regs);
fprintf(fp, " user_eframe_offset: %ld\n", ms->user_eframe_offset);
+ fprintf(fp, " kern_eframe_offset: %ld\n", ms->kern_eframe_offset);
fprintf(fp, " PTE_PROT_NONE: %lx\n", ms->PTE_PROT_NONE);
fprintf(fp, " PTE_FILE: ");
if (ms->PTE_FILE)
@@ -1383,7 +1384,7 @@ arm64_irq_stack_init(void)
if (!(ms->irq_stacks = (ulong *)malloc((size_t)(kt->cpus * sizeof(ulong)))))
error(FATAL, "cannot malloc irq_stack addresses\n");
- ms->irq_stack_size = 16384;
+ ms->irq_stack_size = ARM64_IRQ_STACK_SIZE;
machdep->flags |= IRQ_STACKS;
for (i = 0; i < kt->cpus; i++) {
@@ -1410,10 +1411,13 @@ arm64_stackframe_init(void)
MEMBER_OFFSET_INIT(elf_prstatus_pr_pid, "elf_prstatus", "pr_pid");
MEMBER_OFFSET_INIT(elf_prstatus_pr_reg, "elf_prstatus", "pr_reg");
- if (MEMBER_EXISTS("pt_regs", "stackframe"))
+ if (MEMBER_EXISTS("pt_regs", "stackframe")) {
machdep->machspec->user_eframe_offset = SIZE(pt_regs);
- else
+ machdep->machspec->kern_eframe_offset = SIZE(pt_regs) - 16;
+ } else {
machdep->machspec->user_eframe_offset = SIZE(pt_regs) + 16;
+ machdep->machspec->kern_eframe_offset = SIZE(pt_regs);
+ }
machdep->machspec->__exception_text_start =
symbol_value("__exception_text_start");
@@ -1503,6 +1507,7 @@ arm64_stackframe_init(void)
#define USER_MODE (2)
#define USER_EFRAME_OFFSET (machdep->machspec->user_eframe_offset)
+#define KERN_EFRAME_OFFSET (machdep->machspec->kern_eframe_offset)
/*
* PSR bits
@@ -1793,7 +1798,7 @@ arm64_display_full_frame(struct bt_info *bt, ulong sp)
sp = bt->stacktop;
}
} else {
- /* IRQ exception frame */
+ /* This is a transition case from irq to process stack. */
return;
}
@@ -1903,61 +1908,73 @@ arm64_unwind_frame(struct bt_info *bt, struct arm64_stackframe *frame)
if (!(machdep->flags & IRQ_STACKS))
return TRUE;
- /*
- * The kernel's manner of determining the end of the IRQ stack:
- *
- * #define THREAD_SIZE 16384
- * #define THREAD_START_SP (THREAD_SIZE - 16)
- * #define IRQ_STACK_START_SP THREAD_START_SP
- * #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
- * #define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
- *
- * irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
- * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack)
- */
+ if (!(machdep->flags & IRQ_STACKS))
+ return TRUE;
+
if (machdep->flags & UNW_4_14) {
if ((bt->flags & BT_IRQSTACK) &&
!arm64_on_irq_stack(bt->tc->processor, frame->fp)) {
if (arm64_on_process_stack(bt, frame->fp)) {
arm64_set_process_stack(bt);
- frame->sp = frame->fp - SIZE(pt_regs) + 16;
- /* for switch_stack */
- /* fp still points to irq stack */
+ frame->sp = frame->fp - KERN_EFRAME_OFFSET;
+ /*
+ * for switch_stack
+ * fp still points to irq stack
+ */
bt->bptr = fp;
- /* for display_full_frame */
- /* sp points to process stack */
- bt->frameptr = frame->sp;
+ /*
+ * for display_full_frame
+ * sp points to process stack
+ *
+ * If we want to see pt_regs,
+ * comment out the below.
+ * bt->frameptr = frame->sp;
+ */
} else {
/* irq -> user */
return FALSE;
}
}
- } else { /* !UNW_4_14 */
- ms = machdep->machspec;
- irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16;
-
- if (frame->sp == irq_stack_ptr) {
- orig_sp = GET_STACK_ULONG(irq_stack_ptr - 8);
- arm64_set_process_stack(bt);
- if (INSTACK(orig_sp, bt) && (INSTACK(frame->fp, bt) || (frame->fp == 0))) {
- ptregs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(orig_sp))];
- frame->sp = orig_sp;
- frame->pc = ptregs->pc;
- bt->bptr = fp;
- if (CRASHDEBUG(1))
- error(INFO,
- "arm64_unwind_frame: switch stacks: fp: %lx sp: %lx pc: %lx\n",
- frame->fp, frame->sp, frame->pc);
- } else {
- error(WARNING,
- "arm64_unwind_frame: on IRQ stack: oriq_sp: %lx%s fp: %lx%s\n",
- orig_sp, INSTACK(orig_sp, bt) ? "" : " (?)",
- frame->fp, INSTACK(frame->fp, bt) ? "" : " (?)");
- return FALSE;
- }
+
+ return TRUE;
+ }
+
+ /*
+ * The kernel's manner of determining the end of the IRQ stack:
+ *
+ * #define THREAD_SIZE 16384
+ * #define THREAD_START_SP (THREAD_SIZE - 16)
+ * #define IRQ_STACK_START_SP THREAD_START_SP
+ * #define IRQ_STACK_PTR(cpu) ((unsigned long)per_cpu(irq_stack, cpu) + IRQ_STACK_START_SP)
+ * #define IRQ_STACK_TO_TASK_STACK(ptr) (*((unsigned long *)((ptr) - 0x08)))
+ *
+ * irq_stack_ptr = IRQ_STACK_PTR(raw_smp_processor_id());
+ * orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); (pt_regs pointer on process stack)
+ */
+ ms = machdep->machspec;
+ irq_stack_ptr = ms->irq_stacks[bt->tc->processor] + ms->irq_stack_size - 16;
+
+ if (frame->sp == irq_stack_ptr) {
+ orig_sp = GET_STACK_ULONG(irq_stack_ptr - 8);
+ arm64_set_process_stack(bt);
+ if (INSTACK(orig_sp, bt) && (INSTACK(frame->fp, bt) || (frame->fp == 0))) {
+ ptregs = (struct arm64_pt_regs *)&bt->stackbuf[(ulong)(STACK_OFFSET_TYPE(orig_sp))];
+ frame->sp = orig_sp;
+ frame->pc = ptregs->pc;
+ bt->bptr = fp;
+ if (CRASHDEBUG(1))
+ error(INFO,
+ "arm64_unwind_frame: switch stacks: fp: %lx sp: %lx pc: %lx\n",
+ frame->fp, frame->sp, frame->pc);
+ } else {
+ error(WARNING,
+ "arm64_unwind_frame: on IRQ stack: oriq_sp: %lx%s fp: %lx%s\n",
+ orig_sp, INSTACK(orig_sp, bt) ? "" : " (?)",
+ frame->fp, INSTACK(frame->fp, bt) ? "" : " (?)");
+ return FALSE;
}
- } /* UNW_4_14 */
+ }
return TRUE;
}
@@ -2147,17 +2164,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
* We are on process stack. Just add a faked frame
*/
- if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp)) {
- if (MEMBER_EXISTS("pt_regs", "stackframe")) {
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs) - 16;
- frame->fp = ext_frame.fp;
- } else {
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs);
- frame->fp = frame->sp;
- }
- } else {
+ if (!arm64_on_irq_stack(bt->tc->processor, ext_frame.fp))
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs);
+ else {
/*
* FIXME: very exceptional case
* We are already back on process stack, but
@@ -2177,10 +2187,10 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
* Really ugly
*/
frame->sp = frame->fp + 0x20;
- frame->fp = frame->sp;
fprintf(ofp, " (Next exception frame might be wrong)\n");
}
+ frame->fp = frame->sp;
} else {
/* We are on IRQ stack */
@@ -2190,15 +2200,9 @@ arm64_unwind_frame_v2(struct bt_info *bt, struct arm64_stackframe *frame,
if (ext_frame.fp != irq_stack_ptr) {
/* (2) Just add a faked frame */
- if (MEMBER_EXISTS("pt_regs", "stackframe")) {
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs);
- frame->fp = ext_frame.fp;
- } else {
- frame->sp = ext_frame.fp
- - sizeof(struct arm64_pt_regs) - 16;
- frame->fp = frame->sp;
- }
+ frame->sp = ext_frame.fp
+ - sizeof(struct arm64_pt_regs);
+ frame->fp = frame->sp;
} else {
/*
* (3)
@@ -2285,6 +2289,11 @@ arm64_back_trace_cmd(struct bt_info *bt)
FILE *ofp;
if (bt->flags & BT_OPT_BACK_TRACE) {
+ if (machdep->flags & UNW_4_14) {
+ option_not_supported('o');
+ return;
+ }
+
arm64_back_trace_cmd_v2(bt);
return;
}
@@ -2346,7 +2355,7 @@ arm64_back_trace_cmd(struct bt_info *bt)
goto complete_user;
if (DUMPFILE() && is_task_active(bt->task)) {
- exception_frame = stackframe.fp - SIZE(pt_regs);
+ exception_frame = stackframe.fp - KERN_EFRAME_OFFSET;
if (arm64_is_kernel_exception_frame(bt, exception_frame))
arm64_print_exception_frame(bt, exception_frame,
KERNEL_MODE, ofp);
@@ -2377,13 +2386,8 @@ arm64_back_trace_cmd(struct bt_info *bt)
if (arm64_in_exception_text(bt->instptr) && INSTACK(stackframe.fp, bt)) {
if (!(bt->flags & BT_IRQSTACK) ||
- (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop))) {
- if (MEMBER_EXISTS("pt_regs", "stackframe"))
- /* v4.14 or later */
- exception_frame = stackframe.fp - SIZE(pt_regs) + 16;
- else
- exception_frame = stackframe.fp - SIZE(pt_regs);
- }
+ (((stackframe.sp + SIZE(pt_regs)) < bt->stacktop)))
+ exception_frame = stackframe.fp - KERN_EFRAME_OFFSET;
}
if ((bt->flags & BT_IRQSTACK) &&
@@ -2503,8 +2507,6 @@ user_space:
* otherwise show an exception frame.
* Since exception entry code doesn't have a real
* stackframe, we fake a dummy frame here.
- * Note: Since we have a real stack frame in pt_regs,
- * we no longer need a dummy frame on v4.14 or later.
*/
if (!arm64_in_exp_entry(stackframe.pc))
continue;
diff --git a/defs.h b/defs.h
index 7768895..a694a66 100644
--- a/defs.h
+++ b/defs.h
@@ -3038,6 +3038,7 @@ typedef signed int s32;
#define ARM64_VMEMMAP_END (ARM64_VMEMMAP_VADDR + GIGABYTES(8UL) - 1)
#define ARM64_STACK_SIZE (16384)
+#define ARM64_IRQ_STACK_SIZE ARM64_STACK_SIZE
#define _SECTION_SIZE_BITS 30
#define _MAX_PHYSMEM_BITS 40
@@ -3117,6 +3118,8 @@ struct machine_specific {
ulong kimage_text;
ulong kimage_end;
ulong user_eframe_offset;
+ /* for v4.14 or later */
+ ulong kern_eframe_offset;
};
struct arm64_stackframe {
diff --git a/help.c b/help.c
index a9aab37..f9c5792 100644
--- a/help.c
+++ b/help.c
@@ -1799,7 +1799,8 @@ char *help_bt[] = {
" It does so by verifying the thread_info.task pointer, ensuring that",
" the thread_info.cpu is a valid cpu number, and checking the end of ",
" the stack for the STACK_END_MAGIC value.",
-" -o arm64: use optional backtrace method.",
+" -o arm64: use optional backtrace method; not supported on Linux 4.14 or",
+" later kernels.",
" x86: use old backtrace method, permissible only on kernels that were",
" compiled without the -fomit-frame_pointer.",
" x86_64: use old backtrace method, which dumps potentially stale",
diff --git a/task.c b/task.c
index 2b12af0..362822c 100644
--- a/task.c
+++ b/task.c
@@ -6750,6 +6750,8 @@ panic_search(void)
fd->keyword_array[0] = FOREACH_BT;
if (machine_type("S390X"))
fd->flags |= FOREACH_o_FLAG;
+ else if (machine_type("ARM64"))
+ fd->flags |= FOREACH_t_FLAG;
else
fd->flags |= (FOREACH_t_FLAG|FOREACH_o_FLAG);
commit 45b74b89530d611b3fa95a1041e158fbb865fa84
Author: Dave Anderson <anderson@redhat.com>
Date: Mon Oct 23 11:15:39 2017 -0400
Fix for support of KASLR enabled kernels captured by the SADUMP
dumpfile facility. SADUMP dumpfile headers do not contain phys_base
or VMCOREINFO notes, so without this patch, the crash session fails
during initialization with the message "crash: seek error: kernel
virtual address: <address> type: "page_offset_base"". This patch
calculates the phys_base value and the KASLR offset using the IDTR
and CR3 registers from the dumpfile header.
(indou.takao@jp.fujitsu.com)
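A worked example with made-up addresses, following the calculation described
in the comment block added below (assuming __START_KERNEL_map is
0xffffffff80000000):

    divide_error in vmlinux:  ffffffff81a00b30
    divide_error in vmcore:   ffffffff9ce00b30  (read via IDT vector 0)
    kaslr_offset           =  ffffffff9ce00b30 - ffffffff81a00b30
                           =  1b400000

    idt_table in vmlinux:     ffffffff81c00000
    IDT physical address:     35e00000          (IDTR translated via CR3)
    phys_base              =  35e00000 -
                              (ffffffff81c00000 + 1b400000 - ffffffff80000000)
                           =  18e00000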
diff --git a/defs.h b/defs.h
index a694a66..76e5512 100644
--- a/defs.h
+++ b/defs.h
@@ -2591,6 +2591,9 @@ struct symbol_table_data {
ulong last_section_end;
ulong _stext_vmlinux;
struct downsized downsized;
+ ulong divide_error_vmlinux;
+ ulong idt_table_vmlinux;
+ ulong saved_command_line_vmlinux;
};
/* flags for st */
@@ -6312,6 +6315,7 @@ void sadump_set_zero_excluded(void);
void sadump_unset_zero_excluded(void);
struct sadump_data;
struct sadump_data *get_sadump_data(void);
+int sadump_calc_kaslr_offset(ulong *);
/*
* qemu.c
diff --git a/sadump.c b/sadump.c
index a96ba9c..2ccfa82 100644
--- a/sadump.c
+++ b/sadump.c
@@ -1558,12 +1558,17 @@ sadump_display_regs(int cpu, FILE *ofp)
*/
int sadump_phys_base(ulong *phys_base)
{
- if (SADUMP_VALID()) {
+ if (SADUMP_VALID() && !sd->phys_base) {
if (CRASHDEBUG(1))
error(NOTE, "sadump: does not save phys_base.\n");
return FALSE;
}
+ if (sd->phys_base) {
+ *phys_base = sd->phys_base;
+ return TRUE;
+ }
+
return FALSE;
}
@@ -1649,3 +1654,461 @@ get_sadump_data(void)
{
return sd;
}
+
+#ifdef X86_64
+static int
+get_sadump_smram_cpu_state_any(struct sadump_smram_cpu_state *smram)
+{
+ ulong offset;
+ struct sadump_header *sh = sd->dump_header;
+ int apicid;
+ struct sadump_smram_cpu_state scs, zero;
+
+ offset = sd->sub_hdr_offset + sizeof(uint32_t) +
+ sd->dump_header->nr_cpus * sizeof(struct sadump_apic_state);
+
+ memset(&zero, 0, sizeof(zero));
+
+ for (apicid = 0; apicid < sh->nr_cpus; ++apicid) {
+ if (!read_device(&scs, sizeof(scs), &offset)) {
+ error(INFO, "sadump: cannot read sub header "
+ "cpu_state\n");
+ return FALSE;
+ }
+ if (memcmp(&scs, &zero, sizeof(scs)) != 0) {
+ *smram = scs;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+/*
+ * Get address of vector0 interrupt handler (Divide Error) from Interrupt
+ * Descriptor Table.
+ */
+static ulong
+get_vec0_addr(ulong idtr)
+{
+ struct gate_struct64 {
+ uint16_t offset_low;
+ uint16_t segment;
+ uint32_t ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+ uint16_t offset_middle;
+ uint32_t offset_high;
+ uint32_t zero1;
+ } __attribute__((packed)) gate;
+
+ readmem(idtr, PHYSADDR, &gate, sizeof(gate), "idt_table", FAULT_ON_ERROR);
+
+ return ((ulong)gate.offset_high << 32)
+ + ((ulong)gate.offset_middle << 16)
+ + gate.offset_low;
+}
+
+/*
+ * Parse a string of [size[KMG] ]offset[KMG]
+ * Import from Linux kernel(lib/cmdline.c)
+ */
+static ulong memparse(char *ptr, char **retptr)
+{
+ char *endptr;
+
+ unsigned long long ret = strtoull(ptr, &endptr, 0);
+
+ switch (*endptr) {
+ case 'E':
+ case 'e':
+ ret <<= 10;
+ case 'P':
+ case 'p':
+ ret <<= 10;
+ case 'T':
+ case 't':
+ ret <<= 10;
+ case 'G':
+ case 'g':
+ ret <<= 10;
+ case 'M':
+ case 'm':
+ ret <<= 10;
+ case 'K':
+ case 'k':
+ ret <<= 10;
+ endptr++;
+ default:
+ break;
+ }
+
+ if (retptr)
+ *retptr = endptr;
+
+ return ret;
+}
+
+/*
+ * Find "elfcorehdr=" in the boot parameter of kernel and return the address
+ * of elfcorehdr.
+ */
+static ulong
+get_elfcorehdr(ulong cr3, ulong kaslr_offset)
+{
+ char cmdline[BUFSIZE], *ptr;
+ ulong cmdline_vaddr;
+ ulong cmdline_paddr;
+ ulong buf_vaddr, buf_paddr;
+ char *end;
+ ulong elfcorehdr_addr = 0, elfcorehdr_size = 0;
+ int verbose = CRASHDEBUG(1)? 1: 0;
+
+ cmdline_vaddr = st->saved_command_line_vmlinux + kaslr_offset;
+ if (!kvtop(NULL, cmdline_vaddr, &cmdline_paddr, verbose))
+ return 0;
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "cmdline vaddr=%lx\n", cmdline_vaddr);
+ fprintf(fp, "cmdline paddr=%lx\n", cmdline_paddr);
+ }
+
+ if (!readmem(cmdline_paddr, PHYSADDR, &buf_vaddr, sizeof(ulong),
+ "saved_command_line", RETURN_ON_ERROR))
+ return 0;
+
+ if (!kvtop(NULL, buf_vaddr, &buf_paddr, verbose))
+ return 0;
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "cmdline buffer vaddr=%lx\n", buf_vaddr);
+ fprintf(fp, "cmdline buffer paddr=%lx\n", buf_paddr);
+ }
+
+ memset(cmdline, 0, BUFSIZE);
+ if (!readmem(buf_paddr, PHYSADDR, cmdline, BUFSIZE,
+ "saved_command_line", RETURN_ON_ERROR))
+ return 0;
+
+ ptr = strstr(cmdline, "elfcorehdr=");
+ if (!ptr)
+ return 0;
+
+ if (CRASHDEBUG(1))
+ fprintf(fp, "2nd kernel detected\n");
+
+ ptr += strlen("elfcorehdr=");
+ elfcorehdr_addr = memparse(ptr, &end);
+ if (*end == '@') {
+ elfcorehdr_size = elfcorehdr_addr;
+ elfcorehdr_addr = memparse(end + 1, &end);
+ }
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "elfcorehdr_addr=%lx\n", elfcorehdr_addr);
+ fprintf(fp, "elfcorehdr_size=%lx\n", elfcorehdr_size);
+ }
+
+ return elfcorehdr_addr;
+}
+
+ /*
+ * Get vmcoreinfo from elfcorehdr.
+ * Some code is imported from the Linux kernel (fs/proc/vmcore.c).
+ */
+static int
+get_vmcoreinfo(ulong elfcorehdr, ulong *addr, int *len)
+{
+ unsigned char e_ident[EI_NIDENT];
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr phdr;
+ Elf64_Nhdr nhdr;
+ ulong ptr;
+ ulong nhdr_offset = 0;
+ int i;
+
+ if (!readmem(elfcorehdr, PHYSADDR, e_ident, EI_NIDENT,
+ "EI_NIDENT", RETURN_ON_ERROR))
+ return FALSE;
+
+ if (e_ident[EI_CLASS] != ELFCLASS64) {
+ error(INFO, "Only ELFCLASS64 is supportd\n");
+ return FALSE;
+ }
+
+ if (!readmem(elfcorehdr, PHYSADDR, &ehdr, sizeof(ehdr),
+ "Elf64_Ehdr", RETURN_ON_ERROR))
+ return FALSE;
+
+ /* Sanity Check */
+ if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
+ (ehdr.e_type != ET_CORE) ||
+ ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
+ ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr.e_version != EV_CURRENT ||
+ ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
+ ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
+ ehdr.e_phnum == 0) {
+ error(INFO, "Invalid elf header\n");
+ return FALSE;
+ }
+
+ ptr = elfcorehdr + ehdr.e_phoff;
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ ulong offset;
+ char name[16];
+
+ if (!readmem(ptr, PHYSADDR, &phdr, sizeof(phdr),
+ "Elf64_Phdr", RETURN_ON_ERROR))
+ return FALSE;
+
+ ptr += sizeof(phdr);
+ if (phdr.p_type != PT_NOTE)
+ continue;
+
+ offset = phdr.p_offset;
+ if (!readmem(offset, PHYSADDR, &nhdr, sizeof(nhdr),
+ "Elf64_Nhdr", RETURN_ON_ERROR))
+ return FALSE;
+
+ offset += DIV_ROUND_UP(sizeof(Elf64_Nhdr), sizeof(Elf64_Word))*
+ sizeof(Elf64_Word);
+ memset(name, 0, sizeof(name));
+ if (!readmem(offset, PHYSADDR, name, sizeof(name),
+ "Elf64_Nhdr name", RETURN_ON_ERROR))
+ return FALSE;
+
+ if(!strcmp(name, "VMCOREINFO")) {
+ nhdr_offset = offset;
+ break;
+ }
+ }
+
+ if (!nhdr_offset)
+ return FALSE;
+
+ *addr = nhdr_offset +
+ DIV_ROUND_UP(nhdr.n_namesz, sizeof(Elf64_Word))*
+ sizeof(Elf64_Word);
+ *len = nhdr.n_descsz;
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "vmcoreinfo addr=%lx\n", *addr);
+ fprintf(fp, "vmcoreinfo len=%d\n", *len);
+ }
+
+ return TRUE;
+}
+
+/*
+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd kernel.
+ * If we are in 2nd kernel, get kaslr_offset/phys_base from vmcoreinfo.
+ *
+ * 1. Get command line and try to retrieve "elfcorehdr=" boot parameter
+ * 2. If "elfcorehdr=" is not found in command line, we are in 1st kernel.
+ * There is nothing to do.
+ * 3. If "elfcorehdr=" is found, we are in 2nd kernel. Find vmcoreinfo
+ * using "elfcorehdr=" and retrieve kaslr_offset/phys_base from vmcoreinfo.
+ */
+static int
+get_kaslr_offset_from_vmcoreinfo(ulong cr3, ulong orig_kaslr_offset,
+ ulong *kaslr_offset, ulong *phys_base)
+{
+ ulong elfcorehdr_addr = 0;
+ ulong vmcoreinfo_addr;
+ int vmcoreinfo_len;
+ char *buf, *pos;
+ int ret = FALSE;
+
+ /* Find "elfcorehdr=" in the kernel boot parameter */
+ elfcorehdr_addr = get_elfcorehdr(cr3, orig_kaslr_offset);
+ if (!elfcorehdr_addr)
+ return FALSE;
+
+ /* Get vmcoreinfo from the address of "elfcorehdr=" */
+ if (!get_vmcoreinfo(elfcorehdr_addr, &vmcoreinfo_addr, &vmcoreinfo_len))
+ return FALSE;
+
+ if (!vmcoreinfo_len)
+ return FALSE;
+
+ if (CRASHDEBUG(1))
+ fprintf(fp, "Find vmcoreinfo in kdump memory\n");
+
+ buf = GETBUF(vmcoreinfo_len);
+ if (!readmem(vmcoreinfo_addr, PHYSADDR, buf, vmcoreinfo_len,
+ "vmcoreinfo", RETURN_ON_ERROR))
+ goto quit;
+
+ /* Get phys_base from vmcoreinfo */
+ pos = strstr(buf, "NUMBER(phys_base)=");
+ if (!pos)
+ goto quit;
+ *phys_base = strtoull(pos + strlen("NUMBER(phys_base)="), NULL, 0);
+
+ /* Get kaslr_offset from vmcoreinfo */
+ pos = strstr(buf, "KERNELOFFSET=");
+ if (!pos)
+ goto quit;
+ *kaslr_offset = strtoull(pos + strlen("KERNELOFFSET="), NULL, 16);
+
+ ret = TRUE;
+
+quit:
+ FREEBUF(buf);
+ return ret;
+}
+
+/*
+ * Calculate kaslr_offset and phys_base
+ *
+ * kaslr_offset:
+ * The difference between original address in System.map or vmlinux and
+ * actual address placed randomly by kaslr feature. To be more accurate,
+ * kaslr_offset = actual address - original address
+ *
+ * phys_base:
+ * Physical address where the kernel is placed. In other words, it's the
+ * physical address of __START_KERNEL_map. This is also decided randomly by
+ * kaslr.
+ *
+ * kaslr offset and phys_base are calculated as follows:
+ *
+ * kaslr_offset:
+ * 1) Get IDTR and CR3 value from the dump header.
+ * 2) Get a virtual address of IDT from IDTR value
+ * --- (A)
+ * 3) Translate (A) to a physical address using CR3, which points to the
+ * top of the page table.
+ * --- (B)
+ * 4) Get the address of the vector0 (Divide Error) interrupt handler from
+ * the IDT, which is pointed to by (B).
+ * --- (C)
+ * 5) Get the address of the symbol "divide_error" from vmlinux
+ * --- (D)
+ *
+ * Now we have two addresses:
+ * (C)-> Actual address of "divide_error"
+ * (D)-> Original address of "divide_error" in the vmlinux
+ *
+ * kaslr_offset can be calculated by the difference between these two
+ * value.
+ *
+ * phys_base;
+ * 1) Get IDT virtual address from vmlinux
+ * --- (E)
+ *
+ * So phys_base can be calculated using the relationship of the directly
+ * mapped address.
+ *
+ * phys_base =
+ * Physical address(B) -
+ * (Virtual address(E) + kaslr_offset - __START_KERNEL_map)
+ *
+ * Note that the address (A) cannot be used instead of (E) because (A) is
+ * not a direct map address, it's a fixed map address.
+ *
+ * This solution works in almost every case, but does not work in the
+ * following cases.
+ *
+ * 1) If the dump is captured at an early stage of kernel boot, IDTR points
+ * to the early IDT table (early_idts) instead of the normal IDT (idt_table).
+ * 2) If the dump is captured while kdump is working, IDTR points to the
+ * IDT table of the 2nd kernel, not the 1st kernel.
+ *
+ * The current implementation does not support case 1); it needs
+ * enhancement in the future. For case 2), get kaslr_offset and
+ * phys_base as follows.
+ *
+ * 1) Get kaslr_offset and phys_base using the above solution.
+ * 2) Get kernel boot parameter from "saved_command_line"
+ * 3) If "elfcorehdr=" is not included in boot parameter, we are in the
+ * first kernel, nothing to do any more.
+ * 4) If "elfcorehdr=" is included in boot parameter, we are in the 2nd
+ * kernel. Retrieve vmcoreinfo from address of "elfcorehdr=" and
+ * get kaslr_offset and phys_base from vmcoreinfo.
+ */
+int
+sadump_calc_kaslr_offset(ulong *kaslr_offset)
+{
+ ulong phys_base = 0;
+ struct sadump_smram_cpu_state scs;
+ uint64_t idtr = 0, cr3 = 0, idtr_paddr;
+ ulong divide_error_vmcore;
+ ulong kaslr_offset_kdump, phys_base_kdump;
+ int ret = FALSE;
+ int verbose = CRASHDEBUG(1)? 1: 0;
+
+ if (!machine_type("X86_64"))
+ return FALSE;
+
+ memset(&scs, 0, sizeof(scs));
+ get_sadump_smram_cpu_state_any(&scs);
+ cr3 = scs.Cr3;
+ idtr = ((uint64_t)scs.IdtUpper)<<32 | (uint64_t)scs.IdtLower;
+
+ /*
+ * Set up for kvtop.
+ *
+ * calc_kaslr_offset() is called before machdep_init(PRE_GDB), so some
+ * variables are not initialized yet. Set up them here to call kvtop().
+ *
+ * TODO: XEN and 5-level is not supported
+ */
+ vt->kernel_pgd[0] = cr3;
+ machdep->machspec->last_pml4_read = vt->kernel_pgd[0];
+ machdep->machspec->physical_mask_shift = __PHYSICAL_MASK_SHIFT_2_6;
+ machdep->machspec->pgdir_shift = PGDIR_SHIFT;
+ if (!readmem(cr3, PHYSADDR, machdep->machspec->pml4, PAGESIZE(),
+ "cr3", RETURN_ON_ERROR))
+ goto quit;
+
+ /* Convert virtual address of IDT table to physical address */
+ if (!kvtop(NULL, idtr, &idtr_paddr, verbose))
+ goto quit;
+
+ /* Now we can calculate kaslr_offset and phys_base */
+ divide_error_vmcore = get_vec0_addr(idtr_paddr);
+ *kaslr_offset = divide_error_vmcore - st->divide_error_vmlinux;
+ phys_base = idtr_paddr -
+ (st->idt_table_vmlinux + *kaslr_offset - __START_KERNEL_map);
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "calc_kaslr_offset: idtr=%lx\n", idtr);
+ fprintf(fp, "calc_kaslr_offset: cr3=%lx\n", cr3);
+ fprintf(fp, "calc_kaslr_offset: idtr(phys)=%lx\n", idtr_paddr);
+ fprintf(fp, "calc_kaslr_offset: divide_error(vmlinux): %lx\n",
+ st->divide_error_vmlinux);
+ fprintf(fp, "calc_kaslr_offset: divide_error(vmcore): %lx\n",
+ divide_error_vmcore);
+ }
+
+ /*
+ * Check if current kaslr_offset/phys_base is for 1st kernel or 2nd
+ * kernel. If we are in 2nd kernel, get kaslr_offset/phys_base
+ * from vmcoreinfo
+ */
+ if (get_kaslr_offset_from_vmcoreinfo(
+ cr3, *kaslr_offset, &kaslr_offset_kdump, &phys_base_kdump)) {
+ *kaslr_offset = kaslr_offset_kdump;
+ phys_base = phys_base_kdump;
+ }
+
+ if (CRASHDEBUG(1)) {
+ fprintf(fp, "calc_kaslr_offset: kaslr_offset=%lx\n",
+ *kaslr_offset);
+ fprintf(fp, "calc_kaslr_offset: phys_base=%lx\n", phys_base);
+ }
+
+ sd->phys_base = phys_base;
+ ret = TRUE;
+quit:
+ vt->kernel_pgd[0] = 0;
+ machdep->machspec->last_pml4_read = 0;
+ return ret;
+}
+#else
+int
+sadump_calc_kaslr_offset(ulong *kaslr_offset)
+{
+ return FALSE;
+}
+#endif /* X86_64 */
diff --git a/sadump.h b/sadump.h
index 7f8e384..681f5e4 100644
--- a/sadump.h
+++ b/sadump.h
@@ -219,6 +219,7 @@ struct sadump_data {
ulonglong backup_offset;
uint64_t max_mapnr;
+ ulong phys_base;
};
struct sadump_data *sadump_get_sadump_data(void);
diff --git a/symbols.c b/symbols.c
index 02cb34e..b2f2796 100644
--- a/symbols.c
+++ b/symbols.c
@@ -624,6 +624,9 @@ kaslr_init(void)
st->_stext_vmlinux = UNINITIALIZED;
}
}
+
+ if (SADUMP_DUMPFILE())
+ kt->flags2 |= KASLR_CHECK;
}
/*
@@ -637,6 +640,19 @@ derive_kaslr_offset(bfd *abfd, int dynamic, bfd_byte *start, bfd_byte *end,
unsigned long relocate;
ulong _stext_relocated;
+ if (SADUMP_DUMPFILE()) {
+ ulong kaslr_offset = 0;
+
+ sadump_calc_kaslr_offset(&kaslr_offset);
+
+ if (kaslr_offset) {
+ kt->relocate = kaslr_offset * -1;
+ kt->flags |= RELOC_SET;
+ }
+
+ return;
+ }
+
if (ACTIVE()) {
_stext_relocated = symbol_value_from_proc_kallsyms("_stext");
if (_stext_relocated == BADVAL)
@@ -3052,6 +3068,16 @@ dump_symbol_table(void)
else
fprintf(fp, "\n");
+ if (SADUMP_DUMPFILE()) {
+ fprintf(fp, "divide_error_vmlinux: %lx\n", st->divide_error_vmlinux);
+ fprintf(fp, " idt_table_vmlinux: %lx\n", st->idt_table_vmlinux);
+ fprintf(fp, "saved_command_line_vmlinux: %lx\n", st->saved_command_line_vmlinux);
+ } else {
+ fprintf(fp, "divide_error_vmlinux: (unused)\n");
+ fprintf(fp, " idt_table_vmlinux: (unused)\n");
+ fprintf(fp, "saved_command_line_vmlinux: (unused)\n");
+ }
+
fprintf(fp, " symval_hash[%d]: %lx\n", SYMVAL_HASH,
(ulong)&st->symval_hash[0]);
@@ -12246,6 +12272,24 @@ numeric_forward(const void *P_x, const void *P_y)
}
}
+ if (SADUMP_DUMPFILE()) {
+ /* Need for kaslr_offset and phys_base */
+ if (STREQ(x->name, "divide_error"))
+ st->divide_error_vmlinux = valueof(x);
+ else if (STREQ(y->name, "divide_error"))
+ st->divide_error_vmlinux = valueof(y);
+
+ if (STREQ(x->name, "idt_table"))
+ st->idt_table_vmlinux = valueof(x);
+ else if (STREQ(y->name, "idt_table"))
+ st->idt_table_vmlinux = valueof(y);
+
+ if (STREQ(x->name, "saved_command_line"))
+ st->saved_command_line_vmlinux = valueof(x);
+ else if (STREQ(y->name, "saved_command_line"))
+ st->saved_command_line_vmlinux = valueof(y);
+ }
+
xs = bfd_get_section(x);
ys = bfd_get_section(y);
diff --git a/x86_64.c b/x86_64.c
index 6e60dda..2f9e6db 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -194,6 +194,9 @@ x86_64_init(int when)
machdep->machspec->kernel_image_size = dtol(string, QUIET, NULL);
free(string);
}
+ if (SADUMP_DUMPFILE())
+ /* Need for calculation of kaslr_offset and phys_base */
+ machdep->kvtop = x86_64_kvtop;
break;
case PRE_GDB:
@@ -2019,6 +2022,22 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
ulong pte;
physaddr_t physpage;
+ if (SADUMP_DUMPFILE() && !(machdep->flags & KSYMS_START)) {
+ /*
+ * In the case of sadump, to calculate kaslr_offset and
+ * phys_base, kvtop is called during symtab_init(). In this
+ * stage phys_base is not initialized yet and x86_64_VTOP()
+ * does not work. Jump to the code of pagetable translation.
+ */
+ FILL_PML4();
+ pml4 = ((ulong *)machdep->machspec->pml4) + pml4_index(kvaddr);
+ if (verbose) {
+ fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]);
+ fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
+ }
+ goto start_vtop_with_pagetable;
+ }
+
if (!IS_KVADDR(kvaddr))
return FALSE;
@@ -2065,6 +2084,8 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
fprintf(fp, "PAGE DIRECTORY: %lx\n", *pml4);
}
}
+
+start_vtop_with_pagetable:
if (!(*pml4) & _PAGE_PRESENT)
goto no_kpage;
pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
commit 4550bf32a5ec1d9b7b6d5099aaee6e8e363a7827
Author: Dave Anderson <anderson@redhat.com>
Date: Wed Oct 25 11:04:53 2017 -0400
Implemented a new "ps -y policy" option to filter the task display
by scheduling policy. Applicable to standalone ps invocations as
well as to invocations via foreach.
(oleksandr@redhat.com)
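Hypothetical usage, per the help text additions below:

    crash> ps -y FIFO,RR
    (displays only real-time SCHED_FIFO and SCHED_RR tasks)

    crash> ps -y 0
    (displays only SCHED_NORMAL tasks)

    crash> foreach ps -y DEADLINE
    (the same filter applied via foreach)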
diff --git a/defs.h b/defs.h
index 76e5512..4b4e331 100644
--- a/defs.h
+++ b/defs.h
@@ -1139,6 +1139,7 @@ extern struct machdep_table *machdep;
#define FOREACH_a_FLAG (0x4000000)
#define FOREACH_G_FLAG (0x8000000)
#define FOREACH_F_FLAG2 (0x10000000)
+#define FOREACH_y_FLAG (0x20000000)
#define FOREACH_PS_EXCLUSIVE \
(FOREACH_g_FLAG|FOREACH_a_FLAG|FOREACH_t_FLAG|FOREACH_c_FLAG|FOREACH_p_FLAG|FOREACH_l_FLAG|FOREACH_r_FLAG|FOREACH_m_FLAG)
@@ -1162,6 +1163,7 @@ struct foreach_data {
int comms;
int args;
int regexs;
+ int policy;
};
struct reference {
@@ -1992,6 +1994,7 @@ struct offset_table { /* stash of commonly-used offsets */
long mod_arch_specific_num_orcs;
long mod_arch_specific_orc_unwind_ip;
long mod_arch_specific_orc_unwind;
+ long task_struct_policy;
};
struct size_table { /* stash of commonly-used sizes */
@@ -2141,6 +2144,7 @@ struct size_table { /* stash of commonly-used sizes */
long sk_buff_head_qlen;
long sk_buff_len;
long orc_entry;
+ long task_struct_policy;
};
struct array_table {
@@ -4576,6 +4580,13 @@ enum type_code {
*/
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
+#define SCHED_NORMAL 0
+#define SCHED_FIFO 1
+#define SCHED_RR 2
+#define SCHED_BATCH 3
+#define SCHED_ISO 4
+#define SCHED_IDLE 5
+#define SCHED_DEADLINE 6
extern long _ZOMBIE_;
#define IS_ZOMBIE(task) (task_state(task) & _ZOMBIE_)
@@ -4603,6 +4614,7 @@ extern long _ZOMBIE_;
#define PS_NO_HEADER (0x10000)
#define PS_MSECS (0x20000)
#define PS_SUMMARY (0x40000)
+#define PS_POLICY (0x80000)
#define PS_EXCLUSIVE (PS_TGID_LIST|PS_ARGV_ENVP|PS_TIMES|PS_CHILD_LIST|PS_PPID_LIST|PS_LAST_RUN|PS_RLIMIT|PS_MSECS|PS_SUMMARY)
@@ -4620,6 +4632,7 @@ struct psinfo {
} regex_data[MAX_PS_ARGS];
int regexs;
ulong *cpus;
+ int policy;
};
#define IS_A_NUMBER(X) (decimal(X, 0) || hexadecimal(X, 0))
@@ -4823,7 +4836,7 @@ char *strip_ending_char(char *, char);
char *strip_beginning_char(char *, char);
char *strip_comma(char *);
char *strip_hex(char *);
-char *upper_case(char *, char *);
+char *upper_case(const char *, char *);
char *first_nonspace(char *);
char *first_space(char *);
char *replace_string(char *, char *, char);
diff --git a/help.c b/help.c
index f9c5792..efa55e0 100644
--- a/help.c
+++ b/help.c
@@ -844,7 +844,7 @@ char *help_foreach[] = {
" net run the \"net\" command (optional flags: -s -S -R -d -x)",
" set run the \"set\" command",
" ps run the \"ps\" command (optional flags: -G -s -p -c -t -l -a",
-" -g -r)",
+" -g -r -y)",
" sig run the \"sig\" command (optional flag: -g)",
" vtop run the \"vtop\" command (optional flags: -c -u -k)\n",
" flag Pass this optional flag to the command selected.",
@@ -1250,7 +1250,7 @@ NULL
char *help_ps[] = {
"ps",
"display process status information",
-"[-k|-u|-G] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S]\n [pid | task | command] ...",
+"[-k|-u|-G|-y policy] [-s] [-p|-c|-t|-[l|m][-C cpu]|-a|-g|-r|-S]\n [pid | task | command] ...",
" This command displays process status for selected, or all, processes" ,
" in the system. If no arguments are entered, the process data is",
" is displayed for all processes. Specific processes may be selected",
@@ -1267,6 +1267,16 @@ char *help_ps[] = {
" -k restrict the output to only kernel threads.",
" -u restrict the output to only user tasks.",
" -G display only the thread group leader in a thread group.",
+" -y policy restrict the output to tasks having a specified scheduling policy",
+" expressed by its integer value or by its (case-insensitive) name;",
+" multiple policies may be entered in a comma-separated list:",
+" 0 or NORMAL",
+" 1 or FIFO",
+" 2 or RR",
+" 3 or BATCH",
+" 4 or ISO",
+" 5 or IDLE",
+" 6 or DEADLINE",
" ",
" The process identifier types may be mixed. For each task, the following",
" items are displayed:",
diff --git a/symbols.c b/symbols.c
index b2f2796..f7599e8 100644
--- a/symbols.c
+++ b/symbols.c
@@ -8584,6 +8584,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(task_struct_prio));
fprintf(fp, " task_struct_on_rq: %ld\n",
OFFSET(task_struct_on_rq));
+ fprintf(fp, " task_struct_policy: %ld\n",
+ OFFSET(task_struct_policy));
fprintf(fp, " thread_info_task: %ld\n",
OFFSET(thread_info_task));
@@ -10211,6 +10213,7 @@ dump_offset_table(char *spec, ulong makestruct)
fprintf(fp, " pt_regs: %ld\n", SIZE(pt_regs));
fprintf(fp, " task_struct: %ld\n", SIZE(task_struct));
fprintf(fp, " task_struct_flags: %ld\n", SIZE(task_struct_flags));
+ fprintf(fp, " task_struct_policy: %ld\n", SIZE(task_struct_policy));
fprintf(fp, " thread_info: %ld\n", SIZE(thread_info));
fprintf(fp, " softirq_state: %ld\n",
SIZE(softirq_state));
diff --git a/task.c b/task.c
index 362822c..5754159 100644
--- a/task.c
+++ b/task.c
@@ -109,6 +109,24 @@ static void show_ps_summary(ulong);
static void irqstacks_init(void);
static void parse_task_thread(int argcnt, char *arglist[], struct task_context *);
static void stack_overflow_check_init(void);
+static int has_sched_policy(ulong, ulong);
+static ulong task_policy(ulong);
+static ulong sched_policy_bit_from_str(const char *);
+static ulong make_sched_policy(const char *);
+
+static struct sched_policy_info {
+ ulong value;
+ char *name;
+} sched_policy_info[] = {
+ { SCHED_NORMAL, "NORMAL" },
+ { SCHED_FIFO, "FIFO" },
+ { SCHED_RR, "RR" },
+ { SCHED_BATCH, "BATCH" },
+ { SCHED_ISO, "ISO" },
+ { SCHED_IDLE, "IDLE" },
+ { SCHED_DEADLINE, "DEADLINE" },
+ { ULONG_MAX, NULL }
+};
/*
* Figure out how much space will be required to hold the task context
@@ -273,6 +291,8 @@ task_init(void)
MEMBER_OFFSET_INIT(task_struct_next_run, "task_struct", "next_run");
MEMBER_OFFSET_INIT(task_struct_flags, "task_struct", "flags");
MEMBER_SIZE_INIT(task_struct_flags, "task_struct", "flags");
+ MEMBER_OFFSET_INIT(task_struct_policy, "task_struct", "policy");
+ MEMBER_SIZE_INIT(task_struct_policy, "task_struct", "policy");
MEMBER_OFFSET_INIT(task_struct_pidhash_next,
"task_struct", "pidhash_next");
MEMBER_OFFSET_INIT(task_struct_pgrp, "task_struct", "pgrp");
@@ -2974,7 +2994,7 @@ cmd_ps(void)
cpuspec = NULL;
flag = 0;
- while ((c = getopt(argcnt, args, "SgstcpkuGlmarC:")) != EOF) {
+ while ((c = getopt(argcnt, args, "SgstcpkuGlmarC:y:")) != EOF) {
switch(c)
{
case 'k':
@@ -3075,6 +3095,11 @@ cmd_ps(void)
make_cpumask(cpuspec, psinfo.cpus, FAULT_ON_ERROR, NULL);
break;
+ case 'y':
+ flag |= PS_POLICY;
+ psinfo.policy = make_sched_policy(optarg);
+ break;
+
default:
argerrs++;
break;
@@ -3218,6 +3243,8 @@ show_ps_data(ulong flag, struct task_context *tc, struct psinfo *psi)
return;
if ((flag & PS_KERNEL) && !is_kernel_thread(tc->task))
return;
+ if ((flag & PS_POLICY) && !has_sched_policy(tc->task, psi->policy))
+ return;
if (flag & PS_GROUP) {
if (flag & (PS_LAST_RUN|PS_MSECS))
error(FATAL, "-G not supported with -%c option\n",
@@ -3336,7 +3363,7 @@ show_ps(ulong flag, struct psinfo *psi)
tc = FIRST_CONTEXT();
for (i = 0; i < RUNNING_TASKS(); i++, tc++)
- show_ps_data(flag, tc, NULL);
+ show_ps_data(flag, tc, psi);
return;
}
@@ -3391,7 +3418,7 @@ show_ps(ulong flag, struct psinfo *psi)
if (flag & PS_TIMES)
show_task_times(tc, flag);
else
- show_ps_data(flag, tc, NULL);
+ show_ps_data(flag, tc, psi);
}
}
}
@@ -3546,7 +3573,7 @@ show_milliseconds(struct task_context *tc, struct psinfo *psi)
sprintf(format, "[%c%dll%c] ", '%', c,
pc->output_radix == 10 ? 'u' : 'x');
- if (psi) {
+ if (psi && psi->cpus) {
for (c = others = 0; c < kt->cpus; c++) {
if (!NUM_IN_BITMAP(psi->cpus, c))
continue;
@@ -5366,6 +5393,27 @@ task_flags(ulong task)
}
/*
+ * Return task's policy as bitmask bit.
+ */
+static ulong
+task_policy(ulong task)
+{
+ ulong policy = 0;
+
+ fill_task_struct(task);
+
+ if (!tt->last_task_read)
+ return policy;
+
+ if (SIZE(task_struct_policy) == sizeof(unsigned int))
+ policy = 1 << UINT(tt->task_struct + OFFSET(task_struct_policy));
+ else
+ policy = 1 << ULONG(tt->task_struct + OFFSET(task_struct_policy));
+
+ return policy;
+}
+
+/*
* Return a task's tgid.
*/
ulong
@@ -5797,7 +5845,7 @@ cmd_foreach(void)
BZERO(&foreach_data, sizeof(struct foreach_data));
fd = &foreach_data;
- while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaG")) != EOF) {
+ while ((c = getopt(argcnt, args, "R:vomlgersStTpukcfFxhdaGy:")) != EOF) {
switch(c)
{
case 'R':
@@ -5892,6 +5940,11 @@ cmd_foreach(void)
fd->flags |= FOREACH_G_FLAG;
break;
+ case 'y':
+ fd->flags |= FOREACH_y_FLAG;
+ fd->policy = make_sched_policy(optarg);
+ break;
+
default:
argerrs++;
break;
@@ -6554,6 +6607,10 @@ foreach(struct foreach_data *fd)
cmdflags |= PS_GROUP;
if (fd->flags & FOREACH_s_FLAG)
cmdflags |= PS_KSTACKP;
+ if (fd->flags & FOREACH_y_FLAG) {
+ cmdflags |= PS_POLICY;
+ psinfo.policy = fd->policy;
+ }
/*
* mutually exclusive flags
*/
@@ -7389,6 +7446,82 @@ is_kernel_thread(ulong task)
}
/*
+ * Checks if task policy corresponds to given mask.
+ */
+static int
+has_sched_policy(ulong task, ulong policy)
+{
+ return !!(task_policy(task) & policy);
+}
+
+/*
+ * Converts sched policy name into mask bit.
+ */
+static ulong
+sched_policy_bit_from_str(const char *policy_str)
+{
+ struct sched_policy_info *info = NULL;
+ ulong policy = 0;
+ int found = 0;
+ char *upper = NULL;
+ /*
+ * Once kernel gets more than 10 scheduling policies,
+ * sizes of these arrays should be adjusted
+ */
+ char digit[2] = { 0, 0 };
+ char hex[4] = { 0, 0, 0, 0 };
+
+ upper = GETBUF(strlen(policy_str) + 1);
+ upper_case(policy_str, upper);
+
+ for (info = sched_policy_info; info->name; info++) {
+ snprintf(digit, sizeof digit, "%lu", info->value);
+ /*
+ * Not using %#lX format here since "0X" prefix
+ * is not prepended if 0 value is given
+ */
+ snprintf(hex, sizeof hex, "0X%lX", info->value);
+ if (strncmp(upper, info->name, strlen(info->name)) == 0 ||
+ strncmp(upper, digit, sizeof digit) == 0 ||
+ strncmp(upper, hex, sizeof hex) == 0) {
+ policy = 1 << info->value;
+ found = 1;
+ break;
+ }
+ }
+
+ FREEBUF(upper);
+
+ if (!found)
+ error(FATAL,
+ "%s: invalid scheduling policy\n", policy_str);
+
+ return policy;
+}
+
+/*
+ * Converts sched policy string set into bitmask.
+ */
+static ulong
+make_sched_policy(const char *policy_str)
+{
+ ulong policy = 0;
+ char *iter = NULL;
+ char *orig = NULL;
+ char *cur = NULL;
+
+ iter = STRDUPBUF(policy_str);
+ orig = iter;
+
+ while ((cur = strsep(&iter, ",")))
+ policy |= sched_policy_bit_from_str(cur);
+
+ FREEBUF(orig);
+
+ return policy;
+}
+
+/*
* Gather an arry of pointers to the per-cpu idle tasks. The tasklist
* argument must be at least the size of ulong[NR_CPUS]. There may be
* junk in everything after the first entry on a single CPU box, so the
diff --git a/tools.c b/tools.c
index 886d7fb..186b703 100644
--- a/tools.c
+++ b/tools.c
@@ -423,9 +423,10 @@ strip_hex(char *line)
* Turn a string into upper-case.
*/
char *
-upper_case(char *s, char *buf)
+upper_case(const char *s, char *buf)
{
- char *p1, *p2;
+ const char *p1;
+ char *p2;
p1 = s;
p2 = buf;
commit d3909692e9f64e4a1ac440afa81e9efd6e9ea0b4
Author: Dave Anderson <anderson@redhat.com>
Date: Thu Oct 26 16:54:28 2017 -0400
Fix for the "kmem -[sS]" options on Linux 4.14 and later kernels that
contain commit 2482ddec670fb83717d129012bc558777cb159f7, titled
"mm: add SLUB free list pointer obfuscation". Without the patch,
there will be numerous error messages of the type "kmem: <cache name>
slab: <address> invalid freepointer: <obfuscated address>".
(anderson@redhat.com)
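With CONFIG_SLAB_FREELIST_HARDENED, the kernel stores each next-free pointer
XOR-masked with a per-cache random key and the pointer's own storage address;
since XOR is an involution, applying the same transform decodes it. A minimal
standalone demonstration (all values made up):

    #include <stdio.h>

    typedef unsigned long ulong;

    static ulong freelist_ptr(ulong ptr, ulong random, ulong ptr_addr)
    {
            return ptr ^ random ^ ptr_addr;
    }

    int main(void)
    {
            ulong next = 0xffff880012345678UL;    /* real next-free pointer */
            ulong random = 0x9e3779b97f4a7c15UL;  /* per-cache key */
            ulong addr = 0xffff880011112222UL;    /* where it is stored */

            ulong stored = freelist_ptr(next, random, addr);
            /* decoding with the same transform recovers the pointer */
            printf("%d\n", freelist_ptr(stored, random, addr) == next);
            return 0;
    }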
diff --git a/defs.h b/defs.h
index 4b4e331..967fce0 100644
--- a/defs.h
+++ b/defs.h
@@ -1995,6 +1995,7 @@ struct offset_table { /* stash of commonly-used offsets */
long mod_arch_specific_orc_unwind_ip;
long mod_arch_specific_orc_unwind;
long task_struct_policy;
+ long kmem_cache_random;
};
struct size_table { /* stash of commonly-used sizes */
diff --git a/memory.c b/memory.c
index fb534e8..9926199 100644
--- a/memory.c
+++ b/memory.c
@@ -75,7 +75,7 @@ struct meminfo { /* general purpose memory information structure */
ulong container;
int *freelist;
int freelist_index_size;
-
+ ulong random;
};
/*
@@ -293,6 +293,7 @@ static void dump_per_cpu_offsets(void);
static void dump_page_flags(ulonglong);
static ulong kmem_cache_nodelists(ulong);
static void dump_hstates(void);
+static ulong freelist_ptr(struct meminfo *, ulong, ulong);
/*
* Memory display modes specific to this file.
@@ -726,6 +727,7 @@ vm_init(void)
MEMBER_OFFSET_INIT(kmem_cache_red_left_pad, "kmem_cache", "red_left_pad");
MEMBER_OFFSET_INIT(kmem_cache_name, "kmem_cache", "name");
MEMBER_OFFSET_INIT(kmem_cache_flags, "kmem_cache", "flags");
+ MEMBER_OFFSET_INIT(kmem_cache_random, "kmem_cache", "random");
MEMBER_OFFSET_INIT(kmem_cache_cpu_freelist, "kmem_cache_cpu", "freelist");
MEMBER_OFFSET_INIT(kmem_cache_cpu_page, "kmem_cache_cpu", "page");
MEMBER_OFFSET_INIT(kmem_cache_cpu_node, "kmem_cache_cpu", "node");
@@ -18000,6 +18002,9 @@ dump_kmem_cache_slub(struct meminfo *si)
si->slabsize = (PAGESIZE() << order);
si->inuse = si->num_slabs = 0;
si->slab_offset = offset;
+ si->random = VALID_MEMBER(kmem_cache_random) ?
+ ULONG(si->cache_buf + OFFSET(kmem_cache_random)) : 0;
+
if (!get_kmem_cache_slub_data(GET_SLUB_SLABS, si) ||
!get_kmem_cache_slub_data(GET_SLUB_OBJECTS, si))
si->flags |= SLAB_GATHER_FAILURE;
@@ -18587,6 +18592,15 @@ count_free_objects(struct meminfo *si, ulong freelist)
return c;
}
+static ulong
+freelist_ptr(struct meminfo *si, ulong ptr, ulong ptr_addr)
+{
+ if (si->random)
+ /* CONFIG_SLAB_FREELIST_HARDENED */
+ return (ptr ^ si->random ^ ptr_addr);
+ else
+ return ptr;
+}
static ulong
get_freepointer(struct meminfo *si, void *object)
@@ -18601,7 +18615,7 @@ get_freepointer(struct meminfo *si, void *object)
return BADADDR;
}
- return nextfree;
+ return (freelist_ptr(si, nextfree, vaddr));
}
static void
diff --git a/symbols.c b/symbols.c
index f7599e8..8a4c878 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9378,6 +9378,8 @@ dump_offset_table(char *spec, ulong makestruct)
OFFSET(kmem_cache_cpu_cache));
fprintf(fp, " kmem_cache_oo: %ld\n",
OFFSET(kmem_cache_oo));
+ fprintf(fp, " kmem_cache_random: %ld\n",
+ OFFSET(kmem_cache_random));
fprintf(fp, " kmem_cache_node_nr_partial: %ld\n",
OFFSET(kmem_cache_node_nr_partial));
commit e81db08bc69fb1a7a7e48f892c2038d992a71f6d
Author: Dave Anderson <anderson@redhat.com>
Date: Fri Oct 27 14:10:43 2017 -0400
Fix for the validation of the bits located in the least significant
bits of mem_section.section_mem_map pointers. Without the patch,
the validation functions always returned valid, due to a coding
error found by clang. However, it was never really a problem
because it is extremely unlikely that an existing mem_section would
ever be invalid.
(oleksandr@redhat.com, anderson@redhat.com)
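The coding error was a logical && where a bitwise & was intended: any
non-NULL section_mem_map value is "true", so the flag bit was never
actually examined. A minimal demonstration (SECTION_MARKED_PRESENT is
bit 0 in the kernel):

    #include <stdio.h>

    #define SECTION_MARKED_PRESENT (1UL << 0)

    int
    main(void)
    {
            unsigned long map = 0xffff880012345600UL;  /* present bit clear */

            /* Buggy: logical AND is 1 for any two nonzero operands. */
            printf("map && bit = %d\n", map && SECTION_MARKED_PRESENT);

            /* Fixed: bitwise AND tests the bit itself. */
            printf("map & bit  = %d\n", (map & SECTION_MARKED_PRESENT) != 0);
            return 0;
    }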
diff --git a/memory.c b/memory.c
index 9926199..60594a4 100644
--- a/memory.c
+++ b/memory.c
@@ -17003,8 +17003,8 @@ valid_section(ulong addr)
if ((mem_section = read_mem_section(addr)))
return (ULONG(mem_section +
- OFFSET(mem_section_section_mem_map)) &&
- SECTION_MARKED_PRESENT);
+ OFFSET(mem_section_section_mem_map))
+ & SECTION_MARKED_PRESENT);
return 0;
}
@@ -17012,11 +17012,17 @@ int
section_has_mem_map(ulong addr)
{
char *mem_section;
+ ulong kernel_version_bit;
+
+ if (THIS_KERNEL_VERSION >= LINUX(2,6,24))
+ kernel_version_bit = SECTION_HAS_MEM_MAP;
+ else
+ kernel_version_bit = SECTION_MARKED_PRESENT;
if ((mem_section = read_mem_section(addr)))
return (ULONG(mem_section +
OFFSET(mem_section_section_mem_map))
- && SECTION_HAS_MEM_MAP);
+ & kernel_version_bit);
return 0;
}
commit 0f40db8fbac538ea448bbb2beb44912e4c43a54a
Author: Dave Anderson <anderson@redhat.com>
Date: Mon Oct 30 14:20:41 2017 -0400
Fix for the x86_64 kernel virtual address to physical address
translation mechanism. Without the patch, when verifying that the
PAGE_PRESENT bit is set in the top-level page table, it would always
test positively, and the translation would continue parsing the
remainder of the page tables. This would virtually never be a
problem in practice because if the top-level page table entry
existed, its PAGE_PRESENT bit would be set.
(oleksandr@redhat.com, anderson@redhat.com)
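Here the culprit is operator precedence: "!(*pml4) & _PAGE_PRESENT"
negates the entry first and then masks the resulting 0 or 1, so any
nonzero entry appeared present. A short demonstration (x86_64 defines
_PAGE_PRESENT as bit 0 of a page table entry):

    #include <stdio.h>

    #define _PAGE_PRESENT 0x001UL

    int
    main(void)
    {
            unsigned long pml4e = 0x123456000UL;  /* nonzero, present bit clear */

            /* Buggy: parses as (!pml4e) & _PAGE_PRESENT, i.e. 0 & 1 == 0,
             * so the no_kpage branch is never taken for a nonzero entry. */
            printf("buggy: %lu\n", !pml4e & _PAGE_PRESENT);

            /* Fixed: mask first, then negate. */
            printf("fixed: %d\n", !(pml4e & _PAGE_PRESENT));
            return 0;
    }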
diff --git a/x86_64.c b/x86_64.c
index 2f9e6db..7d01140 100644
--- a/x86_64.c
+++ b/x86_64.c
@@ -2086,7 +2086,7 @@ x86_64_kvtop(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, int verbo
}
start_vtop_with_pagetable:
- if (!(*pml4) & _PAGE_PRESENT)
+ if (!(*pml4 & _PAGE_PRESENT))
goto no_kpage;
pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
FILL_PGD(pgd_paddr, PHYSADDR, PAGESIZE());
@@ -2187,7 +2187,7 @@ x86_64_kvtop_xen_wpt(struct task_context *tc, ulong kvaddr, physaddr_t *paddr, i
fprintf(fp, "PML4 DIRECTORY: %lx\n", vt->kernel_pgd[0]);
fprintf(fp, "PAGE DIRECTORY: %lx [machine]\n", *pml4);
}
- if (!(*pml4) & _PAGE_PRESENT)
+ if (!(*pml4 & _PAGE_PRESENT))
goto no_kpage;
pgd_paddr = (*pml4) & PHYSICAL_PAGE_MASK;
pgd_paddr = xen_m2p(pgd_paddr);
commit 9339874f3764fe99a408aec1a814b19c77f5dfe1
Author: Dave Anderson <anderson@redhat.com>
Date: Mon Oct 30 14:33:15 2017 -0400
Removed a check for a negative block_size value in the SADUMP header
parsing function; block_size is unsigned, so the test can never be true.
(oleksandr@redhat.com)
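An unsigned operand makes the comparison tautological: the constant 0 is
converted to unsigned and the test can never succeed, which is what
clang's tautological-compare diagnostic reports. For example:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned int block_size = 4096;

            if (block_size < 0)      /* always false for an unsigned value */
                    printf("unreachable\n");
            else
                    printf("always taken\n");
            return 0;
    }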
diff --git a/sadump.c b/sadump.c
index 2ccfa82..6b912d4 100644
--- a/sadump.c
+++ b/sadump.c
@@ -157,9 +157,6 @@ read_dump_header(char *file)
}
restart:
- if (block_size < 0)
- return FALSE;
-
if (!read_device(sph, block_size, &offset)) {
error(INFO, "sadump: cannot read partition header\n");
goto err;
commit b2d1bba766118fddf43235f0bed483dff32ac6e0
Author: Dave Anderson <anderson@redhat.com>
Date: Mon Oct 30 14:46:32 2017 -0400
Removed a check for an impossible negative value when calculating
the beginning address for the context specified by the
"search -x <count>" option.
(oleksandr@redhat.com)
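The same reasoning applies here, with a twist: unsigned subtraction
wraps modulo 2^N rather than going negative, so the removed guard was
dead code. For example:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned long long addr = 0x1000, amount = 0x2000;

            /* Wraps to 0xfffffffffffff000; "addr - amount < 0" is never true. */
            printf("addr - amount = %llx\n", addr - amount);
            return 0;
    }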
diff --git a/memory.c b/memory.c
index 60594a4..ebd671a 100644
--- a/memory.c
+++ b/memory.c
@@ -14467,7 +14467,7 @@ display_with_pre_and_post(void *bufptr, ulonglong addr, struct searchinfo *si)
}
amount = ctx * t;
- addr_d = addr - amount < 0 ? 0 : addr - amount;
+ addr_d = addr - amount;
display_memory(addr_d, ctx, flag, memtype, NULL);
commit e2efacdd9b7b229747a78c743b2acc6d15280a8a
Author: Dave Anderson <anderson@redhat.com>
Date: Mon Oct 30 16:49:48 2017 -0400
Implemented a new "timer -C <cpu-specifier>" option that restricts
the timer or hrtimer output to the timer queue data associated with
one or more cpus. For multiple cpus, the cpu-specifier uses the
standard comma- or dash-separated list format.
(oleksandr@redhat.com)
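crash parses the specifier with make_cpumask(), as the kernel.c diff
below shows. For illustration only, a hypothetical standalone parser
for the same "1,3,5-7" list format:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Set the bits named by spec ("1,3,5-7") in mask; returns 1 on success. */
    static int
    parse_cpuspec(const char *spec, unsigned long *mask, int nbits)
    {
            char *copy = strdup(spec), *tok, *dash;

            for (tok = strtok(copy, ","); tok; tok = strtok(NULL, ",")) {
                    int lo, hi;

                    lo = hi = atoi(tok);             /* atoi stops at '-' */
                    if ((dash = strchr(tok, '-')))
                            hi = atoi(dash + 1);
                    if (lo < 0 || hi >= nbits || lo > hi) {
                            free(copy);
                            return 0;
                    }
                    for (; lo <= hi; lo++)
                            mask[lo / (8 * sizeof(*mask))] |=
                                1UL << (lo % (8 * sizeof(*mask)));
            }
            free(copy);
            return 1;
    }

    int
    main(void)
    {
            unsigned long mask[4] = { 0 };

            if (parse_cpuspec("1,3,5-7", mask, 256))
                    printf("mask[0] = %#lx\n", mask[0]);  /* 0xea */
            return 0;
    }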
diff --git a/help.c b/help.c
index efa55e0..f7f61a1 100644
--- a/help.c
+++ b/help.c
@@ -2387,7 +2387,7 @@ NULL
char *help_timer[] = {
"timer",
"timer queue data",
-"[-r]",
+"[-r][-C cpu]",
" This command displays the timer queue entries, both old- and new-style,",
" in chronological order. In the case of the old-style timers, the",
" timer_table array index is shown; in the case of the new-style timers, ",
@@ -2397,6 +2397,8 @@ char *help_timer[] = {
" chronological order. In the case of the old-style hrtimers, the",
" expiration time is a single value; in the new-style hrtimers, the",
" expiration time is a range.",
+" -C cpu Restrict the output to one or more CPUs, where multiple cpu[s] can",
+" be specified, for example, as \"1,3,5\", \"1-3\", or \"1,3,5-7,10\".",
"\nEXAMPLES",
" %s> timer",
" JIFFIES",
diff --git a/kernel.c b/kernel.c
index 8e95573..4638495 100644
--- a/kernel.c
+++ b/kernel.c
@@ -38,18 +38,18 @@ static void display_bh_1(void);
static void display_bh_2(void);
static void display_bh_3(void);
static void display_bh_4(void);
-static void dump_hrtimer_data(void);
+static void dump_hrtimer_data(const ulong *cpus);
static void dump_hrtimer_clock_base(const void *, const int);
static void dump_hrtimer_base(const void *, const int);
static void dump_active_timers(const void *, ulonglong);
static int get_expires_len(const int, const ulong *, const int);
static void print_timer(const void *);
static ulonglong ktime_to_ns(const void *);
-static void dump_timer_data(void);
-static void dump_timer_data_tvec_bases_v1(void);
-static void dump_timer_data_tvec_bases_v2(void);
-static void dump_timer_data_tvec_bases_v3(void);
-static void dump_timer_data_timer_bases(void);
+static void dump_timer_data(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v1(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v2(const ulong *cpus);
+static void dump_timer_data_tvec_bases_v3(const ulong *cpus);
+static void dump_timer_data_timer_bases(const ulong *cpus);
struct tv_range;
static void init_tv_ranges(struct tv_range *, int, int, int);
static int do_timer_list(ulong,int, ulong *, void *,ulong *,struct tv_range *);
@@ -7353,16 +7353,24 @@ cmd_timer(void)
{
int c;
int rflag;
+ char *cpuspec;
+ ulong *cpus = NULL;
rflag = 0;
- while ((c = getopt(argcnt, args, "r")) != EOF) {
+ while ((c = getopt(argcnt, args, "rC:")) != EOF) {
switch(c)
{
case 'r':
rflag = 1;
break;
+ case 'C':
+ cpuspec = optarg;
+ cpus = get_cpumask_buf();
+ make_cpumask(cpuspec, cpus, FAULT_ON_ERROR, NULL);
+ break;
+
default:
argerrs++;
break;
@@ -7373,15 +7381,18 @@ cmd_timer(void)
cmd_usage(pc->curcmd, SYNOPSIS);
if (rflag)
- dump_hrtimer_data();
+ dump_hrtimer_data(cpus);
else
- dump_timer_data();
+ dump_timer_data(cpus);
+
+ if (cpus)
+ FREEBUF(cpus);
}
static void
-dump_hrtimer_data(void)
+dump_hrtimer_data(const ulong *cpus)
{
- int i, j;
+ int i, j, k = 0;
int hrtimer_max_clock_bases, max_hrtimer_bases;
struct syment * hrtimer_bases;
@@ -7405,7 +7416,10 @@ dump_hrtimer_data(void)
hrtimer_bases = per_cpu_symbol_search("hrtimer_bases");
for (i = 0; i < kt->cpus; i++) {
- if (i)
+ if (cpus && !NUM_IN_BITMAP(cpus, i))
+ continue;
+
+ if (k++)
fprintf(fp, "\n");
if (hide_offline_cpu(i)) {
@@ -7752,7 +7766,7 @@ struct tv_range {
#define TVN (6)
static void
-dump_timer_data(void)
+dump_timer_data(const ulong *cpus)
{
int i;
ulong timer_active;
@@ -7773,16 +7787,16 @@ dump_timer_data(void)
struct tv_range tv[TVN];
if (kt->flags2 & TIMER_BASES) {
- dump_timer_data_timer_bases();
+ dump_timer_data_timer_bases(cpus);
return;
} else if (kt->flags2 & TVEC_BASES_V3) {
- dump_timer_data_tvec_bases_v3();
+ dump_timer_data_tvec_bases_v3(cpus);
return;
} else if (kt->flags & TVEC_BASES_V2) {
- dump_timer_data_tvec_bases_v2();
+ dump_timer_data_tvec_bases_v2(cpus);
return;
} else if (kt->flags & TVEC_BASES_V1) {
- dump_timer_data_tvec_bases_v1();
+ dump_timer_data_tvec_bases_v1(cpus);
return;
}
@@ -7924,7 +7938,7 @@ dump_timer_data(void)
*/
static void
-dump_timer_data_tvec_bases_v1(void)
+dump_timer_data_tvec_bases_v1(const ulong *cpus)
{
int i, cpu, tdx, flen;
struct timer_data *td;
@@ -7947,6 +7961,11 @@ dump_timer_data_tvec_bases_v1(void)
cpu = 0;
next_cpu:
+ if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+ if (++cpu < kt->cpus)
+ goto next_cpu;
+ return;
+ }
count = 0;
td = (struct timer_data *)NULL;
@@ -8039,7 +8058,7 @@ next_cpu:
*/
static void
-dump_timer_data_tvec_bases_v2(void)
+dump_timer_data_tvec_bases_v2(const ulong *cpus)
{
int i, cpu, tdx, flen;
struct timer_data *td;
@@ -8073,6 +8092,11 @@ dump_timer_data_tvec_bases_v2(void)
cpu = 0;
next_cpu:
+ if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+ if (++cpu < kt->cpus)
+ goto next_cpu;
+ return;
+ }
/*
* hide data of offline cpu and goto next cpu
*/
@@ -8185,7 +8209,7 @@ next_cpu:
* Linux 4.2 timers use new tvec_root, tvec and timer_list structures
*/
static void
-dump_timer_data_tvec_bases_v3(void)
+dump_timer_data_tvec_bases_v3(const ulong *cpus)
{
int i, cpu, tdx, flen;
struct timer_data *td;
@@ -8216,6 +8240,11 @@ dump_timer_data_tvec_bases_v3(void)
cpu = 0;
next_cpu:
+ if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+ if (++cpu < kt->cpus)
+ goto next_cpu;
+ return;
+ }
/*
* hide data of offline cpu and goto next cpu
*/
@@ -8758,9 +8787,9 @@ do_timer_list_v4(struct timer_bases_data *data)
* Linux 4.8 timers use new timer_bases[][]
*/
static void
-dump_timer_data_timer_bases(void)
+dump_timer_data_timer_bases(const ulong *cpus)
{
- int i, cpu, flen, base, nr_bases, found, display;
+ int i, cpu, flen, base, nr_bases, found, display, j = 0;
struct syment *sp;
ulong timer_base, jiffies, function;
struct timer_bases_data data;
@@ -8785,6 +8814,11 @@ dump_timer_data_timer_bases(void)
RJUST|LONG_DEC,MKSTR(jiffies)));
next_cpu:
+ if (cpus && !NUM_IN_BITMAP(cpus, cpu)) {
+ if (++cpu < kt->cpus)
+ goto next_cpu;
+ goto done;
+ }
/*
* hide data of offline cpu and goto next cpu
*/
@@ -8803,7 +8837,7 @@ next_cpu:
else
timer_base = sp->value;
- if (cpu)
+ if (j++)
fprintf(fp, "\n");
next_base: