|
|
5593c8 |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
5593c8 |
From: Peter Jones <pjones@redhat.com>
|
|
|
5593c8 |
Date: Tue, 7 Nov 2017 17:12:17 -0500
|
|
|
5593c8 |
Subject: [PATCH] Make pmtimer tsc calibration not take 51 seconds to fail.
|
|
|
5593c8 |
|
|
|
5593c8 |
On my laptop running at 2.4GHz, if I run a VM where tsc calibration
|
|
|
5593c8 |
using pmtimer will fail presuming a broken pmtimer, it takes ~51 seconds
|
|
|
5593c8 |
to do so (as measured with the stopwatch on my phone), with a tsc delta
|
|
|
5593c8 |
of 0x1cd1c85300, or around 125 billion cycles.
|
|
|
5593c8 |
|
|
|
5593c8 |
If instead of trying to wait for 5-200ms to show up on the pmtimer, we try
|
|
|
5593c8 |
to wait for 5-200us, it decides it's broken in ~0x2626aa0 TSCs, aka ~40
|
|
|
5593c8 |
million cycles, or more or less instantly.
|
|
|
5593c8 |
|
|
|
5593c8 |
Additionally, reading the pmtimer was returning 0xffffffff anyway,
|
|
|
5593c8 |
and that's obviously an invalid return. I've added a check for that and
|
|
|
5593c8 |
0 so we don't bother waiting for the test if what we're seeing is dead
|
|
|
5593c8 |
pins with no response at all.
|
|
|
5593c8 |
|
|
|
5593c8 |
If "debug" includes "pmtimer", you will see one of the following
|
|
|
5593c8 |
three outcomes. If pmtimer gives all 0 or all 1 bits, you will see:
|
|
|
5593c8 |
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 1
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 2
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 3
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 4
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 5
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 6
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 7
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 8
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 9
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:77: pmtimer: 0xffffff bad_reads: 10
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:78: timer is broken; giving up.
|
|
|
5593c8 |
|
|
|
5593c8 |
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware and
|
|
|
5593c8 |
these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
|
|
|
5593c8 |
|
|
|
5593c8 |
If pmtimer gives any other bit patterns but is not actually marching
|
|
|
5593c8 |
forward fast enough to use for clock calibration, you will see:
|
|
|
5593c8 |
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations)
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0
|
|
|
5593c8 |
|
|
|
5593c8 |
This outcome was tested using grub compiled with GRUB_PMTIMER_IGNORE_BAD_READS
|
|
|
5593c8 |
defined (so as not to trip the bad read test) using qemu+kvm with UEFI
|
|
|
5593c8 |
(OVMF) firmware, and these options: -machine pc-q35-2.10 -cpu Broadwell-noTSX
|
|
|
5593c8 |
|
|
|
5593c8 |
If pmtimer actually works, you'll see something like:
|
|
|
5593c8 |
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:121: pmtimer delta is 0x0 (1904 iterations)
|
|
|
5593c8 |
kern/i386/tsc_pmtimer.c:124: tsc delta is implausible: 0x2626aa0
|
|
|
5593c8 |
|
|
|
5593c8 |
This outcome was tested using qemu+kvm with UEFI (OVMF) firmware, and
|
|
|
5593c8 |
these options: -machine pc-i440fx-2.4 -cpu Broadwell-noTSX
|
|
|
5593c8 |
|
|
|
5593c8 |
I've also tested this outcome on a real Intel Xeon E3-1275v3 on an Intel
|
|
|
5593c8 |
Server Board S1200V3RPS using the SDV.RP.B8 "Release" build here:
|
|
|
5593c8 |
https://firmware.intel.com/sites/default/files/UEFIDevKit_S1200RP_vB8.zip
|
|
|
5593c8 |
|
|
|
5593c8 |
Signed-off-by: Peter Jones <pjones@redhat.com>
|
|
|
5593c8 |
---
|
|
|
5593c8 |
grub-core/kern/i386/tsc_pmtimer.c | 109 +++++++++++++++++++++++++++++++-------
|
|
|
5593c8 |
1 file changed, 89 insertions(+), 20 deletions(-)
|
|
|
5593c8 |
|
|
|
5593c8 |
diff --git a/grub-core/kern/i386/tsc_pmtimer.c b/grub-core/kern/i386/tsc_pmtimer.c
|
|
|
1c6ba0 |
index c9c3616997..ca15c3aacd 100644
|
|
|
5593c8 |
--- a/grub-core/kern/i386/tsc_pmtimer.c
|
|
|
5593c8 |
+++ b/grub-core/kern/i386/tsc_pmtimer.c
|
|
|
5593c8 |
@@ -28,40 +28,101 @@
|
|
|
5593c8 |
#include <grub/acpi.h>
|
|
|
5593c8 |
#include <grub/cpu/io.h>
|
|
|
5593c8 |
|
|
|
5593c8 |
+/*
|
|
|
5593c8 |
+ * Define GRUB_PMTIMER_IGNORE_BAD_READS if you're trying to test a timer that's
|
|
|
5593c8 |
+ * present but doesn't keep time well.
|
|
|
5593c8 |
+ */
|
|
|
5593c8 |
+// #define GRUB_PMTIMER_IGNORE_BAD_READS
|
|
|
5593c8 |
+
|
|
|
5593c8 |
grub_uint64_t
|
|
|
5593c8 |
grub_pmtimer_wait_count_tsc (grub_port_t pmtimer,
|
|
|
5593c8 |
grub_uint16_t num_pm_ticks)
|
|
|
5593c8 |
{
|
|
|
5593c8 |
grub_uint32_t start;
|
|
|
5593c8 |
- grub_uint32_t last;
|
|
|
5593c8 |
- grub_uint32_t cur, end;
|
|
|
5593c8 |
+ grub_uint64_t cur, end;
|
|
|
5593c8 |
grub_uint64_t start_tsc;
|
|
|
5593c8 |
grub_uint64_t end_tsc;
|
|
|
5593c8 |
- int num_iter = 0;
|
|
|
5593c8 |
+ unsigned int num_iter = 0;
|
|
|
5593c8 |
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
|
|
|
5593c8 |
+ int bad_reads = 0;
|
|
|
5593c8 |
+#endif
|
|
|
5593c8 |
|
|
|
5593c8 |
- start = grub_inl (pmtimer) & 0xffffff;
|
|
|
5593c8 |
- last = start;
|
|
|
5593c8 |
+ /*
|
|
|
5593c8 |
+ * Some timers are 24-bit and some are 32-bit, but it doesn't make much
|
|
|
5593c8 |
+ * difference to us. Caring which one we have isn't really worth it since
|
|
|
5593c8 |
+ * the low-order digits will give us enough data to calibrate TSC. So just
|
|
|
5593c8 |
+ * mask the top-order byte off.
|
|
|
5593c8 |
+ */
|
|
|
5593c8 |
+ cur = start = grub_inl (pmtimer) & 0xffffffUL;
|
|
|
5593c8 |
end = start + num_pm_ticks;
|
|
|
5593c8 |
start_tsc = grub_get_tsc ();
|
|
|
5593c8 |
while (1)
|
|
|
5593c8 |
{
|
|
|
5593c8 |
- cur = grub_inl (pmtimer) & 0xffffff;
|
|
|
5593c8 |
- if (cur < last)
|
|
|
5593c8 |
- cur |= 0x1000000;
|
|
|
5593c8 |
- num_iter++;
|
|
|
5593c8 |
+ cur &= 0xffffffffff000000ULL;
|
|
|
5593c8 |
+ cur |= grub_inl (pmtimer) & 0xffffffUL;
|
|
|
5593c8 |
+
|
|
|
5593c8 |
+ end_tsc = grub_get_tsc();
|
|
|
5593c8 |
+
|
|
|
5593c8 |
+#ifndef GRUB_PMTIMER_IGNORE_BAD_READS
|
|
|
5593c8 |
+ /*
|
|
|
5593c8 |
+ * If we get 10 reads in a row that are obviously dead pins, there's no
|
|
|
5593c8 |
+ * reason to do this thousands of times.
|
|
|
5593c8 |
+ */
|
|
|
5593c8 |
+ if (cur == 0xffffffUL || cur == 0)
|
|
|
5593c8 |
+ {
|
|
|
5593c8 |
+ bad_reads++;
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer",
|
|
|
5593c8 |
+ "pmtimer: 0x%"PRIxGRUB_UINT64_T" bad_reads: %d\n",
|
|
|
5593c8 |
+ cur, bad_reads);
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer", "timer is broken; giving up.\n");
|
|
|
5593c8 |
+
|
|
|
5593c8 |
+ if (bad_reads == 10)
|
|
|
5593c8 |
+ return 0;
|
|
|
5593c8 |
+ }
|
|
|
5593c8 |
+#endif
|
|
|
5593c8 |
+
|
|
|
5593c8 |
+ if (cur < start)
|
|
|
5593c8 |
+ cur += 0x1000000;
|
|
|
5593c8 |
+
|
|
|
5593c8 |
if (cur >= end)
|
|
|
5593c8 |
{
|
|
|
5593c8 |
- end_tsc = grub_get_tsc ();
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer", "pmtimer delta is 0x%"PRIxGRUB_UINT64_T"\n",
|
|
|
5593c8 |
+ cur - start);
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer", "tsc delta is 0x%"PRIxGRUB_UINT64_T"\n",
|
|
|
5593c8 |
+ end_tsc - start_tsc);
|
|
|
5593c8 |
return end_tsc - start_tsc;
|
|
|
5593c8 |
}
|
|
|
5593c8 |
- /* Check for broken PM timer.
|
|
|
5593c8 |
- 50000000 TSCs is between 5 ms (10GHz) and 200 ms (250 MHz)
|
|
|
5593c8 |
- if after this time we still don't have 1 ms on pmtimer, then
|
|
|
5593c8 |
- pmtimer is broken.
|
|
|
5593c8 |
+
|
|
|
5593c8 |
+ /*
|
|
|
5593c8 |
+ * Check for broken PM timer. 1ms at 10GHz should be 1E+7 TSCs; at
|
|
|
5593c8 |
+ * 250MHz it should be 2.5E+5. So after 4E+7 TSCs on a 10GHz machine,
|
|
|
5593c8 |
+ * we should have seen pmtimer show 4ms of change (i.e. cur =~
|
|
|
5593c8 |
+ * start+14320); on a 250MHz machine that should be 160ms (start+572800).
|
|
|
5593c8 |
+ * If after this time we still don't have 1ms on pmtimer, then pmtimer
|
|
|
5593c8 |
+ * is broken.
|
|
|
5593c8 |
+ *
|
|
|
5593c8 |
+ * Likewise, if our code is perfectly efficient and introduces no delays
|
|
|
5593c8 |
+ * whatsoever, on a 10GHz system we should see a TSC delta of 3580 in
|
|
|
5593c8 |
+ * ~3580 iterations. On a 250MHz machine that should be ~900 iterations.
|
|
|
5593c8 |
+ *
|
|
|
5593c8 |
+ * With those factors in mind, there are two limits here. There's a hard
|
|
|
5593c8 |
+ * limit here at 8x our desired pm timer delta, picked as an arbitrarily
|
|
|
5593c8 |
+ * large value that's still not a lot of time to humans, because if we
|
|
|
5593c8 |
+ * get that far this is either an implausibly fast machine or the pmtimer
|
|
|
5593c8 |
+ * is not running. And there's another limit on 4x our 10GHz tsc delta
|
|
|
5593c8 |
+ * without seeing cur converge on our target value.
|
|
|
5593c8 |
*/
|
|
|
5593c8 |
- if ((num_iter & 0xffffff) == 0 && grub_get_tsc () - start_tsc > 5000000) {
|
|
|
5593c8 |
- return 0;
|
|
|
5593c8 |
- }
|
|
|
5593c8 |
+ if ((++num_iter > (grub_uint32_t)num_pm_ticks << 3UL) ||
|
|
|
5593c8 |
+ end_tsc - start_tsc > 40000000)
|
|
|
5593c8 |
+ {
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer",
|
|
|
5593c8 |
+ "pmtimer delta is 0x%"PRIxGRUB_UINT64_T" (%u iterations)\n",
|
|
|
5593c8 |
+ cur - start, num_iter);
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer",
|
|
|
5593c8 |
+ "tsc delta is implausible: 0x%"PRIxGRUB_UINT64_T"\n",
|
|
|
5593c8 |
+ end_tsc - start_tsc);
|
|
|
5593c8 |
+ return 0;
|
|
|
5593c8 |
+ }
|
|
|
5593c8 |
}
|
|
|
5593c8 |
}
|
|
|
5593c8 |
|
|
|
5593c8 |
@@ -74,12 +135,20 @@ grub_tsc_calibrate_from_pmtimer (void)
|
|
|
5593c8 |
|
|
|
5593c8 |
fadt = grub_acpi_find_fadt ();
|
|
|
5593c8 |
if (!fadt)
|
|
|
5593c8 |
- return 0;
|
|
|
5593c8 |
+ {
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer", "No FADT found; not using pmtimer.\n");
|
|
|
5593c8 |
+ return 0;
|
|
|
5593c8 |
+ }
|
|
|
5593c8 |
pmtimer = fadt->pmtimer;
|
|
|
5593c8 |
if (!pmtimer)
|
|
|
5593c8 |
- return 0;
|
|
|
5593c8 |
+ {
|
|
|
5593c8 |
+ grub_dprintf ("pmtimer", "FADT does not specify pmtimer; skipping.\n");
|
|
|
5593c8 |
+ return 0;
|
|
|
5593c8 |
+ }
|
|
|
5593c8 |
|
|
|
5593c8 |
- /* It's 3.579545 MHz clock. Wait 1 ms. */
|
|
|
5593c8 |
+ /*
|
|
|
5593c8 |
+ * It's 3.579545 MHz clock. Wait 1 ms.
|
|
|
5593c8 |
+ */
|
|
|
5593c8 |
tsc_diff = grub_pmtimer_wait_count_tsc (pmtimer, 3580);
|
|
|
5593c8 |
if (tsc_diff == 0)
|
|
|
5593c8 |
return 0;
|