|
|
fd0330 |
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
|
fd0330 |
From: Daniel Axtens <dja@axtens.net>
|
|
|
fd0330 |
Date: Mon, 6 Feb 2023 10:03:20 -0500
|
|
|
fd0330 |
Subject: [PATCH] ieee1275: request memory with ibm,
|
|
|
fd0330 |
client-architecture-support
|
|
|
fd0330 |
|
|
|
fd0330 |
On PowerVM, the first time we boot a Linux partition, we may only get
|
|
|
fd0330 |
256MB of real memory area, even if the partition has more memory.
|
|
|
fd0330 |
|
|
|
fd0330 |
This isn't enough to reliably verify a kernel. Fortunately, the Power
|
|
|
fd0330 |
Architecture Platform Reference (PAPR) defines a method we can call to ask
|
|
|
fd0330 |
for more memory: the broad and powerful ibm,client-architecture-support
|
|
|
fd0330 |
(CAS) method.
|
|
|
fd0330 |
|
|
|
fd0330 |
CAS can do an enormous amount of things on a PAPR platform: as well as
|
|
|
fd0330 |
asking for memory, you can set the supported processor level, the interrupt
|
|
|
fd0330 |
controller, hash vs radix mmu, and so on.
|
|
|
fd0330 |
|
|
|
fd0330 |
If:
|
|
|
fd0330 |
|
|
|
fd0330 |
- we are running under what we think is PowerVM (compatible property of /
|
|
|
fd0330 |
begins with "IBM"), and
|
|
|
fd0330 |
|
|
|
fd0330 |
- the full amount of RMA is less than 512MB (as determined by the reg
|
|
|
fd0330 |
property of /memory)
|
|
|
fd0330 |
|
|
|
fd0330 |
then call CAS as follows: (refer to the Linux on Power Architecture
|
|
|
fd0330 |
Reference, LoPAR, which is public, at B.5.2.3):
|
|
|
fd0330 |
|
|
|
fd0330 |
- Use the "any" PVR value and supply 2 option vectors.
|
|
|
fd0330 |
|
|
|
fd0330 |
- Set option vector 1 (PowerPC Server Processor Architecture Level)
|
|
|
fd0330 |
to "ignore".
|
|
|
fd0330 |
|
|
|
fd0330 |
- Set option vector 2 with default or Linux-like options, including a
|
|
|
fd0330 |
min-rma-size of 512MB.
|
|
|
fd0330 |
|
|
|
fd0330 |
- Set option vector 3 to request Floating Point, VMX and Decimal Floating
|
|
|
fd0330 |
point, but don't abort the boot if we can't get them.
|
|
|
fd0330 |
|
|
|
fd0330 |
- Set option vector 4 to request a minimum VP percentage of 1%, which is
|
|
|
fd0330 |
what Linux requests, and is below the default of 10%. Without this,
|
|
|
fd0330 |
some systems with very large or very small configurations fail to boot.
|
|
|
fd0330 |
|
|
|
fd0330 |
This will cause a CAS reboot and the partition will restart with 512MB
|
|
|
fd0330 |
of RMA. Importantly, grub will notice the 512MB and not call CAS again.
|
|
|
fd0330 |
|
|
|
fd0330 |
Notes about the choices of parameters:
|
|
|
fd0330 |
|
|
|
fd0330 |
- A partition can be configured with only 256MB of memory, which would
|
|
|
fd0330 |
mean this request couldn't be satisfied, but PFW refuses to load with
|
|
|
fd0330 |
only 256MB of memory, so it's a bit moot. SLOF will run fine with 256MB,
|
|
|
fd0330 |
but we will never call CAS under qemu/SLOF because /compatible won't
|
|
|
fd0330 |
begin with "IBM".
|
|
|
fd0330 |
|
|
|
fd0330 |
- unspecified CAS vectors take on default values. Some of these values
|
|
|
fd0330 |
might restrict the ability of certain hardware configurations to boot.
|
|
|
fd0330 |
This is why we need to specify the VP percentage in vector 4, which is
|
|
|
fd0330 |
in turn why we need to specify vector 3.
|
|
|
fd0330 |
|
|
|
fd0330 |
Finally, we should have enough memory to verify a kernel, and we will
|
|
|
fd0330 |
reach Linux. One of the first things Linux does while still running under
|
|
|
fd0330 |
OpenFirmware is to call CAS with a much fuller set of options (including
|
|
|
fd0330 |
asking for 512MB of memory). Linux includes a much more restrictive set of
|
|
|
fd0330 |
PVR values and processor support levels, and this CAS invocation will likely
|
|
|
fd0330 |
induce another reboot. On this reboot grub will again notice the higher RMA,
|
|
|
fd0330 |
and not call CAS. We will get to Linux again, Linux will call CAS again, but
|
|
|
fd0330 |
because the values are now set for Linux this will not induce another CAS
|
|
|
fd0330 |
reboot and we will finally boot all the way to userspace.
|
|
|
fd0330 |
|
|
|
fd0330 |
On all subsequent boots, everything will be configured with 512MB of RMA,
|
|
|
fd0330 |
so there will be no further CAS reboots from grub. (phyp is super sticky
|
|
|
fd0330 |
with the RMA size - it persists even on cold boots. So if you've ever booted
|
|
|
fd0330 |
Linux in a partition, you'll probably never have grub call CAS. It'll only
|
|
|
fd0330 |
ever fire the first time a partition loads grub, or if you deliberately lower
|
|
|
fd0330 |
the amount of memory your partition has below 512MB.)
|
|
|
fd0330 |
|
|
|
fd0330 |
Signed-off-by: Daniel Axtens <dja@axtens.net>
|
|
|
fd0330 |
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
|
|
|
fd0330 |
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
|
|
|
fd0330 |
(cherry picked from commit d5571590b7de61887efac1c298901455697ba307)
|
|
|
fd0330 |
---
|
|
|
fd0330 |
grub-core/kern/ieee1275/cmain.c | 5 ++
|
|
|
fd0330 |
grub-core/kern/ieee1275/init.c | 167 ++++++++++++++++++++++++++++++++++++++-
|
|
|
fd0330 |
include/grub/ieee1275/ieee1275.h | 12 ++-
|
|
|
fd0330 |
3 files changed, 182 insertions(+), 2 deletions(-)
|
|
|
fd0330 |
|
|
|
fd0330 |
diff --git a/grub-core/kern/ieee1275/cmain.c b/grub-core/kern/ieee1275/cmain.c
|
|
|
fd0330 |
index 04df9d2c66..dce7b84922 100644
|
|
|
fd0330 |
--- a/grub-core/kern/ieee1275/cmain.c
|
|
|
fd0330 |
+++ b/grub-core/kern/ieee1275/cmain.c
|
|
|
fd0330 |
@@ -127,6 +127,11 @@ grub_ieee1275_find_options (void)
|
|
|
fd0330 |
break;
|
|
|
fd0330 |
}
|
|
|
fd0330 |
}
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+#if defined(__powerpc__)
|
|
|
fd0330 |
+ if (grub_strncmp (tmp, "IBM,", 4) == 0)
|
|
|
fd0330 |
+ grub_ieee1275_set_flag (GRUB_IEEE1275_FLAG_CAN_TRY_CAS_FOR_MORE_MEMORY);
|
|
|
fd0330 |
+#endif
|
|
|
fd0330 |
}
|
|
|
fd0330 |
|
|
|
fd0330 |
if (is_smartfirmware)
|
|
|
fd0330 |
diff --git a/grub-core/kern/ieee1275/init.c b/grub-core/kern/ieee1275/init.c
|
|
|
fd0330 |
index 6581c2c996..8ae405bc79 100644
|
|
|
fd0330 |
--- a/grub-core/kern/ieee1275/init.c
|
|
|
fd0330 |
+++ b/grub-core/kern/ieee1275/init.c
|
|
|
fd0330 |
@@ -202,11 +202,176 @@ heap_init (grub_uint64_t addr, grub_uint64_t len, grub_memory_type_t type,
|
|
|
fd0330 |
return 0;
|
|
|
fd0330 |
}
|
|
|
fd0330 |
|
|
|
fd0330 |
-static void
|
|
|
fd0330 |
+/*
|
|
|
fd0330 |
+ * How much memory does OF believe it has? (regardless of whether
|
|
|
fd0330 |
+ * it's accessible or not)
|
|
|
fd0330 |
+ */
|
|
|
fd0330 |
+static grub_err_t
|
|
|
fd0330 |
+grub_ieee1275_total_mem (grub_uint64_t *total)
|
|
|
fd0330 |
+{
|
|
|
fd0330 |
+ grub_ieee1275_phandle_t root;
|
|
|
fd0330 |
+ grub_ieee1275_phandle_t memory;
|
|
|
fd0330 |
+ grub_uint32_t reg[4];
|
|
|
fd0330 |
+ grub_ssize_t reg_size;
|
|
|
fd0330 |
+ grub_uint32_t address_cells = 1;
|
|
|
fd0330 |
+ grub_uint32_t size_cells = 1;
|
|
|
fd0330 |
+ grub_uint64_t size;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ /* If we fail to get to the end, report 0. */
|
|
|
fd0330 |
+ *total = 0;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ /* Determine the format of each entry in `reg'. */
|
|
|
fd0330 |
+ if (grub_ieee1275_finddevice ("/", &root))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "couldn't find / node");
|
|
|
fd0330 |
+ if (grub_ieee1275_get_integer_property (root, "#address-cells", &address_cells,
|
|
|
fd0330 |
+ sizeof (address_cells), 0))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "couldn't examine #address-cells");
|
|
|
fd0330 |
+ if (grub_ieee1275_get_integer_property (root, "#size-cells", &size_cells,
|
|
|
fd0330 |
+ sizeof (size_cells), 0))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "couldn't examine #size-cells");
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ if (size_cells > address_cells)
|
|
|
fd0330 |
+ address_cells = size_cells;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ /* Load `/memory/reg'. */
|
|
|
fd0330 |
+ if (grub_ieee1275_finddevice ("/memory", &memory))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "couldn't find /memory node");
|
|
|
fd0330 |
+ if (grub_ieee1275_get_integer_property (memory, "reg", reg,
|
|
|
fd0330 |
+ sizeof (reg), &reg_size))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "couldn't examine /memory/reg property");
|
|
|
fd0330 |
+ if (reg_size < 0 || (grub_size_t) reg_size > sizeof (reg))
|
|
|
fd0330 |
+ return grub_error (GRUB_ERR_UNKNOWN_DEVICE, "/memory response buffer exceeded");
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ if (grub_ieee1275_test_flag (GRUB_IEEE1275_FLAG_BROKEN_ADDRESS_CELLS))
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ address_cells = 1;
|
|
|
fd0330 |
+ size_cells = 1;
|
|
|
fd0330 |
+ }
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ /* Decode only the size */
|
|
|
fd0330 |
+ size = reg[address_cells];
|
|
|
fd0330 |
+ if (size_cells == 2)
|
|
|
fd0330 |
+ size = (size << 32) | reg[address_cells + 1];
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ *total = size;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ return grub_errno;
|
|
|
fd0330 |
+}
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+#if defined(__powerpc__)
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+/* See PAPR or arch/powerpc/kernel/prom_init.c */
|
|
|
fd0330 |
+struct option_vector2
|
|
|
fd0330 |
+{
|
|
|
fd0330 |
+ grub_uint8_t byte1;
|
|
|
fd0330 |
+ grub_uint16_t reserved;
|
|
|
fd0330 |
+ grub_uint32_t real_base;
|
|
|
fd0330 |
+ grub_uint32_t real_size;
|
|
|
fd0330 |
+ grub_uint32_t virt_base;
|
|
|
fd0330 |
+ grub_uint32_t virt_size;
|
|
|
fd0330 |
+ grub_uint32_t load_base;
|
|
|
fd0330 |
+ grub_uint32_t min_rma;
|
|
|
fd0330 |
+ grub_uint32_t min_load;
|
|
|
fd0330 |
+ grub_uint8_t min_rma_percent;
|
|
|
fd0330 |
+ grub_uint8_t max_pft_size;
|
|
|
fd0330 |
+} GRUB_PACKED;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+struct pvr_entry
|
|
|
fd0330 |
+{
|
|
|
fd0330 |
+ grub_uint32_t mask;
|
|
|
fd0330 |
+ grub_uint32_t entry;
|
|
|
fd0330 |
+};
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+struct cas_vector
|
|
|
fd0330 |
+{
|
|
|
fd0330 |
+ struct
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ struct pvr_entry terminal;
|
|
|
fd0330 |
+ } pvr_list;
|
|
|
fd0330 |
+ grub_uint8_t num_vecs;
|
|
|
fd0330 |
+ grub_uint8_t vec1_size;
|
|
|
fd0330 |
+ grub_uint8_t vec1;
|
|
|
fd0330 |
+ grub_uint8_t vec2_size;
|
|
|
fd0330 |
+ struct option_vector2 vec2;
|
|
|
fd0330 |
+ grub_uint8_t vec3_size;
|
|
|
fd0330 |
+ grub_uint16_t vec3;
|
|
|
fd0330 |
+ grub_uint8_t vec4_size;
|
|
|
fd0330 |
+ grub_uint16_t vec4;
|
|
|
fd0330 |
+} GRUB_PACKED;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+/*
|
|
|
fd0330 |
+ * Call ibm,client-architecture-support to try to get more RMA.
|
|
|
fd0330 |
+ * We ask for 512MB which should be enough to verify a distro kernel.
|
|
|
fd0330 |
+ * We ignore most errors: if we don't succeed we'll proceed with whatever
|
|
|
fd0330 |
+ * memory we have.
|
|
|
fd0330 |
+ */
|
|
|
fd0330 |
+static void
|
|
|
fd0330 |
+grub_ieee1275_ibm_cas (void)
|
|
|
fd0330 |
+{
|
|
|
fd0330 |
+ int rc;
|
|
|
fd0330 |
+ grub_ieee1275_ihandle_t root;
|
|
|
fd0330 |
+ struct cas_args
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ struct grub_ieee1275_common_hdr common;
|
|
|
fd0330 |
+ grub_ieee1275_cell_t method;
|
|
|
fd0330 |
+ grub_ieee1275_ihandle_t ihandle;
|
|
|
fd0330 |
+ grub_ieee1275_cell_t cas_addr;
|
|
|
fd0330 |
+ grub_ieee1275_cell_t result;
|
|
|
fd0330 |
+ } args;
|
|
|
fd0330 |
+ struct cas_vector vector =
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ .pvr_list = { { 0x00000000, 0xffffffff } }, /* any processor */
|
|
|
fd0330 |
+ .num_vecs = 4 - 1,
|
|
|
fd0330 |
+ .vec1_size = 0,
|
|
|
fd0330 |
+ .vec1 = 0x80, /* ignore */
|
|
|
fd0330 |
+ .vec2_size = 1 + sizeof (struct option_vector2) - 2,
|
|
|
fd0330 |
+ .vec2 = {
|
|
|
fd0330 |
+ 0, 0, -1, -1, -1, -1, -1, 512, -1, 0, 48
|
|
|
fd0330 |
+ },
|
|
|
fd0330 |
+ .vec3_size = 2 - 1,
|
|
|
fd0330 |
+ .vec3 = 0x00e0, /* ask for FP + VMX + DFP but don't halt if unsatisfied */
|
|
|
fd0330 |
+ .vec4_size = 2 - 1,
|
|
|
fd0330 |
+ .vec4 = 0x0001, /* set required minimum capacity % to the lowest value */
|
|
|
fd0330 |
+ };
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ INIT_IEEE1275_COMMON (&args.common, "call-method", 3, 2);
|
|
|
fd0330 |
+ args.method = (grub_ieee1275_cell_t) "ibm,client-architecture-support";
|
|
|
fd0330 |
+ rc = grub_ieee1275_open ("/", &root);
|
|
|
fd0330 |
+ if (rc)
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ grub_error (GRUB_ERR_IO, "could not open root when trying to call CAS");
|
|
|
fd0330 |
+ return;
|
|
|
fd0330 |
+ }
|
|
|
fd0330 |
+ args.ihandle = root;
|
|
|
fd0330 |
+ args.cas_addr = (grub_ieee1275_cell_t) &vector;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ grub_printf ("Calling ibm,client-architecture-support from grub...");
|
|
|
fd0330 |
+ IEEE1275_CALL_ENTRY_FN (&args);
|
|
|
fd0330 |
+ grub_printf ("done\n");
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ grub_ieee1275_close (root);
|
|
|
fd0330 |
+}
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+#endif /* __powerpc__ */
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+static void
|
|
|
fd0330 |
grub_claim_heap (void)
|
|
|
fd0330 |
{
|
|
|
fd0330 |
unsigned long total = 0;
|
|
|
fd0330 |
|
|
|
fd0330 |
+#if defined(__powerpc__)
|
|
|
fd0330 |
+ if (grub_ieee1275_test_flag (GRUB_IEEE1275_FLAG_CAN_TRY_CAS_FOR_MORE_MEMORY))
|
|
|
fd0330 |
+ {
|
|
|
fd0330 |
+ grub_uint64_t rma_size;
|
|
|
fd0330 |
+ grub_err_t err;
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+ err = grub_ieee1275_total_mem (&rma_size);
|
|
|
fd0330 |
+ /* if we have an error, don't call CAS, just hope for the best */
|
|
|
fd0330 |
+ if (err == GRUB_ERR_NONE && rma_size < (512 * 1024 * 1024))
|
|
|
fd0330 |
+ grub_ieee1275_ibm_cas ();
|
|
|
fd0330 |
+ }
|
|
|
fd0330 |
+#endif
|
|
|
fd0330 |
+
|
|
|
fd0330 |
grub_machine_mmap_iterate (heap_init, &total);
|
|
|
fd0330 |
}
|
|
|
fd0330 |
#endif
|
|
|
fd0330 |
diff --git a/include/grub/ieee1275/ieee1275.h b/include/grub/ieee1275/ieee1275.h
|
|
|
fd0330 |
index 6a1d3e5d70..560c968460 100644
|
|
|
fd0330 |
--- a/include/grub/ieee1275/ieee1275.h
|
|
|
fd0330 |
+++ b/include/grub/ieee1275/ieee1275.h
|
|
|
fd0330 |
@@ -138,7 +138,17 @@ enum grub_ieee1275_flag
|
|
|
fd0330 |
|
|
|
fd0330 |
GRUB_IEEE1275_FLAG_RAW_DEVNAMES,
|
|
|
fd0330 |
|
|
|
fd0330 |
- GRUB_IEEE1275_FLAG_DISABLE_VIDEO_SUPPORT
|
|
|
fd0330 |
+ GRUB_IEEE1275_FLAG_DISABLE_VIDEO_SUPPORT,
|
|
|
fd0330 |
+
|
|
|
fd0330 |
+#if defined(__powerpc__)
|
|
|
fd0330 |
+ /*
|
|
|
fd0330 |
+ * On PFW, the first time we boot a Linux partition, we may only get 256MB of
|
|
|
fd0330 |
+ * real memory area, even if the partition has more memory. Set this flag if
|
|
|
fd0330 |
+ * we think we're running under PFW. Then, if this flag is set, and the RMA is
|
|
|
fd0330 |
+ * smaller than 512MB, try asking for more with CAS.
|
|
|
fd0330 |
+ */
|
|
|
fd0330 |
+ GRUB_IEEE1275_FLAG_CAN_TRY_CAS_FOR_MORE_MEMORY,
|
|
|
fd0330 |
+#endif
|
|
|
fd0330 |
};
|
|
|
fd0330 |
|
|
|
fd0330 |
extern int EXPORT_FUNC(grub_ieee1275_test_flag) (enum grub_ieee1275_flag flag);
|