anitazha / rpms / ndctl

Forked from rpms/ndctl 2 years ago
Clone

Blame SOURCES/0184-cxl-memdev-Add-reserve-free-dpa-commands.patch

e0018b
From 6624f4fdf7ba43039111c996dfd3982b4fdc43bc Mon Sep 17 00:00:00 2001
e0018b
From: Dan Williams <dan.j.williams@intel.com>
e0018b
Date: Thu, 14 Jul 2022 10:02:44 -0700
e0018b
Subject: [PATCH 184/217] cxl/memdev: Add {reserve,free}-dpa commands
e0018b
e0018b
Add helper commands for managing allocations of DPA (device physical
e0018b
address) capacity on a set of CXL memory devices.
e0018b
e0018b
The main convenience this command affords is automatically picking the next
e0018b
decoder to allocate per-memdev.
e0018b
e0018b
For example, to allocate 256MiB from all endpoints that are covered by a
e0018b
given root decoder, and collect those resulting endpoint-decoders into an
e0018b
array:
e0018b
e0018b
  readarray -t mem < <(cxl list -M -d $decoder | jq -r ".[].memdev")
e0018b
  readarray -t endpoint < <(cxl reserve-dpa -t pmem ${mem[*]} -s $((256<<20)) |
e0018b
                            jq -r ".[] | .decoder.decoder")
e0018b
e0018b
Link: https://lore.kernel.org/r/165781816425.1555691.17958897857798325111.stgit@dwillia2-xfh.jf.intel.com
e0018b
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
e0018b
[vishal: fix typo pointed out by Jonathan]
e0018b
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
e0018b
---
e0018b
 .clang-format                         |   1 +
e0018b
 Documentation/cxl/cxl-free-dpa.txt    |  53 +++++
e0018b
 Documentation/cxl/cxl-reserve-dpa.txt |  67 ++++++
e0018b
 Documentation/cxl/lib/libcxl.txt      |   2 +
e0018b
 Documentation/cxl/meson.build         |   2 +
e0018b
 cxl/builtin.h                         |   2 +
e0018b
 cxl/cxl.c                             |   2 +
e0018b
 cxl/filter.c                          |   4 +-
e0018b
 cxl/filter.h                          |   2 +
e0018b
 cxl/lib/libcxl.c                      |  86 ++++++++
e0018b
 cxl/lib/libcxl.sym                    |   4 +
e0018b
 cxl/libcxl.h                          |   9 +
e0018b
 cxl/memdev.c                          | 280 +++++++++++++++++++++++++-
e0018b
 13 files changed, 511 insertions(+), 3 deletions(-)
e0018b
 create mode 100644 Documentation/cxl/cxl-free-dpa.txt
e0018b
 create mode 100644 Documentation/cxl/cxl-reserve-dpa.txt
e0018b
e0018b
diff --git a/.clang-format b/.clang-format
e0018b
index 6aabcb6..7254a1b 100644
e0018b
--- a/.clang-format
e0018b
+++ b/.clang-format
e0018b
@@ -81,6 +81,7 @@ ForEachMacros:
e0018b
   - 'cxl_bus_foreach'
e0018b
   - 'cxl_port_foreach'
e0018b
   - 'cxl_decoder_foreach'
e0018b
+  - 'cxl_decoder_foreach_reverse'
e0018b
   - 'cxl_target_foreach'
e0018b
   - 'cxl_dport_foreach'
e0018b
   - 'cxl_endpoint_foreach'
e0018b
diff --git a/Documentation/cxl/cxl-free-dpa.txt b/Documentation/cxl/cxl-free-dpa.txt
e0018b
new file mode 100644
e0018b
index 0000000..73fb048
e0018b
--- /dev/null
e0018b
+++ b/Documentation/cxl/cxl-free-dpa.txt
e0018b
@@ -0,0 +1,53 @@
e0018b
+// SPDX-License-Identifier: GPL-2.0
e0018b
+
e0018b
+cxl-free-dpa(1)
e0018b
+===============
e0018b
+
e0018b
+NAME
e0018b
+----
e0018b
+cxl-free-dpa - release device-physical address space
e0018b
+
e0018b
+SYNOPSIS
e0018b
+--------
e0018b
+[verse]
e0018b
+'cxl free-dpa' <mem0> [<mem1>..<memN>] [<options>]
e0018b
+
e0018b
+The CXL region provisioning process proceeds in multiple steps. One of
e0018b
+the steps is identifying and reserving the DPA span that each member of
e0018b
+the interleave-set (region) contributes in advance of attaching that
e0018b
+allocation to a region. For development, test, and debug purposes this
e0018b
+command is a helper to find the last allocated decoder on a device and
e0018b
+zero-out / free its DPA allocation.
e0018b
+
e0018b
+OPTIONS
e0018b
+-------
e0018b
+<memory device(s)>::
e0018b
+include::memdev-option.txt[]
e0018b
+
e0018b
+-d::
e0018b
+--decoder::
e0018b
+	Specify the decoder to free. The CXL specification
e0018b
+	mandates that DPA must be released in the reverse order it was
e0018b
+	allocated. See linkcxl:cxl-reserve-dpa[1].
e0018b
+
e0018b
+-t::
e0018b
+--type::
e0018b
+	Constrain the search for "last allocated decoder" to decoders targeting
e0018b
+	the given partition.
e0018b
+
e0018b
+-f::
e0018b
+--force::
e0018b
+	The kernel enforces CXL DPA ordering constraints on deallocation events,
e0018b
+	and the tool anticipates those and fails operations that are expected to
e0018b
+	fail without sending them to the kernel. For test purposes, continue to
e0018b
+	attempt "expected to fail" operations to exercise the driver.
e0018b
+
e0018b
+-v::
e0018b
+	Turn on verbose debug messages in the library (if libcxl was built with
e0018b
+	logging and debug enabled).
e0018b
+
e0018b
+include::../copyright.txt[]
e0018b
+
e0018b
+SEE ALSO
e0018b
+--------
e0018b
+linkcxl:cxl-reserve-dpa[1]
e0018b
diff --git a/Documentation/cxl/cxl-reserve-dpa.txt b/Documentation/cxl/cxl-reserve-dpa.txt
e0018b
new file mode 100644
e0018b
index 0000000..5e79ef2
e0018b
--- /dev/null
e0018b
+++ b/Documentation/cxl/cxl-reserve-dpa.txt
e0018b
@@ -0,0 +1,67 @@
e0018b
+// SPDX-License-Identifier: GPL-2.0
e0018b
+
e0018b
+cxl-reserve-dpa(1)
e0018b
+==================
e0018b
+
e0018b
+NAME
e0018b
+----
e0018b
+cxl-reserve-dpa - allocate device-physical address space
e0018b
+
e0018b
+SYNOPSIS
e0018b
+--------
e0018b
+[verse]
e0018b
+'cxl reserve-dpa' <mem0> [<mem1>..<memN>] [<options>]
e0018b
+
e0018b
+The CXL region provisioning process proceeds in multiple steps. One of
e0018b
+the steps is identifying and reserving the DPA span that each member of
e0018b
+the interleave-set (region) contributes in advance of attaching that
e0018b
+allocation to a region. For development, test, and debug purposes this
e0018b
+command is a helper to find the next available decoder on an endpoint
e0018b
+(memdev) and mark a span of DPA as busy.
e0018b
+
e0018b
+OPTIONS
e0018b
+-------
e0018b
+<memory device(s)>::
e0018b
+include::memdev-option.txt[]
e0018b
+
e0018b
+-d::
e0018b
+--decoder::
e0018b
+	Specify the decoder to attempt the allocation. The CXL specification
e0018b
+	mandates that allocations must be ordered by DPA and decoder instance.
e0018b
+	I.e. the lowest DPA allocation on the device is covered by decoder0, and
e0018b
+	the last / highest DPA allocation is covered by the last decoder. This
e0018b
+	ordering is enforced by the kernel. By default the tool picks the 'next
e0018b
+	available' decoder.
e0018b
+
e0018b
+-t::
e0018b
+--type::
e0018b
+	Select the partition for the allocation. CXL devices implement a
e0018b
+	partition that divides 'ram' and 'pmem' capacity, where 'pmem' capacity
e0018b
+	consumes the higher DPA capacity above the partition boundary. The type
e0018b
+	defaults to 'pmem'. Note that given CXL DPA allocation constraints, once
e0018b
+	any 'pmem' allocation is established then all remaining 'ram' capacity
e0018b
+	becomes reserved (skipped).
e0018b
+
e0018b
+-f::
e0018b
+--force::
e0018b
+	The kernel enforces CXL DPA allocation ordering constraints, and
e0018b
+	the tool anticipates those and fails operations that are expected to
e0018b
+	fail without sending them to the kernel. For test purposes, continue to
e0018b
+	attempt "expected to fail" operations to exercise the driver.
e0018b
+
e0018b
+-s::
e0018b
+--size::
e0018b
+	Specify the size of the allocation. This option supports the suffixes
e0018b
+	"k" or "K" for KiB, "m" or "M" for MiB, "g" or "G" for GiB and "t" or
e0018b
+	"T" for TiB. This defaults to "all available capacity of the specified
e0018b
+	type".
e0018b
+
e0018b
+-v::
e0018b
+	Turn on verbose debug messages in the library (if libcxl was built with
e0018b
+	logging and debug enabled).
e0018b
+
e0018b
+include::../copyright.txt[]
e0018b
+
e0018b
+SEE ALSO
e0018b
+--------
e0018b
+linkcxl:cxl-free-dpa[1]
e0018b
diff --git a/Documentation/cxl/lib/libcxl.txt b/Documentation/cxl/lib/libcxl.txt
e0018b
index 90fe338..7a38ce4 100644
e0018b
--- a/Documentation/cxl/lib/libcxl.txt
e0018b
+++ b/Documentation/cxl/lib/libcxl.txt
e0018b
@@ -394,6 +394,7 @@ unsigned long long cxl_decoder_get_resource(struct cxl_decoder *decoder);
e0018b
 unsigned long long cxl_decoder_get_size(struct cxl_decoder *decoder);
e0018b
 unsigned long long cxl_decoder_get_dpa_resource(struct cxl_decoder *decoder);
e0018b
 unsigned long long cxl_decoder_get_dpa_size(struct cxl_decoder *decoder);
e0018b
+int cxl_decoder_set_dpa_size(struct cxl_decoder *decoder, unsigned long long size);
e0018b
 const char *cxl_decoder_get_devname(struct cxl_decoder *decoder);
e0018b
 int cxl_decoder_get_id(struct cxl_decoder *decoder);
e0018b
 int cxl_decoder_get_nr_targets(struct cxl_decoder *decoder);
e0018b
@@ -413,6 +414,7 @@ enum cxl_decoder_mode {
e0018b
 	CXL_DECODER_MODE_RAM,
e0018b
 };
e0018b
 enum cxl_decoder_mode cxl_decoder_get_mode(struct cxl_decoder *decoder);
e0018b
+int cxl_decoder_set_mode(struct cxl_decoder *decoder, enum cxl_decoder_mode mode);
e0018b
 
e0018b
 bool cxl_decoder_is_pmem_capable(struct cxl_decoder *decoder);
e0018b
 bool cxl_decoder_is_volatile_capable(struct cxl_decoder *decoder);
e0018b
diff --git a/Documentation/cxl/meson.build b/Documentation/cxl/meson.build
e0018b
index 974a5a4..d019dfc 100644
e0018b
--- a/Documentation/cxl/meson.build
e0018b
+++ b/Documentation/cxl/meson.build
e0018b
@@ -36,6 +36,8 @@ cxl_manpages = [
e0018b
   'cxl-disable-port.txt',
e0018b
   'cxl-disable-bus.txt',
e0018b
   'cxl-set-partition.txt',
e0018b
+  'cxl-reserve-dpa.txt',
e0018b
+  'cxl-free-dpa.txt',
e0018b
 ]
e0018b
 
e0018b
 foreach man : cxl_manpages
e0018b
diff --git a/cxl/builtin.h b/cxl/builtin.h
e0018b
index a437bc3..9e6fc62 100644
e0018b
--- a/cxl/builtin.h
e0018b
+++ b/cxl/builtin.h
e0018b
@@ -12,6 +12,8 @@ int cmd_init_labels(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_check_labels(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_disable_memdev(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_enable_memdev(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
+int cmd_reserve_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
+int cmd_free_dpa(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_disable_port(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_enable_port(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
 int cmd_set_partition(int argc, const char **argv, struct cxl_ctx *ctx);
e0018b
diff --git a/cxl/cxl.c b/cxl/cxl.c
e0018b
index aa4ce61..ef4cda9 100644
e0018b
--- a/cxl/cxl.c
e0018b
+++ b/cxl/cxl.c
e0018b
@@ -66,6 +66,8 @@ static struct cmd_struct commands[] = {
e0018b
 	{ "write-labels", .c_fn = cmd_write_labels },
e0018b
 	{ "disable-memdev", .c_fn = cmd_disable_memdev },
e0018b
 	{ "enable-memdev", .c_fn = cmd_enable_memdev },
e0018b
+	{ "reserve-dpa", .c_fn = cmd_reserve_dpa },
e0018b
+	{ "free-dpa", .c_fn = cmd_free_dpa },
e0018b
 	{ "disable-port", .c_fn = cmd_disable_port },
e0018b
 	{ "enable-port", .c_fn = cmd_enable_port },
e0018b
 	{ "set-partition", .c_fn = cmd_set_partition },
e0018b
diff --git a/cxl/filter.c b/cxl/filter.c
e0018b
index 2f88a9d..e5fab19 100644
e0018b
--- a/cxl/filter.c
e0018b
+++ b/cxl/filter.c
e0018b
@@ -380,8 +380,8 @@ struct cxl_port *util_cxl_port_filter_by_memdev(struct cxl_port *port,
e0018b
 	return NULL;
e0018b
 }
e0018b
 
e0018b
-static struct cxl_decoder *util_cxl_decoder_filter(struct cxl_decoder *decoder,
e0018b
-						   const char *__ident)
e0018b
+struct cxl_decoder *util_cxl_decoder_filter(struct cxl_decoder *decoder,
e0018b
+					    const char *__ident)
e0018b
 {
e0018b
 	struct cxl_port *port = cxl_decoder_get_port(decoder);
e0018b
 	int pid, did;
e0018b
diff --git a/cxl/filter.h b/cxl/filter.h
e0018b
index 9557943..c913daf 100644
e0018b
--- a/cxl/filter.h
e0018b
+++ b/cxl/filter.h
e0018b
@@ -50,6 +50,8 @@ struct cxl_target *util_cxl_target_filter_by_memdev(struct cxl_target *target,
e0018b
 struct cxl_dport *util_cxl_dport_filter_by_memdev(struct cxl_dport *dport,
e0018b
 						  const char *ident,
e0018b
 						  const char *serial);
e0018b
+struct cxl_decoder *util_cxl_decoder_filter(struct cxl_decoder *decoder,
e0018b
+					    const char *__ident);
e0018b
 int cxl_filter_walk(struct cxl_ctx *ctx, struct cxl_filter_params *param);
e0018b
 bool cxl_filter_has(const char *needle, const char *__filter);
e0018b
 #endif /* _CXL_UTIL_FILTER_H_ */
e0018b
diff --git a/cxl/lib/libcxl.c b/cxl/lib/libcxl.c
e0018b
index b802e5d..e52896f 100644
e0018b
--- a/cxl/lib/libcxl.c
e0018b
+++ b/cxl/lib/libcxl.c
e0018b
@@ -1121,6 +1121,20 @@ CXL_EXPORT struct cxl_decoder *cxl_decoder_get_next(struct cxl_decoder *decoder)
e0018b
 	return list_next(&port->decoders, decoder, list);
e0018b
 }
e0018b
 
e0018b
+CXL_EXPORT struct cxl_decoder *cxl_decoder_get_last(struct cxl_port *port)
e0018b
+{
e0018b
+	cxl_decoders_init(port);
e0018b
+
e0018b
+	return list_tail(&port->decoders, struct cxl_decoder, list);
e0018b
+}
e0018b
+
e0018b
+CXL_EXPORT struct cxl_decoder *cxl_decoder_get_prev(struct cxl_decoder *decoder)
e0018b
+{
e0018b
+	struct cxl_port *port = decoder->port;
e0018b
+
e0018b
+	return list_prev(&port->decoders, decoder, list);
e0018b
+}
e0018b
+
e0018b
 CXL_EXPORT struct cxl_ctx *cxl_decoder_get_ctx(struct cxl_decoder *decoder)
e0018b
 {
e0018b
 	return decoder->ctx;
e0018b
@@ -1176,6 +1190,78 @@ cxl_decoder_get_dpa_size(struct cxl_decoder *decoder)
e0018b
 	return decoder->dpa_size;
e0018b
 }
e0018b
 
e0018b
+CXL_EXPORT int cxl_decoder_set_dpa_size(struct cxl_decoder *decoder,
e0018b
+					unsigned long long size)
e0018b
+{
e0018b
+	struct cxl_port *port = cxl_decoder_get_port(decoder);
e0018b
+	struct cxl_ctx *ctx = cxl_decoder_get_ctx(decoder);
e0018b
+	char *path = decoder->dev_buf;
e0018b
+	int len = decoder->buf_len, rc;
e0018b
+	char buf[SYSFS_ATTR_SIZE];
e0018b
+
e0018b
+	if (!cxl_port_is_endpoint(port)) {
e0018b
+		err(ctx, "%s: not an endpoint decoder\n",
e0018b
+		    cxl_decoder_get_devname(decoder));
e0018b
+		return -EINVAL;
e0018b
+	}
e0018b
+
e0018b
+	if (snprintf(path, len, "%s/dpa_size", decoder->dev_path) >= len) {
e0018b
+		err(ctx, "%s: buffer too small!\n",
e0018b
+		    cxl_decoder_get_devname(decoder));
e0018b
+		return -ENOMEM;
e0018b
+	}
e0018b
+
e0018b
+	sprintf(buf, "%#llx\n", size);
e0018b
+	rc = sysfs_write_attr(ctx, path, buf);
e0018b
+	if (rc < 0)
e0018b
+		return rc;
e0018b
+
e0018b
+	decoder->dpa_size = size;
e0018b
+	return 0;
e0018b
+}
e0018b
+
e0018b
+CXL_EXPORT int cxl_decoder_set_mode(struct cxl_decoder *decoder,
e0018b
+				    enum cxl_decoder_mode mode)
e0018b
+{
e0018b
+	struct cxl_port *port = cxl_decoder_get_port(decoder);
e0018b
+	struct cxl_ctx *ctx = cxl_decoder_get_ctx(decoder);
e0018b
+	char *path = decoder->dev_buf;
e0018b
+	int len = decoder->buf_len, rc;
e0018b
+	char buf[SYSFS_ATTR_SIZE];
e0018b
+
e0018b
+	if (!cxl_port_is_endpoint(port)) {
e0018b
+		err(ctx, "%s: not an endpoint decoder\n",
e0018b
+		    cxl_decoder_get_devname(decoder));
e0018b
+		return -EINVAL;
e0018b
+	}
e0018b
+
e0018b
+	switch (mode) {
e0018b
+	case CXL_DECODER_MODE_PMEM:
e0018b
+		sprintf(buf, "pmem");
e0018b
+		break;
e0018b
+	case CXL_DECODER_MODE_RAM:
e0018b
+		sprintf(buf, "ram");
e0018b
+		break;
e0018b
+	default:
e0018b
+		err(ctx, "%s: unsupported mode: %d\n",
e0018b
+		    cxl_decoder_get_devname(decoder), mode);
e0018b
+		return -EINVAL;
e0018b
+	}
e0018b
+
e0018b
+	if (snprintf(path, len, "%s/mode", decoder->dev_path) >= len) {
e0018b
+		err(ctx, "%s: buffer too small!\n",
e0018b
+		    cxl_decoder_get_devname(decoder));
e0018b
+		return -ENOMEM;
e0018b
+	}
e0018b
+
e0018b
+	rc = sysfs_write_attr(ctx, path, buf);
e0018b
+	if (rc < 0)
e0018b
+		return rc;
e0018b
+
e0018b
+	decoder->mode = mode;
e0018b
+	return 0;
e0018b
+}
e0018b
+
e0018b
 CXL_EXPORT enum cxl_decoder_mode
e0018b
 cxl_decoder_get_mode(struct cxl_decoder *decoder)
e0018b
 {
e0018b
diff --git a/cxl/lib/libcxl.sym b/cxl/lib/libcxl.sym
e0018b
index 88c5a7e..7712de0 100644
e0018b
--- a/cxl/lib/libcxl.sym
e0018b
+++ b/cxl/lib/libcxl.sym
e0018b
@@ -173,4 +173,8 @@ global:
e0018b
 	cxl_decoder_get_dpa_resource;
e0018b
 	cxl_decoder_get_dpa_size;
e0018b
 	cxl_decoder_get_mode;
e0018b
+	cxl_decoder_get_last;
e0018b
+	cxl_decoder_get_prev;
e0018b
+	cxl_decoder_set_dpa_size;
e0018b
+	cxl_decoder_set_mode;
e0018b
 } LIBCXL_2;
e0018b
diff --git a/cxl/libcxl.h b/cxl/libcxl.h
e0018b
index 1436dc4..33a216e 100644
e0018b
--- a/cxl/libcxl.h
e0018b
+++ b/cxl/libcxl.h
e0018b
@@ -139,6 +139,7 @@ enum cxl_decoder_mode {
e0018b
 	CXL_DECODER_MODE_PMEM,
e0018b
 	CXL_DECODER_MODE_RAM,
e0018b
 };
e0018b
+
e0018b
 static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
e0018b
 {
e0018b
 	static const char *names[] = {
e0018b
@@ -154,6 +155,10 @@ static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
e0018b
 }
e0018b
 
e0018b
 enum cxl_decoder_mode cxl_decoder_get_mode(struct cxl_decoder *decoder);
e0018b
+int cxl_decoder_set_mode(struct cxl_decoder *decoder,
e0018b
+			 enum cxl_decoder_mode mode);
e0018b
+int cxl_decoder_set_dpa_size(struct cxl_decoder *decoder,
e0018b
+			     unsigned long long size);
e0018b
 const char *cxl_decoder_get_devname(struct cxl_decoder *decoder);
e0018b
 struct cxl_target *cxl_decoder_get_target_by_memdev(struct cxl_decoder *decoder,
e0018b
 						    struct cxl_memdev *memdev);
e0018b
@@ -182,6 +187,10 @@ bool cxl_decoder_is_locked(struct cxl_decoder *decoder);
e0018b
 	for (decoder = cxl_decoder_get_first(port); decoder != NULL;           \
e0018b
 	     decoder = cxl_decoder_get_next(decoder))
e0018b
 
e0018b
+#define cxl_decoder_foreach_reverse(port, decoder)                             \
e0018b
+	for (decoder = cxl_decoder_get_last(port); decoder != NULL;           \
e0018b
+	     decoder = cxl_decoder_get_prev(decoder))
e0018b
+
e0018b
 struct cxl_target;
e0018b
 struct cxl_target *cxl_target_get_first(struct cxl_decoder *decoder);
e0018b
 struct cxl_target *cxl_target_get_next(struct cxl_target *target);
e0018b
diff --git a/cxl/memdev.c b/cxl/memdev.c
e0018b
index 1cecad2..e42f554 100644
e0018b
--- a/cxl/memdev.c
e0018b
+++ b/cxl/memdev.c
e0018b
@@ -33,6 +33,7 @@ static struct parameters {
e0018b
 	bool align;
e0018b
 	const char *type;
e0018b
 	const char *size;
e0018b
+	const char *decoder_filter;
e0018b
 } param;
e0018b
 
e0018b
 static struct log_ctx ml;
e0018b
@@ -71,6 +72,19 @@ OPT_STRING('s', "size",  &param.size, "size",			\
e0018b
 OPT_BOOLEAN('a', "align",  &param.align,			\
e0018b
 	"auto-align --size per device's requirement")
e0018b
 
e0018b
+#define RESERVE_DPA_OPTIONS()                                          \
e0018b
+OPT_STRING('s', "size", &param.size, "size",                           \
e0018b
+	   "size in bytes (Default: all available capacity)")
e0018b
+
e0018b
+#define DPA_OPTIONS()                                          \
e0018b
+OPT_STRING('d', "decoder", &param.decoder_filter,              \
e0018b
+   "decoder instance id",                                      \
e0018b
+   "override the automatic decoder selection"),                \
e0018b
+OPT_STRING('t', "type", &param.type, "type",                   \
e0018b
+	   "'pmem' or 'ram' (volatile) (Default: 'pmem')"),    \
e0018b
+OPT_BOOLEAN('f', "force", &param.force,                        \
e0018b
+	    "Attempt 'expected to fail' operations")
e0018b
+
e0018b
 static const struct option read_options[] = {
e0018b
 	BASE_OPTIONS(),
e0018b
 	LABEL_OPTIONS(),
e0018b
@@ -108,6 +122,242 @@ static const struct option set_partition_options[] = {
e0018b
 	OPT_END(),
e0018b
 };
e0018b
 
e0018b
+static const struct option reserve_dpa_options[] = {
e0018b
+	BASE_OPTIONS(),
e0018b
+	RESERVE_DPA_OPTIONS(),
e0018b
+	DPA_OPTIONS(),
e0018b
+	OPT_END(),
e0018b
+};
e0018b
+
e0018b
+static const struct option free_dpa_options[] = {
e0018b
+	BASE_OPTIONS(),
e0018b
+	DPA_OPTIONS(),
e0018b
+	OPT_END(),
e0018b
+};
e0018b
+
e0018b
+enum reserve_dpa_mode {
e0018b
+	DPA_ALLOC,
e0018b
+	DPA_FREE,
e0018b
+};
e0018b
+
e0018b
+static int __reserve_dpa(struct cxl_memdev *memdev,
e0018b
+			 enum reserve_dpa_mode alloc_mode,
e0018b
+			 struct action_context *actx)
e0018b
+{
e0018b
+	struct cxl_decoder *decoder, *auto_target = NULL, *target = NULL;
e0018b
+	struct cxl_endpoint *endpoint = cxl_memdev_get_endpoint(memdev);
e0018b
+	const char *devname = cxl_memdev_get_devname(memdev);
e0018b
+	unsigned long long avail_dpa, size;
e0018b
+	enum cxl_decoder_mode mode;
e0018b
+	struct cxl_port *port;
e0018b
+	char buf[256];
e0018b
+	int rc;
e0018b
+
e0018b
+	if (param.type) {
e0018b
+		if (strcmp(param.type, "ram") == 0)
e0018b
+			mode = CXL_DECODER_MODE_RAM;
e0018b
+		else if (strcmp(param.type, "volatile") == 0)
e0018b
+			mode = CXL_DECODER_MODE_RAM;
e0018b
+		else if (strcmp(param.type, "ram") == 0)
e0018b
+			mode = CXL_DECODER_MODE_RAM;
e0018b
+		else if (strcmp(param.type, "pmem") == 0)
e0018b
+			mode = CXL_DECODER_MODE_PMEM;
e0018b
+		else {
e0018b
+			log_err(&ml, "%s: unsupported type: %s\n", devname,
e0018b
+				param.type);
e0018b
+			return -EINVAL;
e0018b
+		}
e0018b
+	} else
e0018b
+		mode = CXL_DECODER_MODE_RAM;
e0018b
+
e0018b
+	if (!endpoint) {
e0018b
+		log_err(&ml, "%s: CXL operation disabled\n", devname);
e0018b
+		return -ENXIO;
e0018b
+	}
e0018b
+
e0018b
+	port = cxl_endpoint_get_port(endpoint);
e0018b
+
e0018b
+	if (mode == CXL_DECODER_MODE_RAM)
e0018b
+		avail_dpa = cxl_memdev_get_ram_size(memdev);
e0018b
+	else
e0018b
+		avail_dpa = cxl_memdev_get_pmem_size(memdev);
e0018b
+
e0018b
+	cxl_decoder_foreach(port, decoder) {
e0018b
+		size = cxl_decoder_get_dpa_size(decoder);
e0018b
+		if (size == ULLONG_MAX)
e0018b
+			continue;
e0018b
+		if (cxl_decoder_get_mode(decoder) != mode)
e0018b
+			continue;
e0018b
+
e0018b
+		if (size > avail_dpa) {
e0018b
+			log_err(&ml, "%s: capacity accounting error\n",
e0018b
+				devname);
e0018b
+			return -ENXIO;
e0018b
+		}
e0018b
+		avail_dpa -= size;
e0018b
+	}
e0018b
+
e0018b
+	if (!param.size)
e0018b
+		if (alloc_mode == DPA_ALLOC) {
e0018b
+			size = avail_dpa;
e0018b
+			if (!avail_dpa) {
e0018b
+				log_err(&ml, "%s: no available capacity\n",
e0018b
+					devname);
e0018b
+				return -ENOSPC;
e0018b
+			}
e0018b
+		} else
e0018b
+			size = 0;
e0018b
+	else {
e0018b
+		size = parse_size64(param.size);
e0018b
+		if (size == ULLONG_MAX) {
e0018b
+			log_err(&ml, "%s: failed to parse size option '%s'\n",
e0018b
+				devname, param.size);
e0018b
+			return -EINVAL;
e0018b
+		}
e0018b
+		if (size > avail_dpa) {
e0018b
+			log_err(&ml, "%s: '%s' exceeds available capacity\n",
e0018b
+				devname, param.size);
e0018b
+			if (!param.force)
e0018b
+				return -ENOSPC;
e0018b
+		}
e0018b
+	}
e0018b
+
e0018b
+	/*
e0018b
+	 * Find next free decoder, assumes cxl_decoder_foreach() is in
e0018b
+	 * hardware instance-id order
e0018b
+	 */
e0018b
+	if (alloc_mode == DPA_ALLOC)
e0018b
+		cxl_decoder_foreach(port, decoder) {
e0018b
+			/* first 0-dpa_size is our target */
e0018b
+			if (cxl_decoder_get_dpa_size(decoder) == 0) {
e0018b
+				auto_target = decoder;
e0018b
+				break;
e0018b
+			}
e0018b
+		}
e0018b
+	else
e0018b
+		cxl_decoder_foreach_reverse(port, decoder) {
e0018b
+			/* nothing to free? */
e0018b
+			if (!cxl_decoder_get_dpa_size(decoder))
e0018b
+				continue;
e0018b
+			/*
e0018b
+			 * Active decoders can't be freed, and by definition all
e0018b
+			 * previous decoders must also be active
e0018b
+			 */
e0018b
+			if (cxl_decoder_get_size(decoder))
e0018b
+				break;
e0018b
+			/* first dpa_size > 0 + disabled decoder is our target */
e0018b
+			if (cxl_decoder_get_dpa_size(decoder) < ULLONG_MAX) {
e0018b
+				auto_target = decoder;
e0018b
+				break;
e0018b
+			}
e0018b
+		}
e0018b
+
e0018b
+	if (param.decoder_filter) {
e0018b
+		unsigned long id;
e0018b
+		char *end;
e0018b
+
e0018b
+		id = strtoul(param.decoder_filter, &end, 0);
e0018b
+		/* allow for standalone ordinal decoder ids */
e0018b
+		if (*end == '\0')
e0018b
+			rc = snprintf(buf, sizeof(buf), "decoder%d.%ld",
e0018b
+				      cxl_port_get_id(port), id);
e0018b
+		else
e0018b
+			rc = snprintf(buf, sizeof(buf), "%s",
e0018b
+				      param.decoder_filter);
e0018b
+
e0018b
+		if (rc >= (int)sizeof(buf)) {
e0018b
+			log_err(&ml, "%s: decoder filter '%s' too long\n",
e0018b
+				devname, param.decoder_filter);
e0018b
+			return -EINVAL;
e0018b
+		}
e0018b
+
e0018b
+		if (alloc_mode == DPA_ALLOC)
e0018b
+			cxl_decoder_foreach(port, decoder) {
e0018b
+				target = util_cxl_decoder_filter(decoder, buf);
e0018b
+				if (target)
e0018b
+					break;
e0018b
+			}
e0018b
+		else
e0018b
+			cxl_decoder_foreach_reverse(port, decoder) {
e0018b
+				target = util_cxl_decoder_filter(decoder, buf);
e0018b
+				if (target)
e0018b
+					break;
e0018b
+			}
e0018b
+
e0018b
+		if (!target) {
e0018b
+			log_err(&ml, "%s: no match for decoder: '%s'\n",
e0018b
+				devname, param.decoder_filter);
e0018b
+			return -ENXIO;
e0018b
+		}
e0018b
+
e0018b
+		if (target != auto_target) {
e0018b
+			log_err(&ml, "%s: %s is out of sequence\n", devname,
e0018b
+				cxl_decoder_get_devname(target));
e0018b
+			if (!param.force)
e0018b
+				return -EINVAL;
e0018b
+		}
e0018b
+	}
e0018b
+
e0018b
+	if (!target)
e0018b
+		target = auto_target;
e0018b
+
e0018b
+	if (!target) {
e0018b
+		log_err(&ml, "%s: no suitable decoder found\n", devname);
e0018b
+		return -ENXIO;
e0018b
+	}
e0018b
+
e0018b
+	if (cxl_decoder_get_mode(target) != mode) {
e0018b
+		rc = cxl_decoder_set_dpa_size(target, 0);
e0018b
+		if (rc) {
e0018b
+			log_err(&ml,
e0018b
+				"%s: %s: failed to clear allocation to set mode\n",
e0018b
+				devname, cxl_decoder_get_devname(target));
e0018b
+			return rc;
e0018b
+		}
e0018b
+		rc = cxl_decoder_set_mode(target, mode);
e0018b
+		if (rc) {
e0018b
+			log_err(&ml, "%s: %s: failed to set %s mode\n", devname,
e0018b
+				cxl_decoder_get_devname(target),
e0018b
+				mode == CXL_DECODER_MODE_PMEM ? "pmem" : "ram");
e0018b
+			return rc;
e0018b
+		}
e0018b
+	}
e0018b
+
e0018b
+	rc = cxl_decoder_set_dpa_size(target, size);
e0018b
+	if (rc)
e0018b
+		log_err(&ml, "%s: %s: failed to set dpa allocation\n", devname,
e0018b
+			cxl_decoder_get_devname(target));
e0018b
+	else {
e0018b
+		struct json_object *jdev, *jdecoder;
e0018b
+		unsigned long flags = 0;
e0018b
+
e0018b
+		if (actx->f_out == stdout && isatty(1))
e0018b
+			flags |= UTIL_JSON_HUMAN;
e0018b
+		jdev = util_cxl_memdev_to_json(memdev, flags);
e0018b
+		jdecoder = util_cxl_decoder_to_json(target, flags);
e0018b
+		if (!jdev || !jdecoder) {
e0018b
+			json_object_put(jdev);
e0018b
+			json_object_put(jdecoder);
e0018b
+		} else {
e0018b
+			json_object_object_add(jdev, "decoder", jdecoder);
e0018b
+			json_object_array_add(actx->jdevs, jdev);
e0018b
+		}
e0018b
+	}
e0018b
+	return rc;
e0018b
+}
e0018b
+
e0018b
+static int action_reserve_dpa(struct cxl_memdev *memdev,
e0018b
+			      struct action_context *actx)
e0018b
+{
e0018b
+	return __reserve_dpa(memdev, DPA_ALLOC, actx);
e0018b
+}
e0018b
+
e0018b
+static int action_free_dpa(struct cxl_memdev *memdev,
e0018b
+			   struct action_context *actx)
e0018b
+{
e0018b
+	return __reserve_dpa(memdev, DPA_FREE, actx);
e0018b
+}
e0018b
+
e0018b
 static int action_disable(struct cxl_memdev *memdev, struct action_context *actx)
e0018b
 {
e0018b
 	if (!cxl_memdev_is_enabled(memdev))
e0018b
@@ -452,7 +702,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
e0018b
 		err++;
e0018b
 	}
e0018b
 
e0018b
-	if (action == action_setpartition)
e0018b
+	if (action == action_setpartition || action == action_reserve_dpa ||
e0018b
+	    action == action_free_dpa)
e0018b
 		actx.jdevs = json_object_new_array();
e0018b
 
e0018b
 	if (err == argc) {
e0018b
@@ -495,6 +746,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
e0018b
 	count = 0;
e0018b
 
e0018b
 	for (i = 0; i < argc; i++) {
e0018b
+		bool found = false;
e0018b
+
e0018b
 		cxl_memdev_foreach(ctx, memdev) {
e0018b
 			const char *memdev_filter = NULL;
e0018b
 			const char *serial_filter = NULL;
e0018b
@@ -507,6 +760,7 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
e0018b
 			if (!util_cxl_memdev_filter(memdev, memdev_filter,
e0018b
 						    serial_filter))
e0018b
 				continue;
e0018b
+			found = true;
e0018b
 
e0018b
 			if (action == action_write) {
e0018b
 				single = memdev;
e0018b
@@ -519,6 +773,8 @@ static int memdev_action(int argc, const char **argv, struct cxl_ctx *ctx,
e0018b
 			else if (rc && !err)
e0018b
 				err = rc;
e0018b
 		}
e0018b
+		if (!found)
e0018b
+			log_info(&ml, "no memdev matches %s\n", argv[i]);
e0018b
 	}
e0018b
 	rc = err;
e0018b
 
e0018b
@@ -622,3 +878,25 @@ int cmd_set_partition(int argc, const char **argv, struct cxl_ctx *ctx)
e0018b
 
e0018b
 	return count >= 0 ? 0 : EXIT_FAILURE;
e0018b
 }
e0018b
+
e0018b
+int cmd_reserve_dpa(int argc, const char **argv, struct cxl_ctx *ctx)
e0018b
+{
e0018b
+	int count = memdev_action(
e0018b
+		argc, argv, ctx, action_reserve_dpa, reserve_dpa_options,
e0018b
+		"cxl reserve-dpa <mem0> [<mem1>..<memn>] [<options>]");
e0018b
+	log_info(&ml, "reservation completed on %d mem device%s\n",
e0018b
+		 count >= 0 ? count : 0, count > 1 ? "s" : "");
e0018b
+
e0018b
+	return count >= 0 ? 0 : EXIT_FAILURE;
e0018b
+}
e0018b
+
e0018b
+int cmd_free_dpa(int argc, const char **argv, struct cxl_ctx *ctx)
e0018b
+{
e0018b
+	int count = memdev_action(
e0018b
+		argc, argv, ctx, action_free_dpa, free_dpa_options,
e0018b
+		"cxl free-dpa <mem0> [<mem1>..<memn>] [<options>]");
e0018b
+	log_info(&ml, "reservation release completed on %d mem device%s\n",
e0018b
+		 count >= 0 ? count : 0, count > 1 ? "s" : "");
e0018b
+
e0018b
+	return count >= 0 ? 0 : EXIT_FAILURE;
e0018b
+}
e0018b
-- 
e0018b
2.27.0
e0018b