Blame SOURCES/pr150.patch

66a37d
From 943a7f434b10c19f8e8e865c3cc40685b9903822 Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Thu, 24 Mar 2022 17:32:43 -0300
66a37d
Subject: [PATCH 1/6] Provide a maximum job length depending on the
66a37d
 virtualization
66a37d
66a37d
Identify if a system is running on baremetal or PowerVM and provide
66a37d
a maximum job length adapted to each case.
66a37d
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_inflate.c |  5 +++--
66a37d
 lib/nx_zlib.c    | 25 +++++++++++++++++++++++--
66a37d
 lib/nx_zlib.h    |  4 ++++
66a37d
 3 files changed, 30 insertions(+), 4 deletions(-)
66a37d
66a37d
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
66a37d
index ec32b4c..77ad33c 100644
66a37d
--- a/lib/nx_inflate.c
66a37d
+++ b/lib/nx_inflate.c
66a37d
@@ -945,8 +945,9 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	uint32_t write_sz, source_sz, target_sz;
66a37d
 	long loop_cnt = 0, loop_max = 0xffff;
66a37d
 
66a37d
-	/* inflate benefits from large jobs; memcopies must be amortized */
66a37d
-	uint32_t inflate_per_job_len = 64 * nx_config.per_job_len;
66a37d
+	/** \brief inflate benefits from large jobs; memcopies must be
66a37d
+	 *  amortized.  */
66a37d
+	const uint32_t inflate_per_job_len = nx_config.per_job_len;
66a37d
 
66a37d
 	/* nx hardware */
66a37d
 	uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
66a37d
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
66a37d
index 28ea482..a50d6f7 100644
66a37d
--- a/lib/nx_zlib.c
66a37d
+++ b/lib/nx_zlib.c
66a37d
@@ -64,6 +64,18 @@
66a37d
 #include "nx_utils.h"
66a37d
 #include "nx_zlib.h"
66a37d
 
66a37d
+/* Use the following values as maximum length of NX jobs when the OS doesn't
66a37d
+   provide the value itself, which is the default behavior until Linux 5.17  */
66a37d
+
66a37d
+/** \brief Maximum job length on baremetal
66a37d
+ *
66a37d
+ *  While the system does allow up to 2 GiB as the maximum job length, restrict
66a37d
+ *  it to 64 MiB.
66a37d
+ */
66a37d
+#define DEFAULT_MAX_JOB_BAREMETAL 64 * 1024 * 1024
66a37d
+/** \brief Maximum job length on PowerVM  */
66a37d
+#define DEFAULT_MAX_JOB_POWERVM   1024 * 1024
66a37d
+
66a37d
 struct nx_config_t nx_config;
66a37d
 static struct nx_dev_t nx_devices[NX_DEVICES_MAX];
66a37d
 static int nx_dev_count = 0;
66a37d
@@ -639,8 +651,13 @@ static int nx_query_job_limits()
66a37d
 		}
66a37d
 	}
66a37d
 
66a37d
-	/* On error return default value of 1 MB */
66a37d
-	return (1024 * 1024);
66a37d
+	/* On error return default value.  */
66a37d
+	switch (nx_config.virtualization) {
66a37d
+		case BAREMETAL:
66a37d
+			return DEFAULT_MAX_JOB_BAREMETAL;
66a37d
+		default:
66a37d
+			return DEFAULT_MAX_JOB_POWERVM;
66a37d
+	}
66a37d
 }
66a37d
 
66a37d
 /*
66a37d
@@ -659,6 +676,9 @@ static int nx_enumerate_engines()
66a37d
 	int count = 0;
66a37d
 	size_t n;
66a37d
 
66a37d
+	/* Assume baremetal by default.  */
66a37d
+	nx_config.virtualization = BAREMETAL;
66a37d
+
66a37d
 	d = opendir(DEVICE_TREE);
66a37d
 	if (d == NULL){
66a37d
 		prt_err("open device tree dir failed.\n");
66a37d
@@ -712,6 +732,7 @@ static int nx_enumerate_engines()
66a37d
 		}
66a37d
 		/* On PowerVM, there is no concept of multiple NX engines.  */
66a37d
 		if (strncmp(de->d_name, "ibm,powervm", 11) == 0){
66a37d
+			nx_config.virtualization = POWERVM;
66a37d
 			closedir(d);
66a37d
 			return 1;
66a37d
 		}
66a37d
diff --git a/lib/nx_zlib.h b/lib/nx_zlib.h
66a37d
index e84bd7e..fa73b01 100644
66a37d
--- a/lib/nx_zlib.h
66a37d
+++ b/lib/nx_zlib.h
66a37d
@@ -129,6 +129,8 @@ void nx_print_dde(nx_dde_t *ddep, const char *msg);
66a37d
 #define zlib_version zlibVersion()
66a37d
 extern const char *zlibVersion OF((void));
66a37d
 
66a37d
+enum virtualization {BAREMETAL=0, POWERVM=1};
66a37d
+
66a37d
 /* common config variables for all streams */
66a37d
 struct nx_config_t {
66a37d
 	long     page_sz;
66a37d
@@ -158,6 +160,8 @@ struct nx_config_t {
66a37d
 				     * dynamic huffman */
66a37d
 	struct selector mode; /** mode selector: selects between software
66a37d
 				* and hardware compression. */
66a37d
+	uint8_t virtualization; /** Indicate the virtualization type being
66a37d
+				 *  used. */
66a37d
 };
66a37d
 typedef struct nx_config_t *nx_configp_t;
66a37d
 extern struct nx_config_t nx_config;
66a37d
66a37d
From b22eb7bffe61e36f70661921a689e44370d3c7e5 Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Thu, 24 Mar 2022 18:03:28 -0300
66a37d
Subject: [PATCH 2/6] inflate: Move code that initializes the DDE to their own
66a37d
 functions
66a37d
66a37d
Create functions nx_reset_dde() and nx_init_dde() based on previous code
66a37d
helping to reduce the size of nx_inflate_() and making it easier to
66a37d
understand the code.
66a37d
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_inflate.c | 197 ++++++++++++++++++++++++++++-------------------
66a37d
 1 file changed, 116 insertions(+), 81 deletions(-)
66a37d
66a37d
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
66a37d
index 77ad33c..f1d9adb 100644
66a37d
--- a/lib/nx_inflate.c
66a37d
+++ b/lib/nx_inflate.c
66a37d
@@ -934,6 +934,120 @@ static int copy_data_to_fifo_in(nx_streamp s) {
66a37d
 	return Z_OK;
66a37d
 }
66a37d
 
66a37d
+/** \brief Reset DDE to initial values.
66a37d
+ *
66a37d
+ *  @param s nx_streamp to be processed.
66a37d
+ *  @return Function code as passed to CRB. The function will set the CRB and
66a37d
+ *          return the value here.
66a37d
+ */
66a37d
+static int nx_reset_dde(nx_streamp s) {
66a37d
+	nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
66a37d
+	uint32_t fc;
66a37d
+
66a37d
+	/* address/len lists */
66a37d
+	clearp_dde(s->ddl_in);
66a37d
+	clearp_dde(s->ddl_out);
66a37d
+
66a37d
+	/* FC, CRC, HistLen, Table 6-6 */
66a37d
+	if (s->resuming || (s->dict_len > 0)) {
66a37d
+		/* Resuming a partially decompressed input.  */
66a37d
+		fc = GZIP_FC_DECOMPRESS_RESUME;
66a37d
+	} else {
66a37d
+		/* First decompress job */
66a37d
+		fc = GZIP_FC_DECOMPRESS;
66a37d
+
66a37d
+		/* We use the most recently measured compression ratio
66a37d
+		   as a heuristic to estimate the input and output
66a37d
+		   sizes. If we give too much input, the target buffer
66a37d
+		   overflows and NX cycles are wasted, and then we
66a37d
+		   must retry with smaller input size. 1000 is 100% */
66a37d
+		s->last_comp_ratio = 1000UL;
66a37d
+	}
66a37d
+
66a37d
+	/* clear then copy fc to the crb */
66a37d
+	cmdp->crb.gzip_fc = 0;
66a37d
+	putnn(cmdp->crb, gzip_fc, fc);
66a37d
+
66a37d
+	return fc;
66a37d
+}
66a37d
+
66a37d
+/** \brief Initialize DDE, appending a dictionary, if necessary.
66a37d
+ *
66a37d
+ *  @param s nx_streamp to be processed.
66a37d
+ *  @return The history length
66a37d
+ */
66a37d
+static int nx_init_dde(nx_streamp s) {
66a37d
+	nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
66a37d
+	int nx_history_len = s->history_len;
66a37d
+
66a37d
+	/* FC, CRC, HistLen, Table 6-6 */
66a37d
+	if (s->resuming || (s->dict_len > 0)) {
66a37d
+		/* Resuming a partially decompressed input.  The key
66a37d
+		   to resume is supplying the max 32KB dictionary
66a37d
+		   (history) to NX, which is basically the last 32KB
66a37d
+		   or less of the output earlier produced. And also
66a37d
+		   make sure partial checksums are carried forward
66a37d
+		*/
66a37d
+
66a37d
+		/* Crc of prev job passed to the job to be resumed */
66a37d
+		put32(cmdp->cpb, in_crc, s->crc32);
66a37d
+		put32(cmdp->cpb, in_adler, s->adler32);
66a37d
+
66a37d
+		/* Round up the sizes to quadword. Section 2.10
66a37d
+		   Rounding up will not segfault because
66a37d
+		   nx_alloc_buffer has padding at the beginning */
66a37d
+
66a37d
+		if (s->dict_len > 0) {
66a37d
+			/* lays dict on top of hist */
66a37d
+			nx_history_len = nx_amend_history_with_dict(s);
66a37d
+
66a37d
+			if (s->wrap == HEADER_ZLIB) {
66a37d
+				/* in the raw mode pass crc as is; in the zlib
66a37d
+				   mode initialize them */
66a37d
+				put32(cmdp->cpb, in_crc, INIT_CRC );
66a37d
+				put32(cmdp->cpb, in_adler, INIT_ADLER);
66a37d
+				put32(cmdp->cpb, out_crc, INIT_CRC );
66a37d
+				put32(cmdp->cpb, out_adler, INIT_ADLER);
66a37d
+			}
66a37d
+			print_dbg_info(s, __LINE__);
66a37d
+		} else {
66a37d
+			/* no dictionary here */
66a37d
+			ASSERT( s->dict_len == 0 );
66a37d
+			nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
66a37d
+			putnn(cmdp->cpb, in_histlen, nx_history_len);
66a37d
+			/* convert to bytes */
66a37d
+			nx_history_len = nx_history_len * NXQWSZ;
66a37d
+
66a37d
+			if (nx_history_len > 0) {
66a37d
+				/* deflate history goes in first */
66a37d
+				ASSERT(s->cur_out >= nx_history_len);
66a37d
+				nx_append_dde(s->ddl_in,
66a37d
+					      s->fifo_out + (s->cur_out
66a37d
+							     - nx_history_len),
66a37d
+					      nx_history_len);
66a37d
+			}
66a37d
+			print_dbg_info(s, __LINE__);
66a37d
+		}
66a37d
+	} else {
66a37d
+		nx_history_len = s->history_len = 0;
66a37d
+		/* writing a 0 clears out subc as well */
66a37d
+		cmdp->cpb.in_histlen = 0;
66a37d
+
66a37d
+		/* initialize the crc values */
66a37d
+		put32(cmdp->cpb, in_crc, INIT_CRC );
66a37d
+		put32(cmdp->cpb, in_adler, INIT_ADLER);
66a37d
+		put32(cmdp->cpb, out_crc, INIT_CRC );
66a37d
+		put32(cmdp->cpb, out_adler, INIT_ADLER);
66a37d
+	}
66a37d
+
66a37d
+	/* We use the most recently measured compression ratio as a heuristic
66a37d
+	   to estimate the input and output sizes. If we give too much input,
66a37d
+	   the target buffer overflows and NX cycles are wasted, and then we
66a37d
+	   must retry with smaller input size. 1000 is 100% */
66a37d
+	s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
66a37d
+	return nx_history_len;
66a37d
+}
66a37d
+
66a37d
 /** \brief Internal implementation of inflate.
66a37d
  *
66a37d
  * @param s nx_streamp to be processed.
66a37d
@@ -1075,87 +1189,8 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 
66a37d
 	/* NX decompresses input data */
66a37d
 
66a37d
-	/* address/len lists */
66a37d
-	clearp_dde(ddl_in);
66a37d
-	clearp_dde(ddl_out);
66a37d
-
66a37d
-	nx_history_len = s->history_len;
66a37d
-
66a37d
-	/* FC, CRC, HistLen, Table 6-6 */
66a37d
-	if (s->resuming || (s->dict_len > 0)) {
66a37d
-		/* Resuming a partially decompressed input.  The key
66a37d
-		   to resume is supplying the max 32KB dictionary
66a37d
-		   (history) to NX, which is basically the last 32KB
66a37d
-		   or less of the output earlier produced. And also
66a37d
-		   make sure partial checksums are carried forward
66a37d
-		*/
66a37d
-		fc = GZIP_FC_DECOMPRESS_RESUME;
66a37d
-
66a37d
-		/* Crc of prev job passed to the job to be resumed */
66a37d
-		put32(cmdp->cpb, in_crc, s->crc32);
66a37d
-		put32(cmdp->cpb, in_adler, s->adler32);
66a37d
-
66a37d
-		/* Round up the sizes to quadword. Section 2.10
66a37d
-		   Rounding up will not segfault because
66a37d
-		   nx_alloc_buffer has padding at the beginning */
66a37d
-
66a37d
-		if (s->dict_len > 0) {
66a37d
-			/* lays dict on top of hist */
66a37d
-			nx_history_len = nx_amend_history_with_dict(s);
66a37d
-
66a37d
-			if (s->wrap == HEADER_ZLIB) {
66a37d
-				/* in the raw mode pass crc as is; in the zlib mode
66a37d
-				   initialize them */
66a37d
-				put32(cmdp->cpb, in_crc, INIT_CRC );
66a37d
-				put32(cmdp->cpb, in_adler, INIT_ADLER);
66a37d
-				put32(cmdp->cpb, out_crc, INIT_CRC );
66a37d
-				put32(cmdp->cpb, out_adler, INIT_ADLER);
66a37d
-			}
66a37d
-
66a37d
-			s->last_comp_ratio = NX_MAX( NX_MIN(1000UL, s->last_comp_ratio), 100L );
66a37d
-
66a37d
-			print_dbg_info(s, __LINE__);
66a37d
-		}
66a37d
-		else {
66a37d
-			/* no dictionary here */
66a37d
-			ASSERT( s->dict_len == 0 );
66a37d
-			nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
66a37d
-			putnn(cmdp->cpb, in_histlen, nx_history_len);
66a37d
-			nx_history_len = nx_history_len * NXQWSZ; /* convert to bytes */
66a37d
-
66a37d
-			if (nx_history_len > 0) {
66a37d
-				/* deflate history goes in first */
66a37d
-				ASSERT(s->cur_out >= nx_history_len);
66a37d
-				nx_append_dde(ddl_in, s->fifo_out + (s->cur_out - nx_history_len), nx_history_len);
66a37d
-			}
66a37d
-			print_dbg_info(s, __LINE__);
66a37d
-		}
66a37d
-	}
66a37d
-	else {
66a37d
-		/* First decompress job */
66a37d
-		fc = GZIP_FC_DECOMPRESS;
66a37d
-
66a37d
-		nx_history_len = s->history_len = 0;
66a37d
-		/* writing a 0 clears out subc as well */
66a37d
-		cmdp->cpb.in_histlen = 0;
66a37d
-
66a37d
-		/* initialize the crc values */
66a37d
-		put32(cmdp->cpb, in_crc, INIT_CRC );
66a37d
-		put32(cmdp->cpb, in_adler, INIT_ADLER);
66a37d
-		put32(cmdp->cpb, out_crc, INIT_CRC );
66a37d
-		put32(cmdp->cpb, out_adler, INIT_ADLER);
66a37d
-
66a37d
-		/* We use the most recently measured compression ratio
66a37d
-		   as a heuristic to estimate the input and output
66a37d
-		   sizes. If we give too much input, the target buffer
66a37d
-		   overflows and NX cycles are wasted, and then we
66a37d
-		   must retry with smaller input size. 1000 is 100% */
66a37d
-		s->last_comp_ratio = 1000UL;
66a37d
-	}
66a37d
-
66a37d
-	/* clear then copy fc to the crb */
66a37d
-	cmdp->crb.gzip_fc = 0;
66a37d
-	putnn(cmdp->crb, gzip_fc, fc);
66a37d
+	fc = nx_reset_dde(s);
66a37d
+	nx_history_len = nx_init_dde(s);
66a37d
 
66a37d
 	/*
66a37d
 	 * NX source buffers
66a37d
66a37d
From e376d92fa704108f1258e3a41fc1ffcf551d1c5b Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Fri, 25 Mar 2022 09:57:32 -0300
66a37d
Subject: [PATCH 3/6] Fix the total amount of pages being touched
66a37d
66a37d
Fix an error in nx_touch_pages_dde() that was causing the function to
66a37d
touch a different number of pages than requested.
66a37d
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_zlib.c | 2 +-
66a37d
 1 file changed, 1 insertion(+), 1 deletion(-)
66a37d
66a37d
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
66a37d
index a50d6f7..bf2a6fc 100644
66a37d
--- a/lib/nx_zlib.c
66a37d
+++ b/lib/nx_zlib.c
66a37d
@@ -398,7 +398,7 @@ int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr)
66a37d
 
66a37d
 		/* touching fewer pages than encoded in the ddebc */
66a37d
 		if ( total > buf_sz) {
66a37d
-			buf_len = NX_MIN(buf_len, total - buf_sz);
66a37d
+			buf_len = buf_sz - (total - buf_len);
66a37d
 			nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
66a37d
 			prt_trace("touch loop break len 0x%x ddead %p\n", buf_len, (void *)buf_addr);
66a37d
 			break;
66a37d
66a37d
From 1f3dc128a476c9bbbb1b503d2fc8f54365101ebf Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Thu, 24 Mar 2022 18:18:34 -0300
66a37d
Subject: [PATCH 4/6] inflate: Move code that sets the DDE
66a37d
66a37d
Create functions nx_set_dde_in() and nx_set_dde_out() based on old code.
66a37d
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_inflate.c | 64 +++++++++++++++++++++++++++++++++---------------
66a37d
 1 file changed, 44 insertions(+), 20 deletions(-)
66a37d
66a37d
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
66a37d
index f1d9adb..a9671b2 100644
66a37d
--- a/lib/nx_inflate.c
66a37d
+++ b/lib/nx_inflate.c
66a37d
@@ -1048,6 +1048,41 @@ static int nx_init_dde(nx_streamp s) {
66a37d
 	return nx_history_len;
66a37d
 }
66a37d
 
66a37d
+/** \brief Append input data to DDE
66a37d
+ *
66a37d
+ *  @param s nx_streamp to be processed.
66a37d
+ *
66a37d
+ *  @return The total amount of bytes appended to DDE
66a37d
+ */
66a37d
+static uint32_t nx_set_dde_in(nx_streamp s) {
66a37d
+	/* Buffered user input is next */
66a37d
+	if (s->fifo_in != NULL)
66a37d
+		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
66a37d
+	/* Then current user input.  */
66a37d
+	nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
66a37d
+	/* Total bytes going in to engine.  */
66a37d
+	return getp32(s->ddl_in, ddebc);
66a37d
+}
66a37d
+
66a37d
+/** \brief Append output data to DDE
66a37d
+ *
66a37d
+ *  @param s nx_streamp to be processed.
66a37d
+ *
66a37d
+ *  @return The total amount of bytes appended to DDE
66a37d
+ */
66a37d
+static uint32_t nx_set_dde_out(nx_streamp s) {
66a37d
+	/* Decompress to user buffer first.  */
66a37d
+	nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
66a37d
+
66a37d
+	/* Overflow to fifo_out.
66a37d
+	   used_out == 0 required by definition.  */
66a37d
+	ASSERT(s->used_out == 0);
66a37d
+	nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
66a37d
+		      s->len_out - s->cur_out);
66a37d
+
66a37d
+	return s->avail_out + s->len_out - s->cur_out;
66a37d
+}
66a37d
+
66a37d
 /** \brief Internal implementation of inflate.
66a37d
  *
66a37d
  * @param s nx_streamp to be processed.
66a37d
@@ -1195,28 +1230,13 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	/*
66a37d
 	 * NX source buffers
66a37d
 	 */
66a37d
-	/* buffered user input is next */
66a37d
-	if (s->fifo_in != NULL)
66a37d
-		nx_append_dde(ddl_in, s->fifo_in + s->cur_in, s->used_in);
66a37d
-	/* then current user input */
66a37d
-	nx_append_dde(ddl_in, s->next_in, s->avail_in);
66a37d
-	source_sz = getp32(ddl_in, ddebc); /* total bytes going in to engine */
66a37d
-	ASSERT( source_sz > nx_history_len );
66a37d
+	source_sz = nx_set_dde_in(s);
66a37d
+	ASSERT(source_sz > nx_history_len);
66a37d
 
66a37d
 	/*
66a37d
 	 * NX target buffers
66a37d
 	 */
66a37d
-	ASSERT(s->used_out == 0);
66a37d
-
66a37d
-	uint32_t len_next_out = s->avail_out;
66a37d
-	nx_append_dde(ddl_out, s->next_out, len_next_out); /* decomp in to user buffer */
66a37d
-
66a37d
-	/* overflow, used_out == 0 required by definition, +used_out below is unnecessary */
66a37d
-	nx_append_dde(ddl_out, s->fifo_out + s->cur_out + s->used_out, s->len_out - s->cur_out - s->used_out);
66a37d
-	target_sz = len_next_out + s->len_out - s->cur_out - s->used_out;
66a37d
-
66a37d
-	prt_info("len_next_out %d len_out %d cur_out %d used_out %d source_sz %d history_len %d\n",
66a37d
-		 len_next_out, s->len_out, s->cur_out, s->used_out, source_sz, nx_history_len);
66a37d
+	target_sz = nx_set_dde_out(s);
66a37d
 
66a37d
 	/* We want exactly the History size amount of 32KB to overflow
66a37d
 	   in to fifo_out.  If overflow is less, the history spans
66a37d
@@ -1228,6 +1248,7 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	   these copies (memcpy) for performance. Therefore, the
66a37d
 	   heuristic here will estimate the source size for the
66a37d
 	   desired target size */
66a37d
+	uint32_t len_next_out = s->avail_out;
66a37d
 
66a37d
 	/* avail_out plus 32 KB history plus a bit of overhead */
66a37d
 	uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
66a37d
@@ -1240,11 +1261,14 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 
66a37d
 	prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
66a37d
 
66a37d
+	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
66a37d
+		 " used_out %d source_sz %d history_len %d\n",
66a37d
+		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
66a37d
+		 s->used_out, source_sz, nx_history_len);
66a37d
+
66a37d
 	/* do not include input side history in the estimation */
66a37d
 	source_sz = source_sz - nx_history_len;
66a37d
-
66a37d
 	ASSERT(source_sz > 0);
66a37d
-
66a37d
 	source_sz = NX_MIN(source_sz, source_sz_expected);
66a37d
 
66a37d
 	/* add the history back */
66a37d
66a37d
From eb6cb7b01fe1fa337979353e905e3ad96514b233 Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Thu, 24 Mar 2022 18:37:27 -0300
66a37d
Subject: [PATCH 5/6] inflate: cosmetic improvements
66a37d
66a37d
- Add source code comments.
66a37d
- Improve indentation.
66a37d
- Break long lines.
66a37d
- Fix error and information messages.
66a37d
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_inflate.c | 51 ++++++++++++++++++++++++++++++++++++------------
66a37d
 1 file changed, 38 insertions(+), 13 deletions(-)
66a37d
66a37d
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
66a37d
index a9671b2..a6070bd 100644
66a37d
--- a/lib/nx_inflate.c
66a37d
+++ b/lib/nx_inflate.c
66a37d
@@ -1090,14 +1090,31 @@ static uint32_t nx_set_dde_out(nx_streamp s) {
66a37d
  */
66a37d
 static int nx_inflate_(nx_streamp s, int flush)
66a37d
 {
66a37d
-	/* queuing, file ops, byte counting */
66a37d
-	uint32_t write_sz, source_sz, target_sz;
66a37d
+	/** \brief Sum of the bytes that may be used by NX as input
66a37d
+	 *
66a37d
+	 *  Total amount of bytes sent to the NX to be used as input,
66a37d
+	 *  i.e. sum of the bytes in next_in and fifo_in.  */
66a37d
+	uint32_t source_sz;
66a37d
+
66a37d
+	/** \brief Sum of the bytes that may be used by NX as output
66a37d
+	 *
66a37d
+	 *  Maximum amount of bytes available by the NX to be used as output,
66a37d
+	 *  i.e. sum of the bytes available in next_out and fifo_out.  */
66a37d
+	uint32_t target_sz;
66a37d
+
66a37d
+	uint32_t write_sz;
66a37d
 	long loop_cnt = 0, loop_max = 0xffff;
66a37d
 
66a37d
 	/** \brief inflate benefits from large jobs; memcopies must be
66a37d
 	 *  amortized.  */
66a37d
 	const uint32_t inflate_per_job_len = nx_config.per_job_len;
66a37d
 
66a37d
+	/** \brief Estimated value for target_sz. Used to calculate
66a37d
+	 *  source_sz_expected.  */
66a37d
+	uint32_t target_sz_expected;
66a37d
+	/** \brief Estimated value for source_sz.  */
66a37d
+	uint32_t source_sz_expected;
66a37d
+
66a37d
 	/* nx hardware */
66a37d
 	uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
66a37d
 
66a37d
@@ -1251,16 +1268,20 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	uint32_t len_next_out = s->avail_out;
66a37d
 
66a37d
 	/* avail_out plus 32 KB history plus a bit of overhead */
66a37d
-	uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
66a37d
+	target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
66a37d
 
66a37d
 	target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
66a37d
 
66a37d
 	/* e.g. if we want 100KB at the output and if the compression
66a37d
 	   ratio is 10% we want 10KB if input */
66a37d
-	uint32_t source_sz_expected = (uint32_t)(((uint64_t)target_sz_expected * s->last_comp_ratio + 1000L)/1000UL);
66a37d
+	source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
66a37d
+					  * s->last_comp_ratio + 1000L)/1000UL);
66a37d
 
66a37d
-	prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
66a37d
 
66a37d
+	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
66a37d
+		 " source_sz %d last_comp_ratio %d nx_history_len %d\n",
66a37d
+		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
66a37d
+		 source_sz, s->last_comp_ratio, nx_history_len);
66a37d
 	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
66a37d
 		 " used_out %d source_sz %d history_len %d\n",
66a37d
 		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
66a37d
@@ -1280,8 +1301,11 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 
66a37d
 	/* fault in pages */
66a37d
 	nx_touch_pages_dde(ddl_in, source_sz, nx_config.page_sz, 0);
66a37d
-	nx_touch_pages_dde(ddl_out, target_sz, nx_config.page_sz, 1);
66a37d
-	nx_touch_pages( (void *)cmdp, sizeof(nx_gzip_crb_cpb_t), nx_config.page_sz, 0);
66a37d
+	nx_touch_pages_dde(ddl_out,
66a37d
+			   target_sz,
66a37d
+			   nx_config.page_sz, 1);
66a37d
+	nx_touch_pages((void *) cmdp, sizeof(nx_gzip_crb_cpb_t),
66a37d
+		       nx_config.page_sz, 0);
66a37d
 
66a37d
 	/*
66a37d
 	 * send job to NX
66a37d
@@ -1298,9 +1322,9 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 		   faulting address to fsaddr */
66a37d
 		print_dbg_info(s, __LINE__);
66a37d
 
66a37d
-		prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d ",
66a37d
-			 (void *)cmdp->crb.csb.fsaddr, source_sz);
66a37d
-		prt_warn("target_sz %d\n", target_sz);
66a37d
+		prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d "
66a37d
+			 "target_sz %d\n", (void *)cmdp->crb.csb.fsaddr,
66a37d
+			 source_sz, target_sz);
66a37d
 #ifdef NX_LOG_SOURCE_TARGET
66a37d
 		nx_print_dde(ddl_in, "source");
66a37d
 		nx_print_dde(ddl_out, "target");
66a37d
@@ -1339,8 +1363,8 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 			if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
66a37d
 			   /* TODO what to do when page faults are too many?
66a37d
 			    * Kernel MM would have killed the process. */
66a37d
-				prt_err("Cannot make progress; too many page");
66a37d
-				prt_err(" faults cc= %d\n", cc);
66a37d
+				prt_err("Cannot make progress; too many page"
66a37d
+					" faults cc= %d\n", cc);
66a37d
 			}
66a37d
 			else {
66a37d
 				prt_warn("ERR_NX_AT_FAULT: more retry\n");
66a37d
@@ -1397,7 +1421,8 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 		   cover the max expansion of INF_MIN_INPUT_LEN
66a37d
 		   bytes */
66a37d
 
66a37d
-		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data src %d hist %d\n", source_sz, nx_history_len);
66a37d
+		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
66a37d
+			 " src %d hist %d\n", source_sz, nx_history_len);
66a37d
 		goto restart_nx;
66a37d
 
66a37d
 	case ERR_NX_OK:
66a37d
66a37d
From 806bf8e3ed1d0ae8a21bc6b2035df390f1062c26 Mon Sep 17 00:00:00 2001
66a37d
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
Date: Mon, 28 Mar 2022 18:15:37 -0300
66a37d
Subject: [PATCH 6/6] inflate: Limit the amount of data added to the DDE
66a37d
66a37d
Stop adding all input and output data to the DDE and limit based on the
66a37d
calculated value for source_sz_expected and target_sz_expected.
66a37d
By limiting these values, we end up better estimating the amount of
66a37d
pages that need to be touched, reducing the amount of time spent
66a37d
touching pages that might not be used.
66a37d
66a37d
Reported-by: Puvichakravarthy Ramachandran <puvichakravarthy@in.ibm.com>
66a37d
Reported-by: Poorna Chandra Vemula <Poorna.Chandra.Vemula@ibm.com>
66a37d
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
66a37d
---
66a37d
 lib/nx_inflate.c | 180 +++++++++++++++++++++++++++++------------------
66a37d
 1 file changed, 111 insertions(+), 69 deletions(-)
66a37d
66a37d
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
66a37d
index a6070bd..b30cdf6 100644
66a37d
--- a/lib/nx_inflate.c
66a37d
+++ b/lib/nx_inflate.c
66a37d
@@ -955,13 +955,6 @@ static int nx_reset_dde(nx_streamp s) {
66a37d
 	} else {
66a37d
 		/* First decompress job */
66a37d
 		fc = GZIP_FC_DECOMPRESS;
66a37d
-
66a37d
-		/* We use the most recently measured compression ratio
66a37d
-		   as a heuristic to estimate the input and output
66a37d
-		   sizes. If we give too much input, the target buffer
66a37d
-		   overflows and NX cycles are wasted, and then we
66a37d
-		   must retry with smaller input size. 1000 is 100% */
66a37d
-		s->last_comp_ratio = 1000UL;
66a37d
 	}
66a37d
 
66a37d
 	/* clear then copy fc to the crb */
66a37d
@@ -1051,15 +1044,24 @@ static int nx_init_dde(nx_streamp s) {
66a37d
 /** \brief Append input data to DDE
66a37d
  *
66a37d
  *  @param s nx_streamp to be processed.
66a37d
+ *  @param source_sz_expected The total amount of bytes expected as input. It
66a37d
+ *         does not include dictionary or history.
66a37d
  *
66a37d
  *  @return The total amount of bytes appended to DDE
66a37d
  */
66a37d
-static uint32_t nx_set_dde_in(nx_streamp s) {
66a37d
+static uint32_t nx_set_dde_in(nx_streamp s, uint32_t source_sz_expected) {
66a37d
+	uint32_t tmp = 0;
66a37d
+
66a37d
 	/* Buffered user input is next */
66a37d
-	if (s->fifo_in != NULL)
66a37d
-		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
66a37d
-	/* Then current user input.  */
66a37d
-	nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
66a37d
+	if (s->fifo_in != NULL) {
66a37d
+		tmp = NX_MIN(s->used_in, source_sz_expected);
66a37d
+		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, tmp);
66a37d
+	}
66a37d
+	if (tmp < source_sz_expected) {
66a37d
+		tmp = NX_MIN(s->avail_in, source_sz_expected - tmp);
66a37d
+		/* Then current user input.  */
66a37d
+		nx_append_dde(s->ddl_in, s->next_in, tmp);
66a37d
+	}
66a37d
 	/* Total bytes going in to engine.  */
66a37d
 	return getp32(s->ddl_in, ddebc);
66a37d
 }
66a37d
@@ -1067,20 +1069,32 @@ static uint32_t nx_set_dde_in(nx_streamp s) {
66a37d
 /** \brief Append output data to DDE
66a37d
  *
66a37d
  *  @param s nx_streamp to be processed.
66a37d
+ *  @param target_sz_expected The total amount of bytes expected as output.
66a37d
  *
66a37d
  *  @return The total amount of bytes appended to DDE
66a37d
  */
66a37d
-static uint32_t nx_set_dde_out(nx_streamp s) {
66a37d
+static uint32_t nx_set_dde_out(nx_streamp s, uint32_t target_sz_expected) {
66a37d
+	uint32_t tmp;
66a37d
+	uint32_t ret;
66a37d
+
66a37d
+	ret = NX_MIN(s->avail_out, target_sz_expected);
66a37d
+
66a37d
 	/* Decompress to user buffer first.  */
66a37d
-	nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
66a37d
+	nx_append_dde(s->ddl_out, s->next_out, ret);
66a37d
+
66a37d
+	if (ret < target_sz_expected) {
66a37d
+		tmp = NX_MIN(s->len_out - s->cur_out,
66a37d
+			     target_sz_expected - ret);
66a37d
+
66a37d
+		/* Overflow to fifo_out.
66a37d
+		   used_out == 0 required by definition.  */
66a37d
+		ASSERT(s->used_out == 0);
66a37d
+		nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, tmp);
66a37d
 
66a37d
-	/* Overflow to fifo_out.
66a37d
-	   used_out == 0 required by definition.  */
66a37d
-	ASSERT(s->used_out == 0);
66a37d
-	nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
66a37d
-		      s->len_out - s->cur_out);
66a37d
+		ret += tmp;
66a37d
+	}
66a37d
 
66a37d
-	return s->avail_out + s->len_out - s->cur_out;
66a37d
+	return ret;
66a37d
 }
66a37d
 
66a37d
 /** \brief Internal implementation of inflate.
66a37d
@@ -1094,7 +1108,7 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	 *
66a37d
 	 *  Total amount of bytes sent to the NX to be used as input,
66a37d
 	 *  i.e. sum of the bytes in next_in and fifo_in.  */
66a37d
-	uint32_t source_sz;
66a37d
+	uint32_t source_sz = 0;
66a37d
 
66a37d
 	/** \brief Sum of the bytes that may be used by NX as output
66a37d
 	 *
66a37d
@@ -1242,46 +1256,69 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 	/* NX decompresses input data */
66a37d
 
66a37d
 	fc = nx_reset_dde(s);
66a37d
-	nx_history_len = nx_init_dde(s);
66a37d
 
66a37d
-	/*
66a37d
-	 * NX source buffers
66a37d
-	 */
66a37d
-	source_sz = nx_set_dde_in(s);
66a37d
-	ASSERT(source_sz > nx_history_len);
66a37d
+	/** Estimate the amount of data sent to the NX. Ideally, we want
66a37d
+	 *  exactly the history size amount of 32 KiB to overflow in to fifo_out
66a37d
+	 *  in order to minimize copies of memory.
66a37d
+	 *  If overflow is less than 32 KiB, the history spans next_out and
66a37d
+	 *  fifo_out and must be copied in to fifo_out to setup history for the
66a37d
+	 *  next job. The fifo_out fraction is also copied back to user's
66a37d
+	 *  next_out before the next job.
66a37d
+	 *  If overflow is more, all the overflow must be copied back
66a37d
+	 *  to user's next_out before the next job.
66a37d
+	 *  If overflow is much more, we may get an ERR_NX_TARGET_SPACE, forcing
66a37d
+	 *  us to reduce the source before trying again.  A retry in this case
66a37d
+	 *  will probably require NX to process much more than 32 KiB, which
66a37d
+	 *  requires more time than copying 32 KiB of data.
66a37d
+	 *
66a37d
+	 *  With that said, we want to minimize unnecessary work (i.e. memcpy
66a37d
+	 *  and retrying NX jobs) for performance. Therefore, the heuristic
66a37d
+	 *  here will estimate the source size for the desired target size, but
66a37d
+	 *  it prioritizes avoiding ERR_NX_TARGET_SPACE.  */
66a37d
 
66a37d
-	/*
66a37d
-	 * NX target buffers
66a37d
-	 */
66a37d
-	target_sz = nx_set_dde_out(s);
66a37d
-
66a37d
-	/* We want exactly the History size amount of 32KB to overflow
66a37d
-	   in to fifo_out.  If overflow is less, the history spans
66a37d
-	   next_out and fifo_out and must be copied in to fifo_out to
66a37d
-	   setup history for the next job, and the fifo_out fraction is
66a37d
-	   also copied back to user's next_out before the next job.
66a37d
-	   If overflow is more, all the overflow must be copied back
66a37d
-	   to user's next_out before the next job. We want to minimize
66a37d
-	   these copies (memcpy) for performance. Therefore, the
66a37d
-	   heuristic here will estimate the source size for the
66a37d
-	   desired target size */
66a37d
 	uint32_t len_next_out = s->avail_out;
66a37d
+	s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
66a37d
 
66a37d
-	/* avail_out plus 32 KB history plus a bit of overhead */
66a37d
+	/* avail_out plus 32 KiB history plus a bit of overhead */
66a37d
 	target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
66a37d
-
66a37d
 	target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
66a37d
 
66a37d
-	/* e.g. if we want 100KB at the output and if the compression
66a37d
-	   ratio is 10% we want 10KB if input */
66a37d
+	/** Calculate source_sz_expected based on target_sz_expected and the
66a37d
+	 *  last compression ratio, e.g. if we want 100KB at the output and if
66a37d
+	 *  the compression ratio is 10% we want 10KB of input */
66a37d
 	source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
66a37d
 					  * s->last_comp_ratio + 1000L)/1000UL);
66a37d
 
66a37d
+	/** After calculating source_sz_expected, try to provide extra
66a37d
+	 *  target_sz_expected in order to avoid an ERR_NX_TARGET_SPACE.  */
66a37d
+	target_sz_expected = NX_MIN(len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2),
66a37d
+				    4 * inflate_per_job_len);
66a37d
+	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
66a37d
+		 " source_sz %d last_comp_ratio %d\n",
66a37d
+		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
66a37d
+		 source_sz, s->last_comp_ratio);
66a37d
+
66a37d
+
66a37d
+init_dde:
66a37d
+	nx_history_len = nx_init_dde(s);
66a37d
+
66a37d
+	/*
66a37d
+	 * NX source buffers
66a37d
+	 */
66a37d
+	source_sz = nx_set_dde_in(s, source_sz_expected);
66a37d
+	ASSERT(source_sz > nx_history_len);
66a37d
+	ASSERT(source_sz <= source_sz_expected + nx_history_len);
66a37d
 
66a37d
 	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
66a37d
 		 " source_sz %d last_comp_ratio %d nx_history_len %d\n",
66a37d
 		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
66a37d
 		 source_sz, s->last_comp_ratio, nx_history_len);
66a37d
+
66a37d
+	/*
66a37d
+	 * NX target buffers
66a37d
+	 */
66a37d
+	target_sz = nx_set_dde_out(s, target_sz_expected);
66a37d
+
66a37d
 	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
66a37d
 		 " used_out %d source_sz %d history_len %d\n",
66a37d
 		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
66a37d
@@ -1345,19 +1382,22 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 			   that is about 2 pages minimum for source and
66a37d
 			   and 6 pages for target; if the system does not
66a37d
 			   have 8 free pages then the loop will last forever */
66a37d
-			source_sz = source_sz - nx_history_len;
66a37d
-			if (source_sz > (2 * INF_MIN_INPUT_LEN))
66a37d
-				source_sz = (source_sz + 1) / 2;
66a37d
-			else if (source_sz > INF_MIN_INPUT_LEN)
66a37d
-				source_sz = INF_MIN_INPUT_LEN;
66a37d
-
66a37d
-			/* else if caller gave fewer source bytes, keep it as is */
66a37d
-			source_sz = source_sz + nx_history_len;
66a37d
-
66a37d
-			if (target_sz > (2 * INF_MAX_EXPANSION_BYTES))
66a37d
-				target_sz = (target_sz + 1) / 2;
66a37d
-			else if (target_sz > INF_MAX_EXPANSION_BYTES)
66a37d
-				target_sz = INF_MAX_EXPANSION_BYTES;
66a37d
+			source_sz_expected = source_sz - nx_history_len;
66a37d
+			if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
66a37d
+				source_sz_expected
66a37d
+					= (source_sz_expected + 1) / 2;
66a37d
+			else if (source_sz_expected > INF_MIN_INPUT_LEN)
66a37d
+				source_sz_expected = INF_MIN_INPUT_LEN;
66a37d
+
66a37d
+			/* else if caller gave fewer source bytes, keep it as
66a37d
+			   is.  */
66a37d
+			source_sz = source_sz_expected + nx_history_len;
66a37d
+
66a37d
+			if (target_sz_expected > (2 * INF_MAX_EXPANSION_BYTES))
66a37d
+				target_sz_expected
66a37d
+					= (target_sz_expected + 1) / 2;
66a37d
+			else if (target_sz_expected > INF_MAX_EXPANSION_BYTES)
66a37d
+				target_sz_expected = INF_MAX_EXPANSION_BYTES;
66a37d
 
66a37d
 			ticks_total = nx_wait_ticks(500, ticks_total, 0);
66a37d
 			if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
66a37d
@@ -1368,7 +1408,8 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 			}
66a37d
 			else {
66a37d
 				prt_warn("ERR_NX_AT_FAULT: more retry\n");
66a37d
-				goto restart_nx;
66a37d
+				fc = nx_reset_dde(s);
66a37d
+				goto init_dde;
66a37d
 			}
66a37d
 		}
66a37d
 
66a37d
@@ -1403,18 +1444,17 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 		/* Target buffer not large enough; retry smaller input
66a37d
 		   data; give at least 1 byte. SPBC/TPBC are not valid */
66a37d
 		ASSERT( source_sz > nx_history_len );
66a37d
-		source_sz = ((source_sz - nx_history_len + 1) / 2) + nx_history_len;
66a37d
+		source_sz_expected = (source_sz - nx_history_len + 1) / 2;
66a37d
 
66a37d
-		source_sz = source_sz - nx_history_len;
66a37d
 		/* reduce large source down to minimum viable; if
66a37d
 		   source is already small don't change it */
66a37d
-		if (source_sz > (2 * INF_MIN_INPUT_LEN))
66a37d
-			source_sz = (source_sz + 1) / 2;
66a37d
-		else if (source_sz > INF_MIN_INPUT_LEN)
66a37d
-			source_sz = INF_MIN_INPUT_LEN;
66a37d
+		if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
66a37d
+			source_sz_expected = (source_sz_expected + 1) / 2;
66a37d
+		else if (source_sz_expected > INF_MIN_INPUT_LEN)
66a37d
+			source_sz_expected = INF_MIN_INPUT_LEN;
66a37d
 
66a37d
 		/* else if caller gave fewer source bytes, keep it as is */
66a37d
-		source_sz = source_sz + nx_history_len;
66a37d
+		source_sz = source_sz_expected + nx_history_len;
66a37d
 
66a37d
 		/* do not change target size because we allocated a
66a37d
 		   minimum of INF_MAX_EXPANSION_BYTES which should
66a37d
@@ -1422,8 +1462,10 @@ static int nx_inflate_(nx_streamp s, int flush)
66a37d
 		   bytes */
66a37d
 
66a37d
 		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
66a37d
-			 " src %d hist %d\n", source_sz, nx_history_len);
66a37d
-		goto restart_nx;
66a37d
+			 " source_sz_expected %d nx_history_len %d\n",
66a37d
+			 source_sz_expected, nx_history_len);
66a37d
+		fc = nx_reset_dde(s);
66a37d
+		goto init_dde;
66a37d
 
66a37d
 	case ERR_NX_OK:
66a37d