Blame SOURCES/pr150.patch

fabfcd
From 943a7f434b10c19f8e8e865c3cc40685b9903822 Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Thu, 24 Mar 2022 17:32:43 -0300
fabfcd
Subject: [PATCH 1/6] Provide a maximum job length depending on the
fabfcd
 virtualization
fabfcd
fabfcd
Identify if a system is running on baremetal or PowerVM and provide
fabfcd
a maximum job length adapted to each case.
fabfcd
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_inflate.c |  5 +++--
fabfcd
 lib/nx_zlib.c    | 25 +++++++++++++++++++++++--
fabfcd
 lib/nx_zlib.h    |  4 ++++
fabfcd
 3 files changed, 30 insertions(+), 4 deletions(-)
fabfcd
fabfcd
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
fabfcd
index ec32b4c..77ad33c 100644
fabfcd
--- a/lib/nx_inflate.c
fabfcd
+++ b/lib/nx_inflate.c
fabfcd
@@ -945,8 +945,9 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	uint32_t write_sz, source_sz, target_sz;
fabfcd
 	long loop_cnt = 0, loop_max = 0xffff;
fabfcd
 
fabfcd
-	/* inflate benefits from large jobs; memcopies must be amortized */
fabfcd
-	uint32_t inflate_per_job_len = 64 * nx_config.per_job_len;
fabfcd
+	/** \brief inflate benefits from large jobs; memcopies must be
fabfcd
+	 *  amortized.  */
fabfcd
+	const uint32_t inflate_per_job_len = nx_config.per_job_len;
fabfcd
 
fabfcd
 	/* nx hardware */
fabfcd
 	uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
fabfcd
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
fabfcd
index 28ea482..a50d6f7 100644
fabfcd
--- a/lib/nx_zlib.c
fabfcd
+++ b/lib/nx_zlib.c
fabfcd
@@ -64,6 +64,18 @@
fabfcd
 #include "nx_utils.h"
fabfcd
 #include "nx_zlib.h"
fabfcd
 
fabfcd
+/* Use the following values as maximum length of NX jobs when the OS doesn't
fabfcd
+   provide the value itself, which is the default behavior until Linux 5.17  */
fabfcd
+
fabfcd
+/** \brief Maximum job length on baremetal
fabfcd
+ *
fabfcd
+ *  While the system does allow up to 2 GiB as the maximum job length, restrict
fabfcd
+ *  it to 64 MiB.
fabfcd
+ */
fabfcd
+#define DEFAULT_MAX_JOB_BAREMETAL 64 * 1024 * 1024
fabfcd
+/** \brief Maximum job length on PowerVM  */
fabfcd
+#define DEFAULT_MAX_JOB_POWERVM   1024 * 1024
fabfcd
+
fabfcd
 struct nx_config_t nx_config;
fabfcd
 static struct nx_dev_t nx_devices[NX_DEVICES_MAX];
fabfcd
 static int nx_dev_count = 0;
fabfcd
@@ -639,8 +651,13 @@ static int nx_query_job_limits()
fabfcd
 		}
fabfcd
 	}
fabfcd
 
fabfcd
-	/* On error return default value of 1 MB */
fabfcd
-	return (1024 * 1024);
fabfcd
+	/* On error return default value.  */
fabfcd
+	switch (nx_config.virtualization) {
fabfcd
+		case BAREMETAL:
fabfcd
+			return DEFAULT_MAX_JOB_BAREMETAL;
fabfcd
+		default:
fabfcd
+			return DEFAULT_MAX_JOB_POWERVM;
fabfcd
+	}
fabfcd
 }
fabfcd
 
fabfcd
 /*
fabfcd
@@ -659,6 +676,9 @@ static int nx_enumerate_engines()
fabfcd
 	int count = 0;
fabfcd
 	size_t n;
fabfcd
 
fabfcd
+	/* Assume baremetal by default.  */
fabfcd
+	nx_config.virtualization = BAREMETAL;
fabfcd
+
fabfcd
 	d = opendir(DEVICE_TREE);
fabfcd
 	if (d == NULL){
fabfcd
 		prt_err("open device tree dir failed.\n");
fabfcd
@@ -712,6 +732,7 @@ static int nx_enumerate_engines()
fabfcd
 		}
fabfcd
 		/* On PowerVM, there is no concept of multiple NX engines.  */
fabfcd
 		if (strncmp(de->d_name, "ibm,powervm", 11) == 0){
fabfcd
+			nx_config.virtualization = POWERVM;
fabfcd
 			closedir(d);
fabfcd
 			return 1;
fabfcd
 		}
fabfcd
diff --git a/lib/nx_zlib.h b/lib/nx_zlib.h
fabfcd
index e84bd7e..fa73b01 100644
fabfcd
--- a/lib/nx_zlib.h
fabfcd
+++ b/lib/nx_zlib.h
fabfcd
@@ -129,6 +129,8 @@ void nx_print_dde(nx_dde_t *ddep, const char *msg);
fabfcd
 #define zlib_version zlibVersion()
fabfcd
 extern const char *zlibVersion OF((void));
fabfcd
 
fabfcd
+enum virtualization {BAREMETAL=0, POWERVM=1};
fabfcd
+
fabfcd
 /* common config variables for all streams */
fabfcd
 struct nx_config_t {
fabfcd
 	long     page_sz;
fabfcd
@@ -158,6 +160,8 @@ struct nx_config_t {
fabfcd
 				     * dynamic huffman */
fabfcd
 	struct selector mode; /** mode selector: selects between software
fabfcd
 				* and hardware compression. */
fabfcd
+	uint8_t virtualization; /** Indicate the virtualization type being
fabfcd
+				 *  used. */
fabfcd
 };
fabfcd
 typedef struct nx_config_t *nx_configp_t;
fabfcd
 extern struct nx_config_t nx_config;
fabfcd
fabfcd
From b22eb7bffe61e36f70661921a689e44370d3c7e5 Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Thu, 24 Mar 2022 18:03:28 -0300
fabfcd
Subject: [PATCH 2/6] inflate: Move code that initializes the DDE to their own
fabfcd
 functions
fabfcd
fabfcd
Create functions nx_reset_dde() and nx_init_dde() based on previous code
fabfcd
helping to reduce the size of nx_inflate_() and making it easier to
fabfcd
understand the code.
fabfcd
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_inflate.c | 197 ++++++++++++++++++++++++++++-------------------
fabfcd
 1 file changed, 116 insertions(+), 81 deletions(-)
fabfcd
fabfcd
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
fabfcd
index 77ad33c..f1d9adb 100644
fabfcd
--- a/lib/nx_inflate.c
fabfcd
+++ b/lib/nx_inflate.c
fabfcd
@@ -934,6 +934,120 @@ static int copy_data_to_fifo_in(nx_streamp s) {
fabfcd
 	return Z_OK;
fabfcd
 }
fabfcd
 
fabfcd
+/** \brief Reset DDE to initial values.
fabfcd
+ *
fabfcd
+ *  @param s nx_streamp to be processed.
fabfcd
+ *  @return Function code as passed to CRB. The function will set the CRB and
fabfcd
+ *          return the value here.
fabfcd
+ */
fabfcd
+static int nx_reset_dde(nx_streamp s) {
fabfcd
+	nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
fabfcd
+	uint32_t fc;
fabfcd
+
fabfcd
+	/* address/len lists */
fabfcd
+	clearp_dde(s->ddl_in);
fabfcd
+	clearp_dde(s->ddl_out);
fabfcd
+
fabfcd
+	/* FC, CRC, HistLen, Table 6-6 */
fabfcd
+	if (s->resuming || (s->dict_len > 0)) {
fabfcd
+		/* Resuming a partially decompressed input.  */
fabfcd
+		fc = GZIP_FC_DECOMPRESS_RESUME;
fabfcd
+	} else {
fabfcd
+		/* First decompress job */
fabfcd
+		fc = GZIP_FC_DECOMPRESS;
fabfcd
+
fabfcd
+		/* We use the most recently measured compression ratio
fabfcd
+		   as a heuristic to estimate the input and output
fabfcd
+		   sizes. If we give too much input, the target buffer
fabfcd
+		   overflows and NX cycles are wasted, and then we
fabfcd
+		   must retry with smaller input size. 1000 is 100% */
fabfcd
+		s->last_comp_ratio = 1000UL;
fabfcd
+	}
fabfcd
+
fabfcd
+	/* clear then copy fc to the crb */
fabfcd
+	cmdp->crb.gzip_fc = 0;
fabfcd
+	putnn(cmdp->crb, gzip_fc, fc);
fabfcd
+
fabfcd
+	return fc;
fabfcd
+}
fabfcd
+
fabfcd
+/** \brief Initialize DDE, appending a dictionary, if necessary.
fabfcd
+ *
fabfcd
+ *  @param s nx_streamp to be processed.
fabfcd
+ *  @return The history length
fabfcd
+ */
fabfcd
+static int nx_init_dde(nx_streamp s) {
fabfcd
+	nx_gzip_crb_cpb_t *cmdp = s->nxcmdp;
fabfcd
+	int nx_history_len = s->history_len;
fabfcd
+
fabfcd
+	/* FC, CRC, HistLen, Table 6-6 */
fabfcd
+	if (s->resuming || (s->dict_len > 0)) {
fabfcd
+		/* Resuming a partially decompressed input.  The key
fabfcd
+		   to resume is supplying the max 32KB dictionary
fabfcd
+		   (history) to NX, which is basically the last 32KB
fabfcd
+		   or less of the output earlier produced. And also
fabfcd
+		   make sure partial checksums are carried forward
fabfcd
+		*/
fabfcd
+
fabfcd
+		/* Crc of prev job passed to the job to be resumed */
fabfcd
+		put32(cmdp->cpb, in_crc, s->crc32);
fabfcd
+		put32(cmdp->cpb, in_adler, s->adler32);
fabfcd
+
fabfcd
+		/* Round up the sizes to quadword. Section 2.10
fabfcd
+		   Rounding up will not segfault because
fabfcd
+		   nx_alloc_buffer has padding at the beginning */
fabfcd
+
fabfcd
+		if (s->dict_len > 0) {
fabfcd
+			/* lays dict on top of hist */
fabfcd
+			nx_history_len = nx_amend_history_with_dict(s);
fabfcd
+
fabfcd
+			if (s->wrap == HEADER_ZLIB) {
fabfcd
+				/* in the raw mode pass crc as is; in the zlib
fabfcd
+				   mode initialize them */
fabfcd
+				put32(cmdp->cpb, in_crc, INIT_CRC );
fabfcd
+				put32(cmdp->cpb, in_adler, INIT_ADLER);
fabfcd
+				put32(cmdp->cpb, out_crc, INIT_CRC );
fabfcd
+				put32(cmdp->cpb, out_adler, INIT_ADLER);
fabfcd
+			}
fabfcd
+			print_dbg_info(s, __LINE__);
fabfcd
+		} else {
fabfcd
+			/* no dictionary here */
fabfcd
+			ASSERT( s->dict_len == 0 );
fabfcd
+			nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
fabfcd
+			putnn(cmdp->cpb, in_histlen, nx_history_len);
fabfcd
+			/* convert to bytes */
fabfcd
+			nx_history_len = nx_history_len * NXQWSZ;
fabfcd
+
fabfcd
+			if (nx_history_len > 0) {
fabfcd
+				/* deflate history goes in first */
fabfcd
+				ASSERT(s->cur_out >= nx_history_len);
fabfcd
+				nx_append_dde(s->ddl_in,
fabfcd
+					      s->fifo_out + (s->cur_out
fabfcd
+							     - nx_history_len),
fabfcd
+					      nx_history_len);
fabfcd
+			}
fabfcd
+			print_dbg_info(s, __LINE__);
fabfcd
+		}
fabfcd
+	} else {
fabfcd
+		nx_history_len = s->history_len = 0;
fabfcd
+		/* writing a 0 clears out subc as well */
fabfcd
+		cmdp->cpb.in_histlen = 0;
fabfcd
+
fabfcd
+		/* initialize the crc values */
fabfcd
+		put32(cmdp->cpb, in_crc, INIT_CRC );
fabfcd
+		put32(cmdp->cpb, in_adler, INIT_ADLER);
fabfcd
+		put32(cmdp->cpb, out_crc, INIT_CRC );
fabfcd
+		put32(cmdp->cpb, out_adler, INIT_ADLER);
fabfcd
+	}
fabfcd
+
fabfcd
+	/* We use the most recently measured compression ratio as a heuristic
fabfcd
+	   to estimate the input and output sizes. If we give too much input,
fabfcd
+	   the target buffer overflows and NX cycles are wasted, and then we
fabfcd
+	   must retry with smaller input size. 1000 is 100% */
fabfcd
+	s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
fabfcd
+	return nx_history_len;
fabfcd
+}
fabfcd
+
fabfcd
 /** \brief Internal implementation of inflate.
fabfcd
  *
fabfcd
  * @param s nx_streamp to be processed.
fabfcd
@@ -1075,87 +1189,8 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 
fabfcd
 	/* NX decompresses input data */
fabfcd
 
fabfcd
-	/* address/len lists */
fabfcd
-	clearp_dde(ddl_in);
fabfcd
-	clearp_dde(ddl_out);
fabfcd
-
fabfcd
-	nx_history_len = s->history_len;
fabfcd
-
fabfcd
-	/* FC, CRC, HistLen, Table 6-6 */
fabfcd
-	if (s->resuming || (s->dict_len > 0)) {
fabfcd
-		/* Resuming a partially decompressed input.  The key
fabfcd
-		   to resume is supplying the max 32KB dictionary
fabfcd
-		   (history) to NX, which is basically the last 32KB
fabfcd
-		   or less of the output earlier produced. And also
fabfcd
-		   make sure partial checksums are carried forward
fabfcd
-		*/
fabfcd
-		fc = GZIP_FC_DECOMPRESS_RESUME;
fabfcd
-
fabfcd
-		/* Crc of prev job passed to the job to be resumed */
fabfcd
-		put32(cmdp->cpb, in_crc, s->crc32);
fabfcd
-		put32(cmdp->cpb, in_adler, s->adler32);
fabfcd
-
fabfcd
-		/* Round up the sizes to quadword. Section 2.10
fabfcd
-		   Rounding up will not segfault because
fabfcd
-		   nx_alloc_buffer has padding at the beginning */
fabfcd
-
fabfcd
-		if (s->dict_len > 0) {
fabfcd
-			/* lays dict on top of hist */
fabfcd
-			nx_history_len = nx_amend_history_with_dict(s);
fabfcd
-
fabfcd
-			if (s->wrap == HEADER_ZLIB) {
fabfcd
-				/* in the raw mode pass crc as is; in the zlib mode
fabfcd
-				   initialize them */
fabfcd
-				put32(cmdp->cpb, in_crc, INIT_CRC );
fabfcd
-				put32(cmdp->cpb, in_adler, INIT_ADLER);
fabfcd
-				put32(cmdp->cpb, out_crc, INIT_CRC );
fabfcd
-				put32(cmdp->cpb, out_adler, INIT_ADLER);
fabfcd
-			}
fabfcd
-
fabfcd
-			s->last_comp_ratio = NX_MAX( NX_MIN(1000UL, s->last_comp_ratio), 100L );
fabfcd
-
fabfcd
-			print_dbg_info(s, __LINE__);
fabfcd
-		}
fabfcd
-		else {
fabfcd
-			/* no dictionary here */
fabfcd
-			ASSERT( s->dict_len == 0 );
fabfcd
-			nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ;
fabfcd
-			putnn(cmdp->cpb, in_histlen, nx_history_len);
fabfcd
-			nx_history_len = nx_history_len * NXQWSZ; /* convert to bytes */
fabfcd
-
fabfcd
-			if (nx_history_len > 0) {
fabfcd
-				/* deflate history goes in first */
fabfcd
-				ASSERT(s->cur_out >= nx_history_len);
fabfcd
-				nx_append_dde(ddl_in, s->fifo_out + (s->cur_out - nx_history_len), nx_history_len);
fabfcd
-			}
fabfcd
-			print_dbg_info(s, __LINE__);
fabfcd
-		}
fabfcd
-	}
fabfcd
-	else {
fabfcd
-		/* First decompress job */
fabfcd
-		fc = GZIP_FC_DECOMPRESS;
fabfcd
-
fabfcd
-		nx_history_len = s->history_len = 0;
fabfcd
-		/* writing a 0 clears out subc as well */
fabfcd
-		cmdp->cpb.in_histlen = 0;
fabfcd
-
fabfcd
-		/* initialize the crc values */
fabfcd
-		put32(cmdp->cpb, in_crc, INIT_CRC );
fabfcd
-		put32(cmdp->cpb, in_adler, INIT_ADLER);
fabfcd
-		put32(cmdp->cpb, out_crc, INIT_CRC );
fabfcd
-		put32(cmdp->cpb, out_adler, INIT_ADLER);
fabfcd
-
fabfcd
-		/* We use the most recently measured compression ratio
fabfcd
-		   as a heuristic to estimate the input and output
fabfcd
-		   sizes. If we give too much input, the target buffer
fabfcd
-		   overflows and NX cycles are wasted, and then we
fabfcd
-		   must retry with smaller input size. 1000 is 100% */
fabfcd
-		s->last_comp_ratio = 1000UL;
fabfcd
-	}
fabfcd
-
fabfcd
-	/* clear then copy fc to the crb */
fabfcd
-	cmdp->crb.gzip_fc = 0;
fabfcd
-	putnn(cmdp->crb, gzip_fc, fc);
fabfcd
+	fc = nx_reset_dde(s);
fabfcd
+	nx_history_len = nx_init_dde(s);
fabfcd
 
fabfcd
 	/*
fabfcd
 	 * NX source buffers
fabfcd
fabfcd
From e376d92fa704108f1258e3a41fc1ffcf551d1c5b Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Fri, 25 Mar 2022 09:57:32 -0300
fabfcd
Subject: [PATCH 3/6] Fix the total amount of pages being touched
fabfcd
fabfcd
Fix an error in nx_touch_pages_dde() that was causing the function to
fabfcd
touch a different number of pages than requested.
fabfcd
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_zlib.c | 2 +-
fabfcd
 1 file changed, 1 insertion(+), 1 deletion(-)
fabfcd
fabfcd
diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c
fabfcd
index a50d6f7..bf2a6fc 100644
fabfcd
--- a/lib/nx_zlib.c
fabfcd
+++ b/lib/nx_zlib.c
fabfcd
@@ -398,7 +398,7 @@ int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr)
fabfcd
 
fabfcd
 		/* touching fewer pages than encoded in the ddebc */
fabfcd
 		if ( total > buf_sz) {
fabfcd
-			buf_len = NX_MIN(buf_len, total - buf_sz);
fabfcd
+			buf_len = buf_sz - (total - buf_len);
fabfcd
 			nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
fabfcd
 			prt_trace("touch loop break len 0x%x ddead %p\n", buf_len, (void *)buf_addr);
fabfcd
 			break;
fabfcd
fabfcd
From 1f3dc128a476c9bbbb1b503d2fc8f54365101ebf Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Thu, 24 Mar 2022 18:18:34 -0300
fabfcd
Subject: [PATCH 4/6] inflate: Move code that sets the DDE
fabfcd
fabfcd
Create functions nx_set_dde_in() and nx_set_dde_out() based on old code.
fabfcd
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_inflate.c | 64 +++++++++++++++++++++++++++++++++---------------
fabfcd
 1 file changed, 44 insertions(+), 20 deletions(-)
fabfcd
fabfcd
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
fabfcd
index f1d9adb..a9671b2 100644
fabfcd
--- a/lib/nx_inflate.c
fabfcd
+++ b/lib/nx_inflate.c
fabfcd
@@ -1048,6 +1048,41 @@ static int nx_init_dde(nx_streamp s) {
fabfcd
 	return nx_history_len;
fabfcd
 }
fabfcd
 
fabfcd
+/** \brief Append input data to DDE
fabfcd
+ *
fabfcd
+ *  @param s nx_streamp to be processed.
fabfcd
+ *
fabfcd
+ *  @return The total amount of bytes appended to DDE
fabfcd
+ */
fabfcd
+static uint32_t nx_set_dde_in(nx_streamp s) {
fabfcd
+	/* Buffered user input is next */
fabfcd
+	if (s->fifo_in != NULL)
fabfcd
+		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
fabfcd
+	/* Then current user input.  */
fabfcd
+	nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
fabfcd
+	/* Total bytes going in to engine.  */
fabfcd
+	return getp32(s->ddl_in, ddebc);
fabfcd
+}
fabfcd
+
fabfcd
+/** \brief Append output data to DDE
fabfcd
+ *
fabfcd
+ *  @param s nx_streamp to be processed.
fabfcd
+ *
fabfcd
+ *  @return The total amount of bytes appended to DDE
fabfcd
+ */
fabfcd
+static uint32_t nx_set_dde_out(nx_streamp s) {
fabfcd
+	/* Decompress to user buffer first.  */
fabfcd
+	nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
fabfcd
+
fabfcd
+	/* Overflow to fifo_out.
fabfcd
+	   used_out == 0 required by definition.  */
fabfcd
+	ASSERT(s->used_out == 0);
fabfcd
+	nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
fabfcd
+		      s->len_out - s->cur_out);
fabfcd
+
fabfcd
+	return s->avail_out + s->len_out - s->cur_out;
fabfcd
+}
fabfcd
+
fabfcd
 /** \brief Internal implementation of inflate.
fabfcd
  *
fabfcd
  * @param s nx_streamp to be processed.
fabfcd
@@ -1195,28 +1230,13 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	/*
fabfcd
 	 * NX source buffers
fabfcd
 	 */
fabfcd
-	/* buffered user input is next */
fabfcd
-	if (s->fifo_in != NULL)
fabfcd
-		nx_append_dde(ddl_in, s->fifo_in + s->cur_in, s->used_in);
fabfcd
-	/* then current user input */
fabfcd
-	nx_append_dde(ddl_in, s->next_in, s->avail_in);
fabfcd
-	source_sz = getp32(ddl_in, ddebc); /* total bytes going in to engine */
fabfcd
-	ASSERT( source_sz > nx_history_len );
fabfcd
+	source_sz = nx_set_dde_in(s);
fabfcd
+	ASSERT(source_sz > nx_history_len);
fabfcd
 
fabfcd
 	/*
fabfcd
 	 * NX target buffers
fabfcd
 	 */
fabfcd
-	ASSERT(s->used_out == 0);
fabfcd
-
fabfcd
-	uint32_t len_next_out = s->avail_out;
fabfcd
-	nx_append_dde(ddl_out, s->next_out, len_next_out); /* decomp in to user buffer */
fabfcd
-
fabfcd
-	/* overflow, used_out == 0 required by definition, +used_out below is unnecessary */
fabfcd
-	nx_append_dde(ddl_out, s->fifo_out + s->cur_out + s->used_out, s->len_out - s->cur_out - s->used_out);
fabfcd
-	target_sz = len_next_out + s->len_out - s->cur_out - s->used_out;
fabfcd
-
fabfcd
-	prt_info("len_next_out %d len_out %d cur_out %d used_out %d source_sz %d history_len %d\n",
fabfcd
-		 len_next_out, s->len_out, s->cur_out, s->used_out, source_sz, nx_history_len);
fabfcd
+	target_sz = nx_set_dde_out(s);
fabfcd
 
fabfcd
 	/* We want exactly the History size amount of 32KB to overflow
fabfcd
 	   in to fifo_out.  If overflow is less, the history spans
fabfcd
@@ -1228,6 +1248,7 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	   these copies (memcpy) for performance. Therefore, the
fabfcd
 	   heuristic here will estimate the source size for the
fabfcd
 	   desired target size */
fabfcd
+	uint32_t len_next_out = s->avail_out;
fabfcd
 
fabfcd
 	/* avail_out plus 32 KB history plus a bit of overhead */
fabfcd
 	uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
fabfcd
@@ -1240,11 +1261,14 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 
fabfcd
 	prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
fabfcd
 
fabfcd
+	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
fabfcd
+		 " used_out %d source_sz %d history_len %d\n",
fabfcd
+		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
fabfcd
+		 s->used_out, source_sz, nx_history_len);
fabfcd
+
fabfcd
 	/* do not include input side history in the estimation */
fabfcd
 	source_sz = source_sz - nx_history_len;
fabfcd
-
fabfcd
 	ASSERT(source_sz > 0);
fabfcd
-
fabfcd
 	source_sz = NX_MIN(source_sz, source_sz_expected);
fabfcd
 
fabfcd
 	/* add the history back */
fabfcd
fabfcd
From eb6cb7b01fe1fa337979353e905e3ad96514b233 Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Thu, 24 Mar 2022 18:37:27 -0300
fabfcd
Subject: [PATCH 5/6] inflate: cosmetic improvements
fabfcd
fabfcd
- Add source code comments.
fabfcd
- Improve indentation.
fabfcd
- Break long lines.
fabfcd
- Fix error and information messages.
fabfcd
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_inflate.c | 51 ++++++++++++++++++++++++++++++++++++------------
fabfcd
 1 file changed, 38 insertions(+), 13 deletions(-)
fabfcd
fabfcd
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
fabfcd
index a9671b2..a6070bd 100644
fabfcd
--- a/lib/nx_inflate.c
fabfcd
+++ b/lib/nx_inflate.c
fabfcd
@@ -1090,14 +1090,31 @@ static uint32_t nx_set_dde_out(nx_streamp s) {
fabfcd
  */
fabfcd
 static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 {
fabfcd
-	/* queuing, file ops, byte counting */
fabfcd
-	uint32_t write_sz, source_sz, target_sz;
fabfcd
+	/** \brief Sum of the bytes that may be used by NX as input
fabfcd
+	 *
fabfcd
+	 *  Total amount of bytes sent to the NX to be used as input,
fabfcd
+	 *  i.e. sum of the bytes in next_in and fifo_in.  */
fabfcd
+	uint32_t source_sz;
fabfcd
+
fabfcd
+	/** \brief Sum of the bytes that may be used by NX as output
fabfcd
+	 *
fabfcd
+	 *  Maximum amount of bytes available by the NX to be used as output,
fabfcd
+	 *  i.e. sum of the bytes available in next_out and fifo_out.  */
fabfcd
+	uint32_t target_sz;
fabfcd
+
fabfcd
+	uint32_t write_sz;
fabfcd
 	long loop_cnt = 0, loop_max = 0xffff;
fabfcd
 
fabfcd
 	/** \brief inflate benefits from large jobs; memcopies must be
fabfcd
 	 *  amortized.  */
fabfcd
 	const uint32_t inflate_per_job_len = nx_config.per_job_len;
fabfcd
 
fabfcd
+	/** \brief Estimated value for target_sz. Used to calculate
fabfcd
+	 *  source_sz_expected.  */
fabfcd
+	uint32_t target_sz_expected;
fabfcd
+	/** \brief Estimated value for source_sz.  */
fabfcd
+	uint32_t source_sz_expected;
fabfcd
+
fabfcd
 	/* nx hardware */
fabfcd
 	uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc;
fabfcd
 
fabfcd
@@ -1251,16 +1268,20 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	uint32_t len_next_out = s->avail_out;
fabfcd
 
fabfcd
 	/* avail_out plus 32 KB history plus a bit of overhead */
fabfcd
-	uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
fabfcd
+	target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
fabfcd
 
fabfcd
 	target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
fabfcd
 
fabfcd
 	/* e.g. if we want 100KB at the output and if the compression
fabfcd
 	   ratio is 10% we want 10KB if input */
fabfcd
-	uint32_t source_sz_expected = (uint32_t)(((uint64_t)target_sz_expected * s->last_comp_ratio + 1000L)/1000UL);
fabfcd
+	source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
fabfcd
+					  * s->last_comp_ratio + 1000L)/1000UL);
fabfcd
 
fabfcd
-	prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len);
fabfcd
 
fabfcd
+	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
fabfcd
+		 " source_sz %d last_comp_ratio %d nx_history_len %d\n",
fabfcd
+		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
fabfcd
+		 source_sz, s->last_comp_ratio, nx_history_len);
fabfcd
 	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
fabfcd
 		 " used_out %d source_sz %d history_len %d\n",
fabfcd
 		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
fabfcd
@@ -1280,8 +1301,11 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 
fabfcd
 	/* fault in pages */
fabfcd
 	nx_touch_pages_dde(ddl_in, source_sz, nx_config.page_sz, 0);
fabfcd
-	nx_touch_pages_dde(ddl_out, target_sz, nx_config.page_sz, 1);
fabfcd
-	nx_touch_pages( (void *)cmdp, sizeof(nx_gzip_crb_cpb_t), nx_config.page_sz, 0);
fabfcd
+	nx_touch_pages_dde(ddl_out,
fabfcd
+			   target_sz,
fabfcd
+			   nx_config.page_sz, 1);
fabfcd
+	nx_touch_pages((void *) cmdp, sizeof(nx_gzip_crb_cpb_t),
fabfcd
+		       nx_config.page_sz, 0);
fabfcd
 
fabfcd
 	/*
fabfcd
 	 * send job to NX
fabfcd
@@ -1298,9 +1322,9 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 		   faulting address to fsaddr */
fabfcd
 		print_dbg_info(s, __LINE__);
fabfcd
 
fabfcd
-		prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d ",
fabfcd
-			 (void *)cmdp->crb.csb.fsaddr, source_sz);
fabfcd
-		prt_warn("target_sz %d\n", target_sz);
fabfcd
+		prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d "
fabfcd
+			 "target_sz %d\n", (void *)cmdp->crb.csb.fsaddr,
fabfcd
+			 source_sz, target_sz);
fabfcd
 #ifdef NX_LOG_SOURCE_TARGET
fabfcd
 		nx_print_dde(ddl_in, "source");
fabfcd
 		nx_print_dde(ddl_out, "target");
fabfcd
@@ -1339,8 +1363,8 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 			if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
fabfcd
 			   /* TODO what to do when page faults are too many?
fabfcd
 			    * Kernel MM would have killed the process. */
fabfcd
-				prt_err("Cannot make progress; too many page");
fabfcd
-				prt_err(" faults cc= %d\n", cc);
fabfcd
+				prt_err("Cannot make progress; too many page"
fabfcd
+					" faults cc= %d\n", cc);
fabfcd
 			}
fabfcd
 			else {
fabfcd
 				prt_warn("ERR_NX_AT_FAULT: more retry\n");
fabfcd
@@ -1397,7 +1421,8 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 		   cover the max expansion of INF_MIN_INPUT_LEN
fabfcd
 		   bytes */
fabfcd
 
fabfcd
-		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data src %d hist %d\n", source_sz, nx_history_len);
fabfcd
+		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
fabfcd
+			 " src %d hist %d\n", source_sz, nx_history_len);
fabfcd
 		goto restart_nx;
fabfcd
 
fabfcd
 	case ERR_NX_OK:
fabfcd
fabfcd
From 806bf8e3ed1d0ae8a21bc6b2035df390f1062c26 Mon Sep 17 00:00:00 2001
fabfcd
From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
Date: Mon, 28 Mar 2022 18:15:37 -0300
fabfcd
Subject: [PATCH 6/6] inflate: Limit the amount of data added to the DDE
fabfcd
fabfcd
Stop adding all input and output data to the DDE and limit based on the
fabfcd
calculated value for source_sz_expected and target_sz_expected.
fabfcd
By limiting these values, we end up better estimating the amount of
fabfcd
pages that need to be touched, reducing the amount of time spent
fabfcd
touching pages that might not be used.
fabfcd
fabfcd
Reported-by: Puvichakravarthy Ramachandran <puvichakravarthy@in.ibm.com>
fabfcd
Reported-by: Poorna Chandra Vemula <Poorna.Chandra.Vemula@ibm.com>
fabfcd
Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
fabfcd
---
fabfcd
 lib/nx_inflate.c | 180 +++++++++++++++++++++++++++++------------------
fabfcd
 1 file changed, 111 insertions(+), 69 deletions(-)
fabfcd
fabfcd
diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c
fabfcd
index a6070bd..b30cdf6 100644
fabfcd
--- a/lib/nx_inflate.c
fabfcd
+++ b/lib/nx_inflate.c
fabfcd
@@ -955,13 +955,6 @@ static int nx_reset_dde(nx_streamp s) {
fabfcd
 	} else {
fabfcd
 		/* First decompress job */
fabfcd
 		fc = GZIP_FC_DECOMPRESS;
fabfcd
-
fabfcd
-		/* We use the most recently measured compression ratio
fabfcd
-		   as a heuristic to estimate the input and output
fabfcd
-		   sizes. If we give too much input, the target buffer
fabfcd
-		   overflows and NX cycles are wasted, and then we
fabfcd
-		   must retry with smaller input size. 1000 is 100% */
fabfcd
-		s->last_comp_ratio = 1000UL;
fabfcd
 	}
fabfcd
 
fabfcd
 	/* clear then copy fc to the crb */
fabfcd
@@ -1051,15 +1044,24 @@ static int nx_init_dde(nx_streamp s) {
fabfcd
 /** \brief Append input data to DDE
fabfcd
  *
fabfcd
  *  @param s nx_streamp to be processed.
fabfcd
+ *  @param source_sz_expected The total amount of bytes expected as input. It
fabfcd
+ *         does not include dictionary or history.
fabfcd
  *
fabfcd
  *  @return The total amount of bytes appended to DDE
fabfcd
  */
fabfcd
-static uint32_t nx_set_dde_in(nx_streamp s) {
fabfcd
+static uint32_t nx_set_dde_in(nx_streamp s, uint32_t source_sz_expected) {
fabfcd
+	uint32_t tmp = 0;
fabfcd
+
fabfcd
 	/* Buffered user input is next */
fabfcd
-	if (s->fifo_in != NULL)
fabfcd
-		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in);
fabfcd
-	/* Then current user input.  */
fabfcd
-	nx_append_dde(s->ddl_in, s->next_in, s->avail_in);
fabfcd
+	if (s->fifo_in != NULL) {
fabfcd
+		tmp = NX_MIN(s->used_in, source_sz_expected);
fabfcd
+		nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, tmp);
fabfcd
+	}
fabfcd
+	if (tmp < source_sz_expected) {
fabfcd
+		tmp = NX_MIN(s->avail_in, source_sz_expected - tmp);
fabfcd
+		/* Then current user input.  */
fabfcd
+		nx_append_dde(s->ddl_in, s->next_in, tmp);
fabfcd
+	}
fabfcd
 	/* Total bytes going in to engine.  */
fabfcd
 	return getp32(s->ddl_in, ddebc);
fabfcd
 }
fabfcd
@@ -1067,20 +1069,32 @@ static uint32_t nx_set_dde_in(nx_streamp s) {
fabfcd
 /** \brief Append output data to DDE
fabfcd
  *
fabfcd
  *  @param s nx_streamp to be processed.
fabfcd
+ *  @param target_sz_expected The total amount of bytes expected as output.
fabfcd
  *
fabfcd
  *  @return The total amount of bytes appended to DDE
fabfcd
  */
fabfcd
-static uint32_t nx_set_dde_out(nx_streamp s) {
fabfcd
+static uint32_t nx_set_dde_out(nx_streamp s, uint32_t target_sz_expected) {
fabfcd
+	uint32_t tmp;
fabfcd
+	uint32_t ret;
fabfcd
+
fabfcd
+	ret = NX_MIN(s->avail_out, target_sz_expected);
fabfcd
+
fabfcd
 	/* Decompress to user buffer first.  */
fabfcd
-	nx_append_dde(s->ddl_out, s->next_out, s->avail_out);
fabfcd
+	nx_append_dde(s->ddl_out, s->next_out, ret);
fabfcd
+
fabfcd
+	if (ret < target_sz_expected) {
fabfcd
+		tmp = NX_MIN(s->len_out - s->cur_out,
fabfcd
+			     target_sz_expected - ret);
fabfcd
+
fabfcd
+		/* Overflow to fifo_out.
fabfcd
+		   used_out == 0 required by definition.  */
fabfcd
+		ASSERT(s->used_out == 0);
fabfcd
+		nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, tmp);
fabfcd
 
fabfcd
-	/* Overflow to fifo_out.
fabfcd
-	   used_out == 0 required by definition.  */
fabfcd
-	ASSERT(s->used_out == 0);
fabfcd
-	nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out,
fabfcd
-		      s->len_out - s->cur_out);
fabfcd
+		ret += tmp;
fabfcd
+	}
fabfcd
 
fabfcd
-	return s->avail_out + s->len_out - s->cur_out;
fabfcd
+	return ret;
fabfcd
 }
fabfcd
 
fabfcd
 /** \brief Internal implementation of inflate.
fabfcd
@@ -1094,7 +1108,7 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	 *
fabfcd
 	 *  Total amount of bytes sent to the NX to be used as input,
fabfcd
 	 *  i.e. sum of the bytes in next_in and fifo_in.  */
fabfcd
-	uint32_t source_sz;
fabfcd
+	uint32_t source_sz = 0;
fabfcd
 
fabfcd
 	/** \brief Sum of the bytes that may be used by NX as output
fabfcd
 	 *
fabfcd
@@ -1242,46 +1256,69 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 	/* NX decompresses input data */
fabfcd
 
fabfcd
 	fc = nx_reset_dde(s);
fabfcd
-	nx_history_len = nx_init_dde(s);
fabfcd
 
fabfcd
-	/*
fabfcd
-	 * NX source buffers
fabfcd
-	 */
fabfcd
-	source_sz = nx_set_dde_in(s);
fabfcd
-	ASSERT(source_sz > nx_history_len);
fabfcd
+	/** Estimate the amount of data sent to the NX. Ideally, we want
fabfcd
+	 *  exactly the history size amount of 32 KiB to overflow in to fifo_out
fabfcd
+	 *  in order to minimize copies of memory.
fabfcd
+	 *  If overflow is less than 32 KiB, the history spans next_out and
fabfcd
+	 *  fifo_out and must be copied in to fifo_out to setup history for the
fabfcd
+	 *  next job. The fifo_out fraction is also copied back to user's
fabfcd
+	 *  next_out before the next job.
fabfcd
+	 *  If overflow is more, all the overflow must be copied back
fabfcd
+	 *  to user's next_out before the next job.
fabfcd
+	 *  If overflow is much more, we may get an ERR_NX_TARGET_SPACE, forcing
fabfcd
+	 *  us to reduce the source before trying again.  A retry in this case
fabfcd
+	 *  will probably require NX to process much more than 32 KiB, which
fabfcd
+	 *  requires more time than copying 32 KiB of data.
fabfcd
+	 *
fabfcd
+	 *  With that said, we want to minimize unnecessary work (i.e. memcpy
fabfcd
+	 *  and retrying NX jobs) for performance. Therefore, the heuristic
fabfcd
+	 *  here will estimate the source size for the desired target size, but
fabfcd
+	 *  it prioritizes avoiding ERR_NX_TARGET_SPACE.  */
fabfcd
 
fabfcd
-	/*
fabfcd
-	 * NX target buffers
fabfcd
-	 */
fabfcd
-	target_sz = nx_set_dde_out(s);
fabfcd
-
fabfcd
-	/* We want exactly the History size amount of 32KB to overflow
fabfcd
-	   in to fifo_out.  If overflow is less, the history spans
fabfcd
-	   next_out and fifo_out and must be copied in to fifo_out to
fabfcd
-	   setup history for the next job, and the fifo_out fraction is
fabfcd
-	   also copied back to user's next_out before the next job.
fabfcd
-	   If overflow is more, all the overflow must be copied back
fabfcd
-	   to user's next_out before the next job. We want to minimize
fabfcd
-	   these copies (memcpy) for performance. Therefore, the
fabfcd
-	   heuristic here will estimate the source size for the
fabfcd
-	   desired target size */
fabfcd
 	uint32_t len_next_out = s->avail_out;
fabfcd
+	s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L);
fabfcd
 
fabfcd
-	/* avail_out plus 32 KB history plus a bit of overhead */
fabfcd
+	/* avail_out plus 32 KiB history plus a bit of overhead */
fabfcd
 	target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2);
fabfcd
-
fabfcd
 	target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len);
fabfcd
 
fabfcd
-	/* e.g. if we want 100KB at the output and if the compression
fabfcd
-	   ratio is 10% we want 10KB if input */
fabfcd
+	/** Calculate source_sz_expected based on target_sz_expected and the
fabfcd
+	 *  last compression ratio, e.g. if we want 100KB at the output and if
fabfcd
+	 *  the compression ratio is 10% we want 10KB of input */
fabfcd
 	source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected
fabfcd
 					  * s->last_comp_ratio + 1000L)/1000UL);
fabfcd
 
fabfcd
+	/** After calculating source_sz_expected, try to provide extra
fabfcd
+	 *  target_sz_expected in order to avoid an ERR_NX_TARGET_SPACE.  */
fabfcd
+	target_sz_expected = NX_MIN(len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2),
fabfcd
+				    4 * inflate_per_job_len);
fabfcd
+	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
fabfcd
+		 " source_sz %d last_comp_ratio %d\n",
fabfcd
+		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
fabfcd
+		 source_sz, s->last_comp_ratio);
fabfcd
+
fabfcd
+
fabfcd
+init_dde:
fabfcd
+	nx_history_len = nx_init_dde(s);
fabfcd
+
fabfcd
+	/*
fabfcd
+	 * NX source buffers
fabfcd
+	 */
fabfcd
+	source_sz = nx_set_dde_in(s, source_sz_expected);
fabfcd
+	ASSERT(source_sz > nx_history_len);
fabfcd
+	ASSERT(source_sz <= source_sz_expected + nx_history_len);
fabfcd
 
fabfcd
 	prt_info("%s:%d target_sz_expected %d source_sz_expected %d"
fabfcd
 		 " source_sz %d last_comp_ratio %d nx_history_len %d\n",
fabfcd
 		 __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected,
fabfcd
 		 source_sz, s->last_comp_ratio, nx_history_len);
fabfcd
+
fabfcd
+	/*
fabfcd
+	 * NX target buffers
fabfcd
+	 */
fabfcd
+	target_sz = nx_set_dde_out(s, target_sz_expected);
fabfcd
+
fabfcd
 	prt_info("%s:%d len_next_out %d len_out %d cur_out %d"
fabfcd
 		 " used_out %d source_sz %d history_len %d\n",
fabfcd
 		 __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out,
fabfcd
@@ -1345,19 +1382,22 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 			   that is about 2 pages minimum for source and
fabfcd
 			   and 6 pages for target; if the system does not
fabfcd
 			   have 8 free pages then the loop will last forever */
fabfcd
-			source_sz = source_sz - nx_history_len;
fabfcd
-			if (source_sz > (2 * INF_MIN_INPUT_LEN))
fabfcd
-				source_sz = (source_sz + 1) / 2;
fabfcd
-			else if (source_sz > INF_MIN_INPUT_LEN)
fabfcd
-				source_sz = INF_MIN_INPUT_LEN;
fabfcd
-
fabfcd
-			/* else if caller gave fewer source bytes, keep it as is */
fabfcd
-			source_sz = source_sz + nx_history_len;
fabfcd
-
fabfcd
-			if (target_sz > (2 * INF_MAX_EXPANSION_BYTES))
fabfcd
-				target_sz = (target_sz + 1) / 2;
fabfcd
-			else if (target_sz > INF_MAX_EXPANSION_BYTES)
fabfcd
-				target_sz = INF_MAX_EXPANSION_BYTES;
fabfcd
+			source_sz_expected = source_sz - nx_history_len;
fabfcd
+			if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
fabfcd
+				source_sz_expected
fabfcd
+					= (source_sz_expected + 1) / 2;
fabfcd
+			else if (source_sz_expected > INF_MIN_INPUT_LEN)
fabfcd
+				source_sz_expected = INF_MIN_INPUT_LEN;
fabfcd
+
fabfcd
+			/* else if caller gave fewer source bytes, keep it as
fabfcd
+			   is.  */
fabfcd
+			source_sz = source_sz_expected + nx_history_len;
fabfcd
+
fabfcd
+			if (target_sz_expected > (2 * INF_MAX_EXPANSION_BYTES))
fabfcd
+				target_sz_expected
fabfcd
+					= (target_sz_expected + 1) / 2;
fabfcd
+			else if (target_sz_expected > INF_MAX_EXPANSION_BYTES)
fabfcd
+				target_sz_expected = INF_MAX_EXPANSION_BYTES;
fabfcd
 
fabfcd
 			ticks_total = nx_wait_ticks(500, ticks_total, 0);
fabfcd
 			if (ticks_total > (timeout_pgfaults * nx_get_freq())) {
fabfcd
@@ -1368,7 +1408,8 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 			}
fabfcd
 			else {
fabfcd
 				prt_warn("ERR_NX_AT_FAULT: more retry\n");
fabfcd
-				goto restart_nx;
fabfcd
+				fc = nx_reset_dde(s);
fabfcd
+				goto init_dde;
fabfcd
 			}
fabfcd
 		}
fabfcd
 
fabfcd
@@ -1403,18 +1444,17 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 		/* Target buffer not large enough; retry smaller input
fabfcd
 		   data; give at least 1 byte. SPBC/TPBC are not valid */
fabfcd
 		ASSERT( source_sz > nx_history_len );
fabfcd
-		source_sz = ((source_sz - nx_history_len + 1) / 2) + nx_history_len;
fabfcd
+		source_sz_expected = (source_sz - nx_history_len + 1) / 2;
fabfcd
 
fabfcd
-		source_sz = source_sz - nx_history_len;
fabfcd
 		/* reduce large source down to minimum viable; if
fabfcd
 		   source is already small don't change it */
fabfcd
-		if (source_sz > (2 * INF_MIN_INPUT_LEN))
fabfcd
-			source_sz = (source_sz + 1) / 2;
fabfcd
-		else if (source_sz > INF_MIN_INPUT_LEN)
fabfcd
-			source_sz = INF_MIN_INPUT_LEN;
fabfcd
+		if (source_sz_expected > (2 * INF_MIN_INPUT_LEN))
fabfcd
+			source_sz_expected = (source_sz_expected + 1) / 2;
fabfcd
+		else if (source_sz_expected > INF_MIN_INPUT_LEN)
fabfcd
+			source_sz_expected = INF_MIN_INPUT_LEN;
fabfcd
 
fabfcd
 		/* else if caller gave fewer source bytes, keep it as is */
fabfcd
-		source_sz = source_sz + nx_history_len;
fabfcd
+		source_sz = source_sz_expected + nx_history_len;
fabfcd
 
fabfcd
 		/* do not change target size because we allocated a
fabfcd
 		   minimum of INF_MAX_EXPANSION_BYTES which should
fabfcd
@@ -1422,8 +1462,10 @@ static int nx_inflate_(nx_streamp s, int flush)
fabfcd
 		   bytes */
fabfcd
 
fabfcd
 		prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data"
fabfcd
-			 " src %d hist %d\n", source_sz, nx_history_len);
fabfcd
-		goto restart_nx;
fabfcd
+			 " source_sz_expected %d nx_history_len %d\n",
fabfcd
+			 source_sz_expected, nx_history_len);
fabfcd
+		fc = nx_reset_dde(s);
fabfcd
+		goto init_dde;
fabfcd
 
fabfcd
 	case ERR_NX_OK:
fabfcd