SOURCES/0006-fs-ntfs3-Add-compression.patch

From 522e010b58379fbe19b38fdef5016bca0c3cf405 Mon Sep 17 00:00:00 2001
From: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Date: Fri, 13 Aug 2021 17:21:30 +0300
Subject: [Backport 522e010b5837] src: Add compression

This patch adds different types of NTFS-applicable compressions:
- lznt
- lzx
- xpress
The latter two (lzx, xpress) implement the Windows Compact OS feature and
were taken from the ntfs-3g system compression plugin authored by Eric Biggers
(https://github.com/ebiggers/ntfs-3g-system-compression),
which was ported to ntfs3 and adapted to the Linux kernel environment.

Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
---
 src/lib/decompress_common.c | 332 +++++++++++++++
 src/lib/decompress_common.h | 352 ++++++++++++++++
 src/lib/lib.h               |  26 ++
 src/lib/lzx_decompress.c    | 683 +++++++++++++++++++++++++++++++
 src/lib/xpress_decompress.c | 155 +++++++
 src/lznt.c                  | 452 ++++++++++++++++++++
 6 files changed, 2000 insertions(+)
 create mode 100644 src/lib/decompress_common.c
 create mode 100644 src/lib/decompress_common.h
 create mode 100644 src/lib/lib.h
 create mode 100644 src/lib/lzx_decompress.c
 create mode 100644 src/lib/xpress_decompress.c
 create mode 100644 src/lznt.c

diff --git a/src/lib/decompress_common.c b/src/lib/decompress_common.c
new file mode 100644
index 0000000000000000000000000000000000000000..83c9e93aea77e437b5b1889b49272eee64d4df64
--- /dev/null
+++ b/src/lib/decompress_common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * decompress_common.c - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "decompress_common.h"
+
+/*
+ * make_huffman_decode_table() -
+ *
+ * Build a decoding table for a canonical prefix code, or "Huffman code".
+ *
+ * This is an internal function, not part of the library API!
+ *
+ * This takes as input the length of the codeword for each symbol in the
+ * alphabet and produces as output a table that can be used for fast
+ * decoding of prefix-encoded symbols using read_huffsym().
+ *
+ * Strictly speaking, a canonical prefix code might not be a Huffman
+ * code.  But this algorithm will work either way; and in fact, since
+ * Huffman codes are defined in terms of symbol frequencies, there is no
+ * way for the decompressor to know whether the code is a true Huffman
+ * code or not until all symbols have been decoded.
+ *
+ * Because the prefix code is assumed to be "canonical", it can be
+ * reconstructed directly from the codeword lengths.  A prefix code is
+ * canonical if and only if a longer codeword never lexicographically
+ * precedes a shorter codeword, and the lexicographic ordering of
+ * codewords of the same length is the same as the lexicographic ordering
+ * of the corresponding symbols.  Consequently, we can sort the symbols
+ * primarily by codeword length and secondarily by symbol value, then
+ * reconstruct the prefix code by generating codewords lexicographically
+ * in that order.
+ *
+ * This function does not, however, generate the prefix code explicitly.
+ * Instead, it directly builds a table for decoding symbols using the
+ * code.  The basic idea is this: given the next 'max_codeword_len' bits
+ * in the input, we can look up the decoded symbol by indexing a table
+ * containing 2**max_codeword_len entries.  A codeword with length
+ * 'max_codeword_len' will have exactly one entry in this table, whereas
+ * a codeword shorter than 'max_codeword_len' will have multiple entries
+ * in this table.  Precisely, a codeword of length n will be represented
+ * by 2**(max_codeword_len - n) entries in this table.  The 0-based index
+ * of each such entry will contain the corresponding codeword as a prefix
+ * when zero-padded on the left to 'max_codeword_len' binary digits.
+ *
+ * That's the basic idea, but we implement two optimizations regarding
+ * the format of the decode table itself:
+ *
+ * - For many compression formats, the maximum codeword length is too
+ *   long for it to be efficient to build the full decoding table
+ *   whenever a new prefix code is used.  Instead, we can build the table
+ *   using only 2**table_bits entries, where 'table_bits' is some number
+ *   less than or equal to 'max_codeword_len'.  Then, only codewords of
+ *   length 'table_bits' and shorter can be directly looked up.  For
+ *   longer codewords, the direct lookup instead produces the root of a
+ *   binary tree.  Using this tree, the decoder can do traditional
+ *   bit-by-bit decoding of the remainder of the codeword.  Child nodes
+ *   are allocated in extra entries at the end of the table; leaf nodes
+ *   contain symbols.  Note that the long-codeword case is, in general,
+ *   not performance critical, since in Huffman codes the most frequently
+ *   used symbols are assigned the shortest codeword lengths.
+ *
+ * - When we decode a symbol using a direct lookup of the table, we still
+ *   need to know its length so that the bitstream can be advanced by the
+ *   appropriate number of bits.  The simple solution is to simply retain
+ *   the 'lens' array and use the decoded symbol as an index into it.
+ *   However, this requires two separate array accesses in the fast path.
+ *   The optimization is to store the length directly in the decode
+ *   table.  We use the bottom 11 bits for the symbol and the top 5 bits
+ *   for the length.  In addition, to combine this optimization with the
+ *   previous one, we introduce a special case where the top 2 bits of
+ *   the length are both set if the entry is actually the root of a
+ *   binary tree.
+ *
+ * @decode_table:
+ *	The array in which to create the decoding table.  This must have
+ *	a length of at least ((2**table_bits) + 2 * num_syms) entries.
+ *
+ * @num_syms:
+ *	The number of symbols in the alphabet; also, the length of the
+ *	'lens' array.  Must be less than or equal to 2048.
+ *
+ * @table_bits:
+ *	The order of the decode table size, as explained above.  Must be
+ *	less than or equal to 13.
+ *
+ * @lens:
+ *	An array of length @num_syms, indexable by symbol, that gives the
+ *	length of the codeword, in bits, for that symbol.  The length can
+ *	be 0, which means that the symbol does not have a codeword
+ *	assigned.
+ *
+ * @max_codeword_len:
+ *	The longest codeword length allowed in the compression format.
+ *	All entries in 'lens' must be less than or equal to this value.
+ *	This must be less than or equal to 23.
+ *
+ * @working_space
+ *	A temporary array of length '2 * (max_codeword_len + 1) +
+ *	num_syms'.
+ *
+ * Returns 0 on success, or -1 if the lengths do not form a valid prefix
+ * code.
+ */
+int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+			      const u32 table_bits, const u8 lens[],
+			      const u32 max_codeword_len,
+			      u16 working_space[])
+{
+	const u32 table_num_entries = 1 << table_bits;
+	u16 * const len_counts = &working_space[0];
+	u16 * const offsets = &working_space[1 * (max_codeword_len + 1)];
+	u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)];
+	int left;
+	void *decode_table_ptr;
+	u32 sym_idx;
+	u32 codeword_len;
+	u32 stores_per_loop;
+	u32 decode_table_pos;
+	u32 len;
+	u32 sym;
+
+	/* Count how many symbols have each possible codeword length.
+	 * Note that a length of 0 indicates the corresponding symbol is not
+	 * used in the code and therefore does not have a codeword.
+	 */
+	for (len = 0; len <= max_codeword_len; len++)
+		len_counts[len] = 0;
+	for (sym = 0; sym < num_syms; sym++)
+		len_counts[lens[sym]]++;
+
+	/* We can assume all lengths are <= max_codeword_len, but we
+	 * cannot assume they form a valid prefix code.  A codeword of
+	 * length n should require a proportion of the codespace equaling
+	 * (1/2)^n.  The code is valid if and only if the codespace is
+	 * exactly filled by the lengths, by this measure.
+	 */
+	left = 1;
+	for (len = 1; len <= max_codeword_len; len++) {
+		left <<= 1;
+		left -= len_counts[len];
+		if (left < 0) {
+			/* The lengths overflow the codespace; that is, the code
+			 * is over-subscribed.
+			 */
+			return -1;
+		}
+	}
+
+	if (left) {
+		/* The lengths do not fill the codespace; that is, they form an
+		 * incomplete set.
+		 */
+		if (left == (1 << max_codeword_len)) {
+			/* The code is completely empty.  This is arguably
+			 * invalid, but in fact it is valid in LZX and XPRESS,
+			 * so we must allow it.  By definition, no symbols can
+			 * be decoded with an empty code.  Consequently, we
+			 * technically don't even need to fill in the decode
+			 * table.  However, to avoid accessing uninitialized
+			 * memory if the algorithm nevertheless attempts to
+			 * decode symbols using such a code, we zero out the
+			 * decode table.
+			 */
+			memset(decode_table, 0,
+			       table_num_entries * sizeof(decode_table[0]));
+			return 0;
+		}
+		return -1;
+	}
+
+	/* Sort the symbols primarily by length and secondarily by symbol order.
+	 */
+
+	/* Initialize 'offsets' so that offsets[len] for 1 <= len <=
+	 * max_codeword_len is the number of codewords shorter than 'len' bits.
+	 */
+	offsets[1] = 0;
+	for (len = 1; len < max_codeword_len; len++)
+		offsets[len + 1] = offsets[len] + len_counts[len];
+
+	/* Use the 'offsets' array to sort the symbols.  Note that we do not
+	 * include symbols that are not used in the code.  Consequently, fewer
+	 * than 'num_syms' entries in 'sorted_syms' may be filled.
+	 */
+	for (sym = 0; sym < num_syms; sym++)
+		if (lens[sym])
+			sorted_syms[offsets[lens[sym]]++] = sym;
+
+	/* Fill entries for codewords with length <= table_bits
+	 * --- that is, those short enough for a direct mapping.
+	 *
+	 * The table will start with entries for the shortest codeword(s), which
+	 * have the most entries.  From there, the number of entries per
+	 * codeword will decrease.
+	 */
+	decode_table_ptr = decode_table;
+	sym_idx = 0;
+	codeword_len = 1;
+	stores_per_loop = (1 << (table_bits - codeword_len));
+	for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) {
+		u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+		for (; sym_idx < end_sym_idx; sym_idx++) {
+			u16 entry;
+			u16 *p;
+			u32 n;
+
+			entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx];
+			p = (u16 *)decode_table_ptr;
+			n = stores_per_loop;
+
+			do {
+				*p++ = entry;
+			} while (--n);
+
+			decode_table_ptr = p;
+		}
+	}
+
+	/* If we've filled in the entire table, we are done.  Otherwise,
+	 * there are codewords longer than table_bits for which we must
+	 * generate binary trees.
+	 */
+	decode_table_pos = (u16 *)decode_table_ptr - decode_table;
+	if (decode_table_pos != table_num_entries) {
+		u32 j;
+		u32 next_free_tree_slot;
+		u32 cur_codeword;
+
+		/* First, zero out the remaining entries.  This is
+		 * necessary so that these entries appear as
+		 * "unallocated" in the next part.  Each of these entries
+		 * will eventually be filled with the representation of
+		 * the root node of a binary tree.
+		 */
+		j = decode_table_pos;
+		do {
+			decode_table[j] = 0;
+		} while (++j != table_num_entries);
+
+		/* We allocate child nodes starting at the end of the
+		 * direct lookup table.  Note that there should be
+		 * 2*num_syms extra entries for this purpose, although
+		 * fewer than this may actually be needed.
+		 */
+		next_free_tree_slot = table_num_entries;
+
+		/* Iterate through each codeword with length greater than
+		 * 'table_bits', primarily in order of codeword length
+		 * and secondarily in order of symbol.
+		 */
+		for (cur_codeword = decode_table_pos << 1;
+		     codeword_len <= max_codeword_len;
+		     codeword_len++, cur_codeword <<= 1) {
+			u32 end_sym_idx = sym_idx + len_counts[codeword_len];
+
+			for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) {
+				/* 'sorted_sym' is the symbol represented by the
+				 * codeword.
+				 */
+				u32 sorted_sym = sorted_syms[sym_idx];
+				u32 extra_bits = codeword_len - table_bits;
+				u32 node_idx = cur_codeword >> extra_bits;
+
+				/* Go through each bit of the current codeword
+				 * beyond the prefix of length @table_bits and
+				 * walk the appropriate binary tree, allocating
+				 * any slots that have not yet been allocated.
+				 *
+				 * Note that the 'pointer' entry to the binary
+				 * tree, which is stored in the direct lookup
+				 * portion of the table, is represented
+				 * identically to other internal (non-leaf)
+				 * nodes of the binary tree; it can be thought
+				 * of as simply the root of the tree.  The
+				 * representation of these internal nodes is
+				 * simply the index of the left child combined
+				 * with the special bits 0xC000 to distinguish
+				 * the entry from direct mapping and leaf node
+				 * entries.
+				 */
+				do {
+					/* At least one bit remains in the
+					 * codeword, but the current node is an
+					 * unallocated leaf.  Change it to an
+					 * internal node.
+					 */
+					if (decode_table[node_idx] == 0) {
+						decode_table[node_idx] =
+							next_free_tree_slot | 0xC000;
+						decode_table[next_free_tree_slot++] = 0;
+						decode_table[next_free_tree_slot++] = 0;
+					}
+
+					/* Go to the left child if the next bit
+					 * in the codeword is 0; otherwise go to
+					 * the right child.
+					 */
+					node_idx = decode_table[node_idx] & 0x3FFF;
+					--extra_bits;
+					node_idx += (cur_codeword >> extra_bits) & 1;
+				} while (extra_bits != 0);
+
+				/* We've traversed the tree using the entire
+				 * codeword, and we're now at the entry where
+				 * the actual symbol will be stored.  This is
+				 * distinguished from internal nodes by not
+				 * having its high two bits set.
+				 */
+				decode_table[node_idx] = sorted_sym;
+			}
+		}
+	}
+	return 0;
+}
diff --git a/src/lib/decompress_common.h b/src/lib/decompress_common.h
new file mode 100644
index 0000000000000000000000000000000000000000..66297f398403f13abe05f3b5af1aa7c5674351e8
--- /dev/null
+++ b/src/lib/decompress_common.h
@@ -0,0 +1,352 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/*
+ * decompress_common.h - Code shared by the XPRESS and LZX decompressors
+ *
+ * Copyright (C) 2015 Eric Biggers
+ *
+ * This program is free software: you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation, either version 2 of the License, or (at your option) any later
+ * version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+
+/* "Force inline" macro (not required, but helpful for performance)  */
+#define forceinline __always_inline
+
+/* Enable whole-word match copying on selected architectures  */
+#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED)
+#  define FAST_UNALIGNED_ACCESS
+#endif
+
+/* Size of a machine word  */
+#define WORDBYTES (sizeof(size_t))
+
+static forceinline void
+copy_unaligned_word(const void *src, void *dst)
+{
+	put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst);
+}
+
+
+/* Generate a "word" with platform-dependent size whose bytes all contain the
+ * value 'b'.
+ */
+static forceinline size_t repeat_byte(u8 b)
+{
+	size_t v;
+
+	v = b;
+	v |= v << 8;
+	v |= v << 16;
+	v |= v << ((WORDBYTES == 8) ? 32 : 0);
+	return v;
+}
+
+/* Structure that encapsulates a block of in-memory data being interpreted as a
+ * stream of bits, optionally with interwoven literal bytes.  Bits are assumed
+ * to be stored in little endian 16-bit coding units, with the bits ordered high
+ * to low.
+ */
+struct input_bitstream {
+
+	/* Bits that have been read from the input buffer.  The bits are
+	 * left-justified; the next bit is always bit 31.
+	 */
+	u32 bitbuf;
+
+	/* Number of bits currently held in @bitbuf.  */
+	u32 bitsleft;
+
+	/* Pointer to the next byte to be retrieved from the input buffer.  */
+	const u8 *next;
+
+	/* Pointer to just past the end of the input buffer.  */
+	const u8 *end;
+};
+
+/* Initialize a bitstream to read from the specified input buffer.  */
+static forceinline void init_input_bitstream(struct input_bitstream *is,
+					     const void *buffer, u32 size)
+{
+	is->bitbuf = 0;
+	is->bitsleft = 0;
+	is->next = buffer;
+	is->end = is->next + size;
+}
+
+/* Ensure the bit buffer variable for the bitstream contains at least @num_bits
+ * bits.  Following this, bitstream_peek_bits() and/or bitstream_remove_bits()
+ * may be called on the bitstream to peek or remove up to @num_bits bits.  Note
+ * that @num_bits must be <= 16.
+ */
+static forceinline void bitstream_ensure_bits(struct input_bitstream *is,
+					      u32 num_bits)
+{
+	if (is->bitsleft < num_bits) {
+		if (is->end - is->next >= 2) {
+			is->bitbuf |= (u32)get_unaligned_le16(is->next)
+					<< (16 - is->bitsleft);
+			is->next += 2;
+		}
+		is->bitsleft += 16;
+	}
+}
+
+/* Return the next @num_bits bits from the bitstream, without removing them.
+ * There must be at least @num_bits remaining in the buffer variable, from a
+ * previous call to bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits)
+{
+	return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1);
+}
+
+/* Remove @num_bits from the bitstream.  There must be at least @num_bits
+ * remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline void
+bitstream_remove_bits(struct input_bitstream *is, u32 num_bits)
+{
+	is->bitbuf <<= num_bits;
+	is->bitsleft -= num_bits;
+}
+
+/* Remove and return @num_bits bits from the bitstream.  There must be at least
+ * @num_bits remaining in the buffer variable, from a previous call to
+ * bitstream_ensure_bits().
+ */
+static forceinline u32
+bitstream_pop_bits(struct input_bitstream *is, u32 num_bits)
+{
+	u32 bits = bitstream_peek_bits(is, num_bits);
+
+	bitstream_remove_bits(is, num_bits);
+	return bits;
+}
+
+/* Read and return the next @num_bits bits from the bitstream.  */
+static forceinline u32
+bitstream_read_bits(struct input_bitstream *is, u32 num_bits)
+{
+	bitstream_ensure_bits(is, num_bits);
+	return bitstream_pop_bits(is, num_bits);
+}
+
+/* Read and return the next literal byte embedded in the bitstream.  */
+static forceinline u8
+bitstream_read_byte(struct input_bitstream *is)
+{
+	if (unlikely(is->end == is->next))
+		return 0;
+	return *is->next++;
+}
+
+/* Read and return the next 16-bit integer embedded in the bitstream.  */
+static forceinline u16
+bitstream_read_u16(struct input_bitstream *is)
+{
+	u16 v;
+
+	if (unlikely(is->end - is->next < 2))
+		return 0;
+	v = get_unaligned_le16(is->next);
+	is->next += 2;
+	return v;
+}
+
+/* Read and return the next 32-bit integer embedded in the bitstream.  */
+static forceinline u32
+bitstream_read_u32(struct input_bitstream *is)
+{
+	u32 v;
+
+	if (unlikely(is->end - is->next < 4))
+		return 0;
+	v = get_unaligned_le32(is->next);
+	is->next += 4;
+	return v;
+}
+
+/* Read into @dst_buffer an array of literal bytes embedded in the bitstream.
+ * Return either a pointer to the byte past the last written, or NULL if the
+ * read overflows the input buffer.
+ */
+static forceinline void *bitstream_read_bytes(struct input_bitstream *is,
+					      void *dst_buffer, size_t count)
+{
+	if ((size_t)(is->end - is->next) < count)
+		return NULL;
+	memcpy(dst_buffer, is->next, count);
+	is->next += count;
+	return (u8 *)dst_buffer + count;
+}
+
+/* Align the input bitstream on a coding-unit boundary.  */
+static forceinline void bitstream_align(struct input_bitstream *is)
+{
+	is->bitsleft = 0;
+	is->bitbuf = 0;
+}
+
+extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms,
+				     const u32 num_bits, const u8 lens[],
+				     const u32 max_codeword_len,
+				     u16 working_space[]);
+
+
+/* Reads and returns the next Huffman-encoded symbol from a bitstream.  If the
+ * input data is exhausted, the Huffman symbol is decoded as if the missing bits
+ * are all zeroes.
+ */
+static forceinline u32 read_huffsym(struct input_bitstream *istream,
+					 const u16 decode_table[],
+					 u32 table_bits,
+					 u32 max_codeword_len)
+{
+	u32 entry;
+	u32 key_bits;
+
+	bitstream_ensure_bits(istream, max_codeword_len);
+
+	/* Index the decode table by the next table_bits bits of the input.  */
+	key_bits = bitstream_peek_bits(istream, table_bits);
+	entry = decode_table[key_bits];
+	if (entry < 0xC000) {
+		/* Fast case: The decode table directly provided the
+		 * symbol and codeword length.  The low 11 bits are the
+		 * symbol, and the high 5 bits are the codeword length.
+		 */
+		bitstream_remove_bits(istream, entry >> 11);
+		return entry & 0x7FF;
+	}
+	/* Slow case: The codeword for the symbol is longer than
+	 * table_bits, so the symbol does not have an entry
+	 * directly in the first (1 << table_bits) entries of the
+	 * decode table.  Traverse the appropriate binary tree
+	 * bit-by-bit to decode the symbol.
+	 */
+	bitstream_remove_bits(istream, table_bits);
+	do {
+		key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1);
+	} while ((entry = decode_table[key_bits]) >= 0xC000);
+	return entry;
+}
+
+/*
+ * Copy an LZ77 match at (dst - offset) to dst.
+ *
+ * The length and offset must be already validated --- that is, (dst - offset)
+ * can't underrun the output buffer, and (dst + length) can't overrun the output
+ * buffer.  Also, the length cannot be 0.
+ *
+ * @bufend points to the byte past the end of the output buffer.  This function
+ * won't write any data beyond this position.
+ *
+ * Returns dst + length.
+ */
+static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend,
+			       u32 min_length)
+{
+	const u8 *src = dst - offset;
+
+	/*
+	 * Try to copy one machine word at a time.  On i386 and x86_64 this is
+	 * faster than copying one byte at a time, unless the data is
+	 * near-random and all the matches have very short lengths.  Note that
+	 * since this requires unaligned memory accesses, it won't necessarily
+	 * be faster on every architecture.
+	 *
+	 * Also note that we might copy more than the length of the match.  For
+	 * example, if a word is 8 bytes and the match is of length 5, then
+	 * we'll simply copy 8 bytes.  This is okay as long as we don't write
+	 * beyond the end of the output buffer, hence the check for (bufend -
+	 * end >= WORDBYTES - 1).
+	 */
+#ifdef FAST_UNALIGNED_ACCESS
+	u8 * const end = dst + length;
+
+	if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) {
+
+		if (offset >= WORDBYTES) {
+			/* The source and destination words don't overlap.  */
+
+			/* To improve branch prediction, one iteration of this
+			 * loop is unrolled.  Most matches are short and will
+			 * fail the first check.  But if that check passes, then
+			 * it becomes increasingly likely that the match is long
+			 * and we'll need to continue copying.
+			 */
+
+			copy_unaligned_word(src, dst);
+			src += WORDBYTES;
+			dst += WORDBYTES;
+
+			if (dst < end) {
+				do {
+					copy_unaligned_word(src, dst);
+					src += WORDBYTES;
+					dst += WORDBYTES;
+				} while (dst < end);
+			}
+			return end;
+		} else if (offset == 1) {
+
+			/* Offset 1 matches are equivalent to run-length
+			 * encoding of the previous byte.  This case is common
+			 * if the data contains many repeated bytes.
+			 */
+			size_t v = repeat_byte(*(dst - 1));
+
+			do {
+				put_unaligned(v, (size_t *)dst);
+				src += WORDBYTES;
+				dst += WORDBYTES;
+			} while (dst < end);
+			return end;
+		}
+		/*
+		 * We don't bother with special cases for other 'offset <
+		 * WORDBYTES', which are usually rarer than 'offset == 1'.  Extra
+		 * checks will just slow things down.  Actually, it's possible
+		 * to handle all the 'offset < WORDBYTES' cases using the same
+		 * code, but it becomes more complicated and doesn't seem any
+		 * faster overall; it definitely slows down the more common
Kmods SIG d83023
+		 * 'offset == 1' case.
Kmods SIG d83023
+		 */
Kmods SIG d83023
+	}
Kmods SIG d83023
+#endif /* FAST_UNALIGNED_ACCESS */
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Fall back to a bytewise copy.  */
Kmods SIG d83023
+
Kmods SIG d83023
+	if (min_length >= 2) {
Kmods SIG d83023
+		*dst++ = *src++;
Kmods SIG d83023
+		length--;
Kmods SIG d83023
+	}
Kmods SIG d83023
+	if (min_length >= 3) {
Kmods SIG d83023
+		*dst++ = *src++;
Kmods SIG d83023
+		length--;
Kmods SIG d83023
+	}
Kmods SIG d83023
+	do {
Kmods SIG d83023
+		*dst++ = *src++;
Kmods SIG d83023
+	} while (--length);
Kmods SIG d83023
+
Kmods SIG d83023
+	return dst;
Kmods SIG d83023
+}
Kmods SIG d83023
diff --git a/src/lib/lib.h b/src/lib/lib.h
Kmods SIG d83023
new file mode 100644
Kmods SIG d83023
index 0000000000000000000000000000000000000000..f508fbad2e712d946274b13f0ec7b244dc264a4d
Kmods SIG d83023
--- /dev/null
Kmods SIG d83023
+++ b/src/lib/lib.h
Kmods SIG d83023
@@ -0,0 +1,26 @@
Kmods SIG d83023
+/* SPDX-License-Identifier: GPL-2.0-or-later */
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * Adapted for linux kernel by Alexander Mamaev:
Kmods SIG d83023
+ * - remove implementations of get_unaligned_
Kmods SIG d83023
+ * - assume GCC is always defined
Kmods SIG d83023
+ * - ISO C90
Kmods SIG d83023
+ * - linux kernel code style
Kmods SIG d83023
+ */
Kmods SIG d83023
+
Kmods SIG d83023
+
Kmods SIG d83023
+/* globals from xpress_decompress.c */
Kmods SIG d83023
+struct xpress_decompressor *xpress_allocate_decompressor(void);
Kmods SIG d83023
+void xpress_free_decompressor(struct xpress_decompressor *d);
Kmods SIG d83023
+int xpress_decompress(struct xpress_decompressor *__restrict d,
Kmods SIG d83023
+		      const void *__restrict compressed_data,
Kmods SIG d83023
+		      size_t compressed_size,
Kmods SIG d83023
+		      void *__restrict uncompressed_data,
Kmods SIG d83023
+		      size_t uncompressed_size);
Kmods SIG d83023
+
Kmods SIG d83023
+/* globals from lzx_decompress.c */
Kmods SIG d83023
+struct lzx_decompressor *lzx_allocate_decompressor(void);
Kmods SIG d83023
+void lzx_free_decompressor(struct lzx_decompressor *d);
Kmods SIG d83023
+int lzx_decompress(struct lzx_decompressor *__restrict d,
Kmods SIG d83023
+		   const void *__restrict compressed_data,
Kmods SIG d83023
+		   size_t compressed_size, void *__restrict uncompressed_data,
Kmods SIG d83023
+		   size_t uncompressed_size);
Kmods SIG d83023
diff --git a/src/lib/lzx_decompress.c b/src/lib/lzx_decompress.c
Kmods SIG d83023
new file mode 100644
Kmods SIG d83023
index 0000000000000000000000000000000000000000..77a381a693d117e3e8c4860130880fedf3af868e
Kmods SIG d83023
--- /dev/null
Kmods SIG d83023
+++ b/src/lib/lzx_decompress.c
Kmods SIG d83023
@@ -0,0 +1,683 @@
Kmods SIG d83023
+// SPDX-License-Identifier: GPL-2.0-or-later
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * lzx_decompress.c - A decompressor for the LZX compression format, which can
Kmods SIG d83023
+ * be used in "System Compressed" files.  This is based on the code from wimlib.
Kmods SIG d83023
+ * This code only supports a window size (dictionary size) of 32768 bytes, since
Kmods SIG d83023
+ * this is the only size used in System Compression.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Copyright (C) 2015 Eric Biggers
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * This program is free software: you can redistribute it and/or modify it under
Kmods SIG d83023
+ * the terms of the GNU General Public License as published by the Free Software
Kmods SIG d83023
+ * Foundation, either version 2 of the License, or (at your option) any later
Kmods SIG d83023
+ * version.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * This program is distributed in the hope that it will be useful, but WITHOUT
Kmods SIG d83023
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
Kmods SIG d83023
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
Kmods SIG d83023
+ * details.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * You should have received a copy of the GNU General Public License along with
Kmods SIG d83023
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
Kmods SIG d83023
+ */
Kmods SIG d83023
+
Kmods SIG d83023
+#include "decompress_common.h"
Kmods SIG d83023
+#include "lib.h"
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of literal byte values  */
Kmods SIG d83023
+#define LZX_NUM_CHARS			256
Kmods SIG d83023
+
Kmods SIG d83023
+/* The smallest and largest allowed match lengths  */
Kmods SIG d83023
+#define LZX_MIN_MATCH_LEN		2
Kmods SIG d83023
+#define LZX_MAX_MATCH_LEN		257
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of distinct match lengths that can be represented  */
Kmods SIG d83023
+#define LZX_NUM_LENS			(LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1)
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of match lengths for which no length symbol is required  */
Kmods SIG d83023
+#define LZX_NUM_PRIMARY_LENS		7
Kmods SIG d83023
+#define LZX_NUM_LEN_HEADERS		(LZX_NUM_PRIMARY_LENS + 1)
Kmods SIG d83023
+
Kmods SIG d83023
+/* Valid values of the 3-bit block type field  */
Kmods SIG d83023
+#define LZX_BLOCKTYPE_VERBATIM		1
Kmods SIG d83023
+#define LZX_BLOCKTYPE_ALIGNED		2
Kmods SIG d83023
+#define LZX_BLOCKTYPE_UNCOMPRESSED	3
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of offset slots for a window size of 32768  */
Kmods SIG d83023
+#define LZX_NUM_OFFSET_SLOTS		30
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of symbols in the main code for a window size of 32768  */
Kmods SIG d83023
+#define LZX_MAINCODE_NUM_SYMBOLS	\
Kmods SIG d83023
+	(LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS))
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of symbols in the length code  */
Kmods SIG d83023
+#define LZX_LENCODE_NUM_SYMBOLS		(LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS)
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of symbols in the precode  */
Kmods SIG d83023
+#define LZX_PRECODE_NUM_SYMBOLS		20
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of bits in which each precode codeword length is represented  */
Kmods SIG d83023
+#define LZX_PRECODE_ELEMENT_SIZE	4
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of low-order bits of each match offset that are entropy-encoded in
Kmods SIG d83023
+ * aligned offset blocks
Kmods SIG d83023
+ */
Kmods SIG d83023
+#define LZX_NUM_ALIGNED_OFFSET_BITS	3
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of symbols in the aligned offset code  */
Kmods SIG d83023
+#define LZX_ALIGNEDCODE_NUM_SYMBOLS	(1 << LZX_NUM_ALIGNED_OFFSET_BITS)
Kmods SIG d83023
+
Kmods SIG d83023
+/* Mask for the match offset bits that are entropy-encoded in aligned offset
Kmods SIG d83023
+ * blocks
Kmods SIG d83023
+ */
Kmods SIG d83023
+#define LZX_ALIGNED_OFFSET_BITMASK	((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1)
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of bits in which each aligned offset codeword length is represented  */
Kmods SIG d83023
+#define LZX_ALIGNEDCODE_ELEMENT_SIZE	3
Kmods SIG d83023
+
Kmods SIG d83023
+/* Maximum lengths (in bits) of the codewords in each Huffman code  */
Kmods SIG d83023
+#define LZX_MAX_MAIN_CODEWORD_LEN	16
Kmods SIG d83023
+#define LZX_MAX_LEN_CODEWORD_LEN	16
Kmods SIG d83023
+#define LZX_MAX_PRE_CODEWORD_LEN	((1 << LZX_PRECODE_ELEMENT_SIZE) - 1)
Kmods SIG d83023
+#define LZX_MAX_ALIGNED_CODEWORD_LEN	((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1)
Kmods SIG d83023
+
Kmods SIG d83023
+/* The default "filesize" value used in pre/post-processing.  In the LZX format
Kmods SIG d83023
+ * used in cabinet files this value must be given to the decompressor, whereas
Kmods SIG d83023
+ * in the LZX format used in WIM files and system-compressed files this value is
Kmods SIG d83023
+ * fixed at 12000000.
Kmods SIG d83023
+ */
Kmods SIG d83023
+#define LZX_DEFAULT_FILESIZE		12000000
Kmods SIG d83023
+
Kmods SIG d83023
+/* Assumed block size when the encoded block size begins with a 0 bit.  */
Kmods SIG d83023
+#define LZX_DEFAULT_BLOCK_SIZE		32768
Kmods SIG d83023
+
Kmods SIG d83023
+/* Number of offsets in the recent (or "repeat") offsets queue.  */
Kmods SIG d83023
+#define LZX_NUM_RECENT_OFFSETS		3
Kmods SIG d83023
+
Kmods SIG d83023
+/* These values are chosen for fast decompression.  */
Kmods SIG d83023
+#define LZX_MAINCODE_TABLEBITS		11
Kmods SIG d83023
+#define LZX_LENCODE_TABLEBITS		10
Kmods SIG d83023
+#define LZX_PRECODE_TABLEBITS		6
Kmods SIG d83023
+#define LZX_ALIGNEDCODE_TABLEBITS	7
Kmods SIG d83023
+
Kmods SIG d83023
+#define LZX_READ_LENS_MAX_OVERRUN	50
Kmods SIG d83023
+
Kmods SIG d83023
+/* Mapping: offset slot => first match offset that uses that offset slot.
Kmods SIG d83023
+ */
Kmods SIG d83023
+static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = {
Kmods SIG d83023
+	0,	1,	2,	3,	4,	/* 0  --- 4  */
Kmods SIG d83023
+	6,	8,	12,	16,	24,	/* 5  --- 9  */
Kmods SIG d83023
+	32,	48,	64,	96,	128,	/* 10 --- 14 */
Kmods SIG d83023
+	192,	256,	384,	512,	768,	/* 15 --- 19 */
Kmods SIG d83023
+	1024,	1536,	2048,	3072,	4096,   /* 20 --- 24 */
Kmods SIG d83023
+	6144,	8192,	12288,	16384,	24576,	/* 25 --- 29 */
Kmods SIG d83023
+	32768,					/* extra     */
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+/* Mapping: offset slot => how many extra bits must be read and added to the
Kmods SIG d83023
+ * corresponding offset slot base to decode the match offset.
Kmods SIG d83023
+ */
Kmods SIG d83023
+static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = {
Kmods SIG d83023
+	0,	0,	0,	0,	1,
Kmods SIG d83023
+	1,	2,	2,	3,	3,
Kmods SIG d83023
+	4,	4,	5,	5,	6,
Kmods SIG d83023
+	6,	7,	7,	8,	8,
Kmods SIG d83023
+	9,	9,	10,	10,	11,
Kmods SIG d83023
+	11,	12,	12,	13,	13,
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+/* Reusable heap-allocated memory for LZX decompression  */
Kmods SIG d83023
+struct lzx_decompressor {
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Huffman decoding tables, and arrays that map symbols to codeword
Kmods SIG d83023
+	 * lengths
Kmods SIG d83023
+	 */
Kmods SIG d83023
+
Kmods SIG d83023
+	u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) +
Kmods SIG d83023
+					(LZX_MAINCODE_NUM_SYMBOLS * 2)];
Kmods SIG d83023
+	u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
Kmods SIG d83023
+
Kmods SIG d83023
+
Kmods SIG d83023
+	u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) +
Kmods SIG d83023
+					(LZX_LENCODE_NUM_SYMBOLS * 2)];
Kmods SIG d83023
+	u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN];
Kmods SIG d83023
+
Kmods SIG d83023
+
Kmods SIG d83023
+	u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) +
Kmods SIG d83023
+					(LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)];
Kmods SIG d83023
+	u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS];
Kmods SIG d83023
+
Kmods SIG d83023
+	u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) +
Kmods SIG d83023
+				 (LZX_PRECODE_NUM_SYMBOLS * 2)];
Kmods SIG d83023
+	u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS];
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Temporary space for make_huffman_decode_table()  */
Kmods SIG d83023
+	u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) +
Kmods SIG d83023
+			  LZX_MAINCODE_NUM_SYMBOLS];
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+static void undo_e8_translation(void *target, s32 input_pos)
Kmods SIG d83023
+{
Kmods SIG d83023
+	s32 abs_offset, rel_offset;
Kmods SIG d83023
+
Kmods SIG d83023
+	abs_offset = get_unaligned_le32(target);
Kmods SIG d83023
+	if (abs_offset >= 0) {
Kmods SIG d83023
+		if (abs_offset < LZX_DEFAULT_FILESIZE) {
Kmods SIG d83023
+			/* "good translation" */
Kmods SIG d83023
+			rel_offset = abs_offset - input_pos;
Kmods SIG d83023
+			put_unaligned_le32(rel_offset, target);
Kmods SIG d83023
+		}
Kmods SIG d83023
+	} else {
Kmods SIG d83023
+		if (abs_offset >= -input_pos) {
Kmods SIG d83023
+			/* "compensating translation" */
Kmods SIG d83023
+			rel_offset = abs_offset + LZX_DEFAULT_FILESIZE;
Kmods SIG d83023
+			put_unaligned_le32(rel_offset, target);
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * Undo the 'E8' preprocessing used in LZX.  Before compression, the
Kmods SIG d83023
+ * uncompressed data was preprocessed by changing the targets of suspected x86
Kmods SIG d83023
+ * CALL instructions from relative offsets to absolute offsets.  After
Kmods SIG d83023
+ * match/literal decoding, the decompressor must undo the translation.
Kmods SIG d83023
+ */
Kmods SIG d83023
+static void lzx_postprocess(u8 *data, u32 size)
Kmods SIG d83023
+{
Kmods SIG d83023
+	/*
Kmods SIG d83023
+	 * A worthwhile optimization is to push the end-of-buffer check into the
Kmods SIG d83023
+	 * relatively rare E8 case.  This is possible if we replace the last six
Kmods SIG d83023
+	 * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte
Kmods SIG d83023
+	 * before reaching end-of-buffer.  In addition, this scheme guarantees
Kmods SIG d83023
+	 * that no translation can begin following an E8 byte in the last 10
Kmods SIG d83023
+	 * bytes because a 4-byte offset containing E8 as its high byte is a
Kmods SIG d83023
+	 * large negative number that is not valid for translation.  That is
Kmods SIG d83023
+	 * exactly what we need.
Kmods SIG d83023
+	 */
Kmods SIG d83023
+	u8 *tail;
Kmods SIG d83023
+	u8 saved_bytes[6];
Kmods SIG d83023
+	u8 *p;
Kmods SIG d83023
+
Kmods SIG d83023
+	if (size <= 10)
Kmods SIG d83023
+		return;
Kmods SIG d83023
+
Kmods SIG d83023
+	tail = &data[size - 6];
Kmods SIG d83023
+	memcpy(saved_bytes, tail, 6);
Kmods SIG d83023
+	memset(tail, 0xE8, 6);
Kmods SIG d83023
+	p = data;
Kmods SIG d83023
+	for (;;) {
Kmods SIG d83023
+		while (*p != 0xE8)
Kmods SIG d83023
+			p++;
Kmods SIG d83023
+		if (p >= tail)
Kmods SIG d83023
+			break;
Kmods SIG d83023
+		undo_e8_translation(p + 1, p - data);
Kmods SIG d83023
+		p += 5;
Kmods SIG d83023
+	}
Kmods SIG d83023
+	memcpy(tail, saved_bytes, 6);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/* Read a Huffman-encoded symbol using the precode.  */
Kmods SIG d83023
+static forceinline u32 read_presym(const struct lzx_decompressor *d,
Kmods SIG d83023
+					struct input_bitstream *is)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return read_huffsym(is, d->precode_decode_table,
Kmods SIG d83023
+			    LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/* Read a Huffman-encoded symbol using the main code.  */
Kmods SIG d83023
+static forceinline u32 read_mainsym(const struct lzx_decompressor *d,
Kmods SIG d83023
+					 struct input_bitstream *is)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return read_huffsym(is, d->maincode_decode_table,
Kmods SIG d83023
+			    LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/* Read a Huffman-encoded symbol using the length code.  */
Kmods SIG d83023
+static forceinline u32 read_lensym(const struct lzx_decompressor *d,
Kmods SIG d83023
+					struct input_bitstream *is)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return read_huffsym(is, d->lencode_decode_table,
Kmods SIG d83023
+			    LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/* Read a Huffman-encoded symbol using the aligned offset code.  */
Kmods SIG d83023
+static forceinline u32 read_alignedsym(const struct lzx_decompressor *d,
Kmods SIG d83023
+					    struct input_bitstream *is)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return read_huffsym(is, d->alignedcode_decode_table,
Kmods SIG d83023
+			    LZX_ALIGNEDCODE_TABLEBITS,
Kmods SIG d83023
+			    LZX_MAX_ALIGNED_CODEWORD_LEN);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * Read the precode from the compressed input bitstream, then use it to decode
Kmods SIG d83023
+ * @num_lens codeword length values.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @is:		The input bitstream.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @lens:	An array that contains the length values from the previous time
Kmods SIG d83023
+ *		the codeword lengths for this Huffman code were read, or all 0's
Kmods SIG d83023
+ *		if this is the first time.  This array must have at least
Kmods SIG d83023
+ *		(@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @num_lens:	Number of length values to decode.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Returns 0 on success, or -1 if the data was invalid.
Kmods SIG d83023
+ */
Kmods SIG d83023
+static int lzx_read_codeword_lens(struct lzx_decompressor *d,
Kmods SIG d83023
+				  struct input_bitstream *is,
Kmods SIG d83023
+				  u8 *lens, u32 num_lens)
Kmods SIG d83023
+{
Kmods SIG d83023
+	u8 *len_ptr = lens;
Kmods SIG d83023
+	u8 *lens_end = lens + num_lens;
Kmods SIG d83023
+	int i;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Read the lengths of the precode codewords.  These are given
Kmods SIG d83023
+	 * explicitly.
Kmods SIG d83023
+	 */
Kmods SIG d83023
+	for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) {
Kmods SIG d83023
+		d->precode_lens[i] =
Kmods SIG d83023
+			bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE);
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Make the decoding table for the precode.  */
Kmods SIG d83023
+	if (make_huffman_decode_table(d->precode_decode_table,
Kmods SIG d83023
+				      LZX_PRECODE_NUM_SYMBOLS,
Kmods SIG d83023
+				      LZX_PRECODE_TABLEBITS,
Kmods SIG d83023
+				      d->precode_lens,
Kmods SIG d83023
+				      LZX_MAX_PRE_CODEWORD_LEN,
Kmods SIG d83023
+				      d->working_space))
Kmods SIG d83023
+		return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Decode the codeword lengths.  */
Kmods SIG d83023
+	do {
Kmods SIG d83023
+		u32 presym;
Kmods SIG d83023
+		u8 len;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Read the next precode symbol.  */
Kmods SIG d83023
+		presym = read_presym(d, is);
Kmods SIG d83023
+		if (presym < 17) {
Kmods SIG d83023
+			/* Difference from old length  */
Kmods SIG d83023
+			len = *len_ptr - presym;
Kmods SIG d83023
+			if ((s8)len < 0)
Kmods SIG d83023
+				len += 17;
Kmods SIG d83023
+			*len_ptr++ = len;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			/* Special RLE values  */
Kmods SIG d83023
+
Kmods SIG d83023
+			u32 run_len;
Kmods SIG d83023
+
Kmods SIG d83023
+			if (presym == 17) {
Kmods SIG d83023
+				/* Run of 0's  */
Kmods SIG d83023
+				run_len = 4 + bitstream_read_bits(is, 4);
Kmods SIG d83023
+				len = 0;
Kmods SIG d83023
+			} else if (presym == 18) {
Kmods SIG d83023
+				/* Longer run of 0's  */
Kmods SIG d83023
+				run_len = 20 + bitstream_read_bits(is, 5);
Kmods SIG d83023
+				len = 0;
Kmods SIG d83023
+			} else {
Kmods SIG d83023
+				/* Run of identical lengths  */
Kmods SIG d83023
+				run_len = 4 + bitstream_read_bits(is, 1);
Kmods SIG d83023
+				presym = read_presym(d, is);
Kmods SIG d83023
+				if (presym > 17)
Kmods SIG d83023
+					return -1;
Kmods SIG d83023
+				len = *len_ptr - presym;
Kmods SIG d83023
+				if ((s8)len < 0)
Kmods SIG d83023
+					len += 17;
Kmods SIG d83023
+			}
Kmods SIG d83023
+
Kmods SIG d83023
+			do {
Kmods SIG d83023
+				*len_ptr++ = len;
Kmods SIG d83023
+			} while (--run_len);
Kmods SIG d83023
+			/* Worst case overrun is when presym == 18,
Kmods SIG d83023
+			 * run_len == 20 + 31, and only 1 length was remaining.
Kmods SIG d83023
+			 * So LZX_READ_LENS_MAX_OVERRUN == 50.
Kmods SIG d83023
+			 *
Kmods SIG d83023
+			 * Overrun while reading the first half of maincode_lens
Kmods SIG d83023
+			 * can corrupt the previous values in the second half.
Kmods SIG d83023
+			 * This doesn't really matter because the resulting
Kmods SIG d83023
+			 * lengths will still be in range, and data that
Kmods SIG d83023
+			 * generates overruns is invalid anyway.
Kmods SIG d83023
+			 */
Kmods SIG d83023
+		}
Kmods SIG d83023
+	} while (len_ptr < lens_end);
Kmods SIG d83023
+
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+}
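
As an aside (not part of the patch): for precode symbols 0..16 the branch above stores new_len = (old_len - presym) mod 17, so a symbol equal to the old length yields 0, i.e. an unused codeword. A minimal userspace sketch of that update rule; the helper name below is hypothetical:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the presym < 17 branch above. */
static uint8_t lzx_apply_len_delta(uint8_t old_len, uint32_t presym)
{
	int len = (int)old_len - (int)presym;	/* presym assumed < 17 */

	if (len < 0)
		len += 17;			/* wrap modulo 17, as in the decoder */
	return (uint8_t)len;
}

int main(void)
{
	/* old length 5, presym 9 -> (5 - 9 + 17) = 13; presym 5 -> 0 (codeword unused) */
	printf("%u %u\n", lzx_apply_len_delta(5, 9), lzx_apply_len_delta(5, 5));
	return 0;
}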
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * Read the header of an LZX block and save the block type and (uncompressed)
Kmods SIG d83023
+ * size in *block_type_ret and *block_size_ret, respectively.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * If the block is compressed, also update the Huffman decode @tables with the
Kmods SIG d83023
+ * new Huffman codes.  If the block is uncompressed, also update the match
Kmods SIG d83023
+ * offset @queue with the new match offsets.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Return 0 on success, or -1 if the data was invalid.
Kmods SIG d83023
+ */
Kmods SIG d83023
+static int lzx_read_block_header(struct lzx_decompressor *d,
Kmods SIG d83023
+				 struct input_bitstream *is,
Kmods SIG d83023
+				 int *block_type_ret,
Kmods SIG d83023
+				 u32 *block_size_ret,
Kmods SIG d83023
+				 u32 recent_offsets[])
Kmods SIG d83023
+{
Kmods SIG d83023
+	int block_type;
Kmods SIG d83023
+	u32 block_size;
Kmods SIG d83023
+	int i;
Kmods SIG d83023
+
Kmods SIG d83023
+	bitstream_ensure_bits(is, 4);
Kmods SIG d83023
+
Kmods SIG d83023
+	/* The first three bits tell us what kind of block it is, and should be
Kmods SIG d83023
+	 * one of the LZX_BLOCKTYPE_* values.
Kmods SIG d83023
+	 */
Kmods SIG d83023
+	block_type = bitstream_pop_bits(is, 3);
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Read the block size.  */
Kmods SIG d83023
+	if (bitstream_pop_bits(is, 1)) {
Kmods SIG d83023
+		block_size = LZX_DEFAULT_BLOCK_SIZE;
Kmods SIG d83023
+	} else {
Kmods SIG d83023
+		block_size = 0;
Kmods SIG d83023
+		block_size |= bitstream_read_bits(is, 8);
Kmods SIG d83023
+		block_size <<= 8;
Kmods SIG d83023
+		block_size |= bitstream_read_bits(is, 8);
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	switch (block_type) {
Kmods SIG d83023
+
Kmods SIG d83023
+	case LZX_BLOCKTYPE_ALIGNED:
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Read the aligned offset code and prepare its decode table.
Kmods SIG d83023
+		 */
Kmods SIG d83023
+
Kmods SIG d83023
+		for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) {
Kmods SIG d83023
+			d->alignedcode_lens[i] =
Kmods SIG d83023
+				bitstream_read_bits(is,
Kmods SIG d83023
+						    LZX_ALIGNEDCODE_ELEMENT_SIZE);
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		if (make_huffman_decode_table(d->alignedcode_decode_table,
Kmods SIG d83023
+					      LZX_ALIGNEDCODE_NUM_SYMBOLS,
Kmods SIG d83023
+					      LZX_ALIGNEDCODE_TABLEBITS,
Kmods SIG d83023
+					      d->alignedcode_lens,
Kmods SIG d83023
+					      LZX_MAX_ALIGNED_CODEWORD_LEN,
Kmods SIG d83023
+					      d->working_space))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Fall through, since the rest of the header for aligned offset
Kmods SIG d83023
+		 * blocks is the same as that for verbatim blocks.
Kmods SIG d83023
+		 */
Kmods SIG d83023
+		fallthrough;
Kmods SIG d83023
+
Kmods SIG d83023
+	case LZX_BLOCKTYPE_VERBATIM:
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Read the main code and prepare its decode table.
Kmods SIG d83023
+		 *
Kmods SIG d83023
+		 * Note that the codeword lengths in the main code are encoded
Kmods SIG d83023
+		 * in two parts: one part for literal symbols, and one part for
Kmods SIG d83023
+		 * match symbols.
Kmods SIG d83023
+		 */
Kmods SIG d83023
+
Kmods SIG d83023
+		if (lzx_read_codeword_lens(d, is, d->maincode_lens,
Kmods SIG d83023
+					   LZX_NUM_CHARS))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (lzx_read_codeword_lens(d, is,
Kmods SIG d83023
+					   d->maincode_lens + LZX_NUM_CHARS,
Kmods SIG d83023
+					   LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (make_huffman_decode_table(d->maincode_decode_table,
Kmods SIG d83023
+					      LZX_MAINCODE_NUM_SYMBOLS,
Kmods SIG d83023
+					      LZX_MAINCODE_TABLEBITS,
Kmods SIG d83023
+					      d->maincode_lens,
Kmods SIG d83023
+					      LZX_MAX_MAIN_CODEWORD_LEN,
Kmods SIG d83023
+					      d->working_space))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Read the length code and prepare its decode table.  */
Kmods SIG d83023
+
Kmods SIG d83023
+		if (lzx_read_codeword_lens(d, is, d->lencode_lens,
Kmods SIG d83023
+					   LZX_LENCODE_NUM_SYMBOLS))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (make_huffman_decode_table(d->lencode_decode_table,
Kmods SIG d83023
+					      LZX_LENCODE_NUM_SYMBOLS,
Kmods SIG d83023
+					      LZX_LENCODE_TABLEBITS,
Kmods SIG d83023
+					      d->lencode_lens,
Kmods SIG d83023
+					      LZX_MAX_LEN_CODEWORD_LEN,
Kmods SIG d83023
+					      d->working_space))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		break;
Kmods SIG d83023
+
Kmods SIG d83023
+	case LZX_BLOCKTYPE_UNCOMPRESSED:
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Before reading the three recent offsets from the uncompressed
Kmods SIG d83023
+		 * block header, the stream must be aligned on a 16-bit
Kmods SIG d83023
+		 * boundary.  But if the stream is *already* aligned, then the
Kmods SIG d83023
+		 * next 16 bits must be discarded.
Kmods SIG d83023
+		 */
Kmods SIG d83023
+		bitstream_ensure_bits(is, 1);
Kmods SIG d83023
+		bitstream_align(is);
Kmods SIG d83023
+
Kmods SIG d83023
+		recent_offsets[0] = bitstream_read_u32(is);
Kmods SIG d83023
+		recent_offsets[1] = bitstream_read_u32(is);
Kmods SIG d83023
+		recent_offsets[2] = bitstream_read_u32(is);
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Offsets of 0 are invalid.  */
Kmods SIG d83023
+		if (recent_offsets[0] == 0 || recent_offsets[1] == 0 ||
Kmods SIG d83023
+		    recent_offsets[2] == 0)
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+		break;
Kmods SIG d83023
+
Kmods SIG d83023
+	default:
Kmods SIG d83023
+		/* Unrecognized block type.  */
Kmods SIG d83023
+		return -1;
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	*block_type_ret = block_type;
Kmods SIG d83023
+	*block_size_ret = block_size;
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+}
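
For reference, the fields read above are, in order: 3 bits of block type, 1 flag bit selecting LZX_DEFAULT_BLOCK_SIZE, and (if the flag is clear) a 16-bit block size sent high byte first. The userspace sketch below walks the same field order over a simplified MSB-first bit cursor; the real input_bitstream reads 16-bit little-endian words, so this only illustrates the layout, not the actual bit packing:

#include <stdint.h>
#include <stdio.h>

struct bit_cursor {
	const uint8_t *p;
	unsigned int bitpos;
};

/* Simplified MSB-first cursor, only for illustrating the field order. */
static uint32_t take_bits(struct bit_cursor *c, unsigned int n)
{
	uint32_t v = 0;

	while (n--) {
		unsigned int byte = c->bitpos >> 3;
		unsigned int bit = 7 - (c->bitpos & 7);

		v = (v << 1) | ((c->p[byte] >> bit) & 1u);
		c->bitpos++;
	}
	return v;
}

int main(void)
{
	/* 001 (type) 0 (flag) 00010000 00000000 (size) -> type 1, size 0x1000 */
	const uint8_t data[] = { 0x21, 0x00, 0x00 };
	struct bit_cursor c = { data, 0 };
	uint32_t type, size;

	type = take_bits(&c, 3);		/* LZX_BLOCKTYPE_* value */
	if (take_bits(&c, 1)) {
		size = 0x8000;			/* placeholder default block size */
	} else {
		size = take_bits(&c, 8) << 8;	/* high byte first */
		size |= take_bits(&c, 8);	/* then low byte */
	}
	printf("type=%u size=0x%x\n", type, size);
	return 0;
}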
Kmods SIG d83023
+
Kmods SIG d83023
+/* Decompress a block of LZX-compressed data.  */
Kmods SIG d83023
+static int lzx_decompress_block(const struct lzx_decompressor *d,
Kmods SIG d83023
+				struct input_bitstream *is,
Kmods SIG d83023
+				int block_type, u32 block_size,
Kmods SIG d83023
+				u8 * const out_begin, u8 *out_next,
Kmods SIG d83023
+				u32 recent_offsets[])
Kmods SIG d83023
+{
Kmods SIG d83023
+	u8 * const block_end = out_next + block_size;
Kmods SIG d83023
+	u32 ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED);
Kmods SIG d83023
+
Kmods SIG d83023
+	do {
Kmods SIG d83023
+		u32 mainsym;
Kmods SIG d83023
+		u32 match_len;
Kmods SIG d83023
+		u32 match_offset;
Kmods SIG d83023
+		u32 offset_slot;
Kmods SIG d83023
+		u32 num_extra_bits;
Kmods SIG d83023
+
Kmods SIG d83023
+		mainsym = read_mainsym(d, is);
Kmods SIG d83023
+		if (mainsym < LZX_NUM_CHARS) {
Kmods SIG d83023
+			/* Literal  */
Kmods SIG d83023
+			*out_next++ = mainsym;
Kmods SIG d83023
+			continue;
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Match  */
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Decode the length header and offset slot.  */
Kmods SIG d83023
+		mainsym -= LZX_NUM_CHARS;
Kmods SIG d83023
+		match_len = mainsym % LZX_NUM_LEN_HEADERS;
Kmods SIG d83023
+		offset_slot = mainsym / LZX_NUM_LEN_HEADERS;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* If needed, read a length symbol to decode the full length. */
Kmods SIG d83023
+		if (match_len == LZX_NUM_PRIMARY_LENS)
Kmods SIG d83023
+			match_len += read_lensym(d, is);
Kmods SIG d83023
+		match_len += LZX_MIN_MATCH_LEN;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (offset_slot < LZX_NUM_RECENT_OFFSETS) {
Kmods SIG d83023
+			/* Repeat offset  */
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Note: This isn't a real LRU queue, since using the R2
Kmods SIG d83023
+			 * offset doesn't bump the R1 offset down to R2.  This
Kmods SIG d83023
+			 * quirk allows all 3 recent offsets to be handled by
Kmods SIG d83023
+			 * the same code.  (For R0, the swap is a no-op.)
Kmods SIG d83023
+			 */
Kmods SIG d83023
+			match_offset = recent_offsets[offset_slot];
Kmods SIG d83023
+			recent_offsets[offset_slot] = recent_offsets[0];
Kmods SIG d83023
+			recent_offsets[0] = match_offset;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			/* Explicit offset  */
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Look up the number of extra bits that need to be read
Kmods SIG d83023
+			 * to decode offsets with this offset slot.
Kmods SIG d83023
+			 */
Kmods SIG d83023
+			num_extra_bits = lzx_extra_offset_bits[offset_slot];
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Start with the offset slot base value.  */
Kmods SIG d83023
+			match_offset = lzx_offset_slot_base[offset_slot];
Kmods SIG d83023
+
Kmods SIG d83023
+			/* In aligned offset blocks, the low-order 3 bits of
Kmods SIG d83023
+			 * each offset are encoded using the aligned offset
Kmods SIG d83023
+			 * code.  Otherwise, all the extra bits are literal.
Kmods SIG d83023
+			 */
Kmods SIG d83023
+
Kmods SIG d83023
+			if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) {
Kmods SIG d83023
+				match_offset +=
Kmods SIG d83023
+					bitstream_read_bits(is, num_extra_bits -
Kmods SIG d83023
+								LZX_NUM_ALIGNED_OFFSET_BITS)
Kmods SIG d83023
+							<< LZX_NUM_ALIGNED_OFFSET_BITS;
Kmods SIG d83023
+				match_offset += read_alignedsym(d, is);
Kmods SIG d83023
+			} else {
Kmods SIG d83023
+				match_offset += bitstream_read_bits(is, num_extra_bits);
Kmods SIG d83023
+			}
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Adjust the offset.  */
Kmods SIG d83023
+			match_offset -= (LZX_NUM_RECENT_OFFSETS - 1);
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Update the recent offsets.  */
Kmods SIG d83023
+			recent_offsets[2] = recent_offsets[1];
Kmods SIG d83023
+			recent_offsets[1] = recent_offsets[0];
Kmods SIG d83023
+			recent_offsets[0] = match_offset;
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Validate the match, then copy it to the current position.  */
Kmods SIG d83023
+
Kmods SIG d83023
+		if (match_len > (size_t)(block_end - out_next))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (match_offset > (size_t)(out_next - out_begin))
Kmods SIG d83023
+			return -1;
Kmods SIG d83023
+
Kmods SIG d83023
+		out_next = lz_copy(out_next, match_len, match_offset,
Kmods SIG d83023
+				   block_end, LZX_MIN_MATCH_LEN);
Kmods SIG d83023
+
Kmods SIG d83023
+	} while (out_next != block_end);
Kmods SIG d83023
+
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+}
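
A compact illustration of the explicit-offset arithmetic above: the offset starts at the slot base, gains the extra bits (with the low 3 bits coming from the aligned-offset code in aligned blocks), and is then biased down by LZX_NUM_RECENT_OFFSETS - 1. The slot base and symbol values below are made up for the example; only the formula matches the decoder:

#include <stdint.h>
#include <stdio.h>

#define NUM_ALIGNED_OFFSET_BITS	3
#define NUM_RECENT_OFFSETS	3

/* extra: value of the literal extra bits already taken from the stream */
static uint32_t offset_from_slot(uint32_t slot_base, uint32_t extra,
				 uint32_t num_extra_bits, uint32_t aligned_sym,
				 int aligned_block)
{
	uint32_t offset = slot_base;

	if (aligned_block && num_extra_bits >= NUM_ALIGNED_OFFSET_BITS)
		offset += (extra << NUM_ALIGNED_OFFSET_BITS) + aligned_sym;
	else
		offset += extra;

	return offset - (NUM_RECENT_OFFSETS - 1);	/* same bias as the decoder */
}

int main(void)
{
	/* made-up slot base 38, 4 extra bits (1 literal bit = 1), aligned symbol 5 */
	printf("%u\n", offset_from_slot(38, 1, 4, 5, 1));	/* 38 + 8 + 5 - 2 = 49 */
	return 0;
}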
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * lzx_allocate_decompressor - Allocate an LZX decompressor
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Return the pointer to the decompressor on success, or return NULL and set
Kmods SIG d83023
+ * errno on failure.
Kmods SIG d83023
+ */
Kmods SIG d83023
+struct lzx_decompressor *lzx_allocate_decompressor(void)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return kmalloc(sizeof(struct lzx_decompressor), GFP_NOFS);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * lzx_decompress - Decompress a buffer of LZX-compressed data
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @decompressor:      A decompressor allocated with lzx_allocate_decompressor()
Kmods SIG d83023
+ * @compressed_data:	The buffer of data to decompress
Kmods SIG d83023
+ * @compressed_size:	Number of bytes of compressed data
Kmods SIG d83023
+ * @uncompressed_data:	The buffer in which to store the decompressed data
Kmods SIG d83023
+ * @uncompressed_size:	The number of bytes the data decompresses into
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Return 0 on success, or return -1 and set errno on failure.
Kmods SIG d83023
+ */
Kmods SIG d83023
+int lzx_decompress(struct lzx_decompressor *decompressor,
Kmods SIG d83023
+		   const void *compressed_data, size_t compressed_size,
Kmods SIG d83023
+		   void *uncompressed_data, size_t uncompressed_size)
Kmods SIG d83023
+{
Kmods SIG d83023
+	struct lzx_decompressor *d = decompressor;
Kmods SIG d83023
+	u8 * const out_begin = uncompressed_data;
Kmods SIG d83023
+	u8 *out_next = out_begin;
Kmods SIG d83023
+	u8 * const out_end = out_begin + uncompressed_size;
Kmods SIG d83023
+	struct input_bitstream is;
Kmods SIG d83023
+	u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1};
Kmods SIG d83023
+	int e8_status = 0;
Kmods SIG d83023
+
Kmods SIG d83023
+	init_input_bitstream(&is, compressed_data, compressed_size);
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Codeword lengths begin as all 0's for delta encoding purposes.  */
Kmods SIG d83023
+	memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS);
Kmods SIG d83023
+	memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS);
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Decompress blocks until we have all the uncompressed data.  */
Kmods SIG d83023
+
Kmods SIG d83023
+	while (out_next != out_end) {
Kmods SIG d83023
+		int block_type;
Kmods SIG d83023
+		u32 block_size;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (lzx_read_block_header(d, &is, &block_type, &block_size,
Kmods SIG d83023
+					  recent_offsets))
Kmods SIG d83023
+			goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (block_size < 1 || block_size > (size_t)(out_end - out_next))
Kmods SIG d83023
+			goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) {
Kmods SIG d83023
+
Kmods SIG d83023
+			/* Compressed block  */
Kmods SIG d83023
+
Kmods SIG d83023
+			if (lzx_decompress_block(d,
Kmods SIG d83023
+						 &is,
Kmods SIG d83023
+						 block_type,
Kmods SIG d83023
+						 block_size,
Kmods SIG d83023
+						 out_begin,
Kmods SIG d83023
+						 out_next,
Kmods SIG d83023
+						 recent_offsets))
Kmods SIG d83023
+				goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+			e8_status |= d->maincode_lens[0xe8];
Kmods SIG d83023
+			out_next += block_size;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			/* Uncompressed block  */
Kmods SIG d83023
+
Kmods SIG d83023
+			out_next = bitstream_read_bytes(&is, out_next,
Kmods SIG d83023
+							block_size);
Kmods SIG d83023
+			if (!out_next)
Kmods SIG d83023
+				goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+			if (block_size & 1)
Kmods SIG d83023
+				bitstream_read_byte(&is);
Kmods SIG d83023
+
Kmods SIG d83023
+			e8_status = 1;
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */
Kmods SIG d83023
+	if (e8_status)
Kmods SIG d83023
+		lzx_postprocess(uncompressed_data, uncompressed_size);
Kmods SIG d83023
+
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+
Kmods SIG d83023
+invalid:
Kmods SIG d83023
+	return -1;
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * lzx_free_decompressor - Free an LZX decompressor
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @decompressor:       A decompressor that was allocated with
Kmods SIG d83023
+ *			lzx_allocate_decompressor(), or NULL.
Kmods SIG d83023
+ */
Kmods SIG d83023
+void lzx_free_decompressor(struct lzx_decompressor *decompressor)
Kmods SIG d83023
+{
Kmods SIG d83023
+	kfree(decompressor);
Kmods SIG d83023
+}
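
Taken together, the kernel-doc comments above describe a three-call API. A hedged usage sketch; the caller, the error mapping and the include path are assumptions, not part of the patch:

#include <linux/types.h>
#include <linux/errno.h>
#include "lib/lib.h"	/* assumed to declare the lzx_* entry points added here */

/* Hypothetical caller: decompress one LZX compression unit. */
static int example_lzx_unit(const void *cmpr, size_t cmpr_size,
			    void *unc, size_t unc_size)
{
	struct lzx_decompressor *d = lzx_allocate_decompressor();
	int err;

	if (!d)
		return -ENOMEM;

	/* lzx_decompress() returns 0 on success, -1 on invalid data. */
	err = lzx_decompress(d, cmpr, cmpr_size, unc, unc_size);

	lzx_free_decompressor(d);
	return err ? -EINVAL : 0;
}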
Kmods SIG d83023
diff --git a/src/lib/xpress_decompress.c b/src/lib/xpress_decompress.c
Kmods SIG d83023
new file mode 100644
Kmods SIG d83023
index 0000000000000000000000000000000000000000..3d98f36a981e672d4d924b5fcbaf655d18d96355
Kmods SIG d83023
--- /dev/null
Kmods SIG d83023
+++ b/src/lib/xpress_decompress.c
Kmods SIG d83023
@@ -0,0 +1,155 @@
Kmods SIG d83023
+// SPDX-License-Identifier: GPL-2.0-or-later
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * xpress_decompress.c - A decompressor for the XPRESS compression format
Kmods SIG d83023
+ * (Huffman variant), which can be used in "System Compressed" files.  This is
Kmods SIG d83023
+ * based on the code from wimlib.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Copyright (C) 2015 Eric Biggers
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * This program is free software: you can redistribute it and/or modify it under
Kmods SIG d83023
+ * the terms of the GNU General Public License as published by the Free Software
Kmods SIG d83023
+ * Foundation, either version 2 of the License, or (at your option) any later
Kmods SIG d83023
+ * version.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * This program is distributed in the hope that it will be useful, but WITHOUT
Kmods SIG d83023
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
Kmods SIG d83023
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
Kmods SIG d83023
+ * details.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * You should have received a copy of the GNU General Public License along with
Kmods SIG d83023
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
Kmods SIG d83023
+ */
Kmods SIG d83023
+
Kmods SIG d83023
+#include "decompress_common.h"
Kmods SIG d83023
+#include "lib.h"
Kmods SIG d83023
+
Kmods SIG d83023
+#define XPRESS_NUM_SYMBOLS	512
Kmods SIG d83023
+#define XPRESS_MAX_CODEWORD_LEN	15
Kmods SIG d83023
+#define XPRESS_MIN_MATCH_LEN	3
Kmods SIG d83023
+
Kmods SIG d83023
+/* This value is chosen for fast decompression.  */
Kmods SIG d83023
+#define XPRESS_TABLEBITS 12
Kmods SIG d83023
+
Kmods SIG d83023
+/* Reusable heap-allocated memory for XPRESS decompression  */
Kmods SIG d83023
+struct xpress_decompressor {
Kmods SIG d83023
+
Kmods SIG d83023
+	/* The Huffman decoding table  */
Kmods SIG d83023
+	u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS];
Kmods SIG d83023
+
Kmods SIG d83023
+	/* An array that maps symbols to codeword lengths  */
Kmods SIG d83023
+	u8 lens[XPRESS_NUM_SYMBOLS];
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Temporary space for make_huffman_decode_table()  */
Kmods SIG d83023
+	u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) +
Kmods SIG d83023
+			  XPRESS_NUM_SYMBOLS];
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * xpress_allocate_decompressor - Allocate an XPRESS decompressor
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Return the pointer to the decompressor on success, or return NULL and set
Kmods SIG d83023
+ * errno on failure.
Kmods SIG d83023
+ */
Kmods SIG d83023
+struct xpress_decompressor *xpress_allocate_decompressor(void)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * xpress_decompress - Decompress a buffer of XPRESS-compressed data
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @decompressor:       A decompressor that was allocated with
Kmods SIG d83023
+ *			xpress_allocate_decompressor()
Kmods SIG d83023
+ * @compressed_data:	The buffer of data to decompress
Kmods SIG d83023
+ * @compressed_size:	Number of bytes of compressed data
Kmods SIG d83023
+ * @uncompressed_data:	The buffer in which to store the decompressed data
Kmods SIG d83023
+ * @uncompressed_size:	The number of bytes the data decompresses into
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Return 0 on success, or return -1 and set errno on failure.
Kmods SIG d83023
+ */
Kmods SIG d83023
+int xpress_decompress(struct xpress_decompressor *decompressor,
Kmods SIG d83023
+		      const void *compressed_data, size_t compressed_size,
Kmods SIG d83023
+		      void *uncompressed_data, size_t uncompressed_size)
Kmods SIG d83023
+{
Kmods SIG d83023
+	struct xpress_decompressor *d = decompressor;
Kmods SIG d83023
+	const u8 * const in_begin = compressed_data;
Kmods SIG d83023
+	u8 * const out_begin = uncompressed_data;
Kmods SIG d83023
+	u8 *out_next = out_begin;
Kmods SIG d83023
+	u8 * const out_end = out_begin + uncompressed_size;
Kmods SIG d83023
+	struct input_bitstream is;
Kmods SIG d83023
+	u32 i;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Read the Huffman codeword lengths.  */
Kmods SIG d83023
+	if (compressed_size < XPRESS_NUM_SYMBOLS / 2)
Kmods SIG d83023
+		goto invalid;
Kmods SIG d83023
+	for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) {
Kmods SIG d83023
+		d->lens[i*2 + 0] = in_begin[i] & 0xF;
Kmods SIG d83023
+		d->lens[i*2 + 1] = in_begin[i] >> 4;
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Build a decoding table for the Huffman code.  */
Kmods SIG d83023
+	if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS,
Kmods SIG d83023
+				      XPRESS_TABLEBITS, d->lens,
Kmods SIG d83023
+				      XPRESS_MAX_CODEWORD_LEN,
Kmods SIG d83023
+				      d->working_space))
Kmods SIG d83023
+		goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Decode the matches and literals.  */
Kmods SIG d83023
+
Kmods SIG d83023
+	init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2,
Kmods SIG d83023
+			     compressed_size - XPRESS_NUM_SYMBOLS / 2);
Kmods SIG d83023
+
Kmods SIG d83023
+	while (out_next != out_end) {
Kmods SIG d83023
+		u32 sym;
Kmods SIG d83023
+		u32 log2_offset;
Kmods SIG d83023
+		u32 length;
Kmods SIG d83023
+		u32 offset;
Kmods SIG d83023
+
Kmods SIG d83023
+		sym = read_huffsym(&is, d->decode_table,
Kmods SIG d83023
+				   XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN);
Kmods SIG d83023
+		if (sym < 256) {
Kmods SIG d83023
+			/* Literal  */
Kmods SIG d83023
+			*out_next++ = sym;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			/* Match  */
Kmods SIG d83023
+			length = sym & 0xf;
Kmods SIG d83023
+			log2_offset = (sym >> 4) & 0xf;
Kmods SIG d83023
+
Kmods SIG d83023
+			bitstream_ensure_bits(&is, 16);
Kmods SIG d83023
+
Kmods SIG d83023
+			offset = ((u32)1 << log2_offset) |
Kmods SIG d83023
+				 bitstream_pop_bits(&is, log2_offset);
Kmods SIG d83023
+
Kmods SIG d83023
+			if (length == 0xf) {
Kmods SIG d83023
+				length += bitstream_read_byte(&is);
Kmods SIG d83023
+				if (length == 0xf + 0xff)
Kmods SIG d83023
+					length = bitstream_read_u16(&is);
Kmods SIG d83023
+			}
Kmods SIG d83023
+			length += XPRESS_MIN_MATCH_LEN;
Kmods SIG d83023
+
Kmods SIG d83023
+			if (offset > (size_t)(out_next - out_begin))
Kmods SIG d83023
+				goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+			if (length > (size_t)(out_end - out_next))
Kmods SIG d83023
+				goto invalid;
Kmods SIG d83023
+
Kmods SIG d83023
+			out_next = lz_copy(out_next, length, offset, out_end,
Kmods SIG d83023
+					   XPRESS_MIN_MATCH_LEN);
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+
Kmods SIG d83023
+invalid:
Kmods SIG d83023
+	return -1;
Kmods SIG d83023
+}
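
The match branch above unpacks two nibbles from every symbol >= 256: the low nibble is a length header (a value of 0xf escapes to an extra byte and, if needed, a 16-bit length), and the next nibble is log2 of the offset, whose remaining bits come straight from the bitstream. A userspace sketch of just that field split, with example values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t sym = 0x1a7;			/* an example symbol >= 256 */
	uint32_t length_hdr = sym & 0xf;	/* low nibble: length header */
	uint32_t log2_offset = (sym >> 4) & 0xf;/* next nibble: log2 of the offset */

	/* With extra-bits value 3 read from the stream, the offset becomes: */
	uint32_t offset = ((uint32_t)1 << log2_offset) | 3;

	printf("len_hdr=%u log2_off=%u offset=%u\n", length_hdr, log2_offset, offset);
	return 0;	/* len_hdr=7 log2_off=10 offset=1027 */
}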
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * xpress_free_decompressor - Free an XPRESS decompressor
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * @decompressor:       A decompressor that was allocated with
Kmods SIG d83023
+ *			xpress_allocate_decompressor(), or NULL.
Kmods SIG d83023
+ */
Kmods SIG d83023
+void xpress_free_decompressor(struct xpress_decompressor *decompressor)
Kmods SIG d83023
+{
Kmods SIG d83023
+	kfree(decompressor);
Kmods SIG d83023
+}
Kmods SIG d83023
diff --git a/src/lznt.c b/src/lznt.c
Kmods SIG d83023
new file mode 100644
Kmods SIG d83023
index 0000000000000000000000000000000000000000..ead9ab7d69b3082e429ac79b973aab461d3cfd7a
Kmods SIG d83023
--- /dev/null
Kmods SIG d83023
+++ b/src/lznt.c
Kmods SIG d83023
@@ -0,0 +1,452 @@
Kmods SIG d83023
+// SPDX-License-Identifier: GPL-2.0
Kmods SIG d83023
+/*
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
Kmods SIG d83023
+ *
Kmods SIG d83023
+ */
Kmods SIG d83023
+#include <linux/blkdev.h>
Kmods SIG d83023
+#include <linux/buffer_head.h>
Kmods SIG d83023
+#include <linux/fs.h>
Kmods SIG d83023
+#include <linux/nls.h>
Kmods SIG d83023
+
Kmods SIG d83023
+#include "debug.h"
Kmods SIG d83023
+#include "ntfs.h"
Kmods SIG d83023
+#include "ntfs_fs.h"
Kmods SIG d83023
+
Kmods SIG d83023
+// clang-format off
Kmods SIG d83023
+/* src buffer is all zeros */
Kmods SIG d83023
+#define LZNT_ERROR_ALL_ZEROS	1
Kmods SIG d83023
+#define LZNT_CHUNK_SIZE		0x1000
Kmods SIG d83023
+// clang-format on
Kmods SIG d83023
+
Kmods SIG d83023
+struct lznt_hash {
Kmods SIG d83023
+	const u8 *p1;
Kmods SIG d83023
+	const u8 *p2;
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+struct lznt {
Kmods SIG d83023
+	const u8 *unc;
Kmods SIG d83023
+	const u8 *unc_end;
Kmods SIG d83023
+	const u8 *best_match;
Kmods SIG d83023
+	size_t max_len;
Kmods SIG d83023
+	bool std;
Kmods SIG d83023
+
Kmods SIG d83023
+	struct lznt_hash hash[LZNT_CHUNK_SIZE];
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev,
Kmods SIG d83023
+				   size_t max_len)
Kmods SIG d83023
+{
Kmods SIG d83023
+	size_t len = 0;
Kmods SIG d83023
+
Kmods SIG d83023
+	while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len)
Kmods SIG d83023
+		;
Kmods SIG d83023
+	return len;
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+static size_t longest_match_std(const u8 *src, struct lznt *ctx)
Kmods SIG d83023
+{
Kmods SIG d83023
+	size_t hash_index;
Kmods SIG d83023
+	size_t len1 = 0, len2 = 0;
Kmods SIG d83023
+	const u8 **hash;
Kmods SIG d83023
+
Kmods SIG d83023
+	hash_index =
Kmods SIG d83023
+		((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) &
Kmods SIG d83023
+		(LZNT_CHUNK_SIZE - 1);
Kmods SIG d83023
+
Kmods SIG d83023
+	hash = &(ctx->hash[hash_index].p1);
Kmods SIG d83023
+
Kmods SIG d83023
+	if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] &&
Kmods SIG d83023
+	    hash[0][1] == src[1] && hash[0][2] == src[2]) {
Kmods SIG d83023
+		len1 = 3;
Kmods SIG d83023
+		if (ctx->max_len > 3)
Kmods SIG d83023
+			len1 += get_match_len(src + 3, ctx->unc_end,
Kmods SIG d83023
+					      hash[0] + 3, ctx->max_len - 3);
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] &&
Kmods SIG d83023
+	    hash[1][1] == src[1] && hash[1][2] == src[2]) {
Kmods SIG d83023
+		len2 = 3;
Kmods SIG d83023
+		if (ctx->max_len > 3)
Kmods SIG d83023
+			len2 += get_match_len(src + 3, ctx->unc_end,
Kmods SIG d83023
+					      hash[1] + 3, ctx->max_len - 3);
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Compare two matches and select the best one */
Kmods SIG d83023
+	if (len1 < len2) {
Kmods SIG d83023
+		ctx->best_match = hash[1];
Kmods SIG d83023
+		len1 = len2;
Kmods SIG d83023
+	} else {
Kmods SIG d83023
+		ctx->best_match = hash[0];
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	hash[1] = hash[0];
Kmods SIG d83023
+	hash[0] = src;
Kmods SIG d83023
+	return len1;
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+static size_t longest_match_best(const u8 *src, struct lznt *ctx)
Kmods SIG d83023
+{
Kmods SIG d83023
+	size_t max_len;
Kmods SIG d83023
+	const u8 *ptr;
Kmods SIG d83023
+
Kmods SIG d83023
+	if (ctx->unc >= src || !ctx->max_len)
Kmods SIG d83023
+		return 0;
Kmods SIG d83023
+
Kmods SIG d83023
+	max_len = 0;
Kmods SIG d83023
+	for (ptr = ctx->unc; ptr < src; ++ptr) {
Kmods SIG d83023
+		size_t len =
Kmods SIG d83023
+			get_match_len(src, ctx->unc_end, ptr, ctx->max_len);
Kmods SIG d83023
+		if (len >= max_len) {
Kmods SIG d83023
+			max_len = len;
Kmods SIG d83023
+			ctx->best_match = ptr;
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	return max_len >= 3 ? max_len : 0;
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+static const size_t s_max_len[] = {
Kmods SIG d83023
+	0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12,
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+static const size_t s_max_off[] = {
Kmods SIG d83023
+	0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
Kmods SIG d83023
+};
Kmods SIG d83023
+
Kmods SIG d83023
+static inline u16 make_pair(size_t offset, size_t len, size_t index)
Kmods SIG d83023
+{
Kmods SIG d83023
+	return ((offset - 1) << (12 - index)) |
Kmods SIG d83023
+	       ((len - 3) & (((1 << (12 - index)) - 1)));
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+static inline size_t parse_pair(u16 pair, size_t *offset, size_t index)
Kmods SIG d83023
+{
Kmods SIG d83023
+	*offset = 1 + (pair >> (12 - index));
Kmods SIG d83023
+	return 3 + (pair & ((1 << (12 - index)) - 1));
Kmods SIG d83023
+}
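
make_pair() and parse_pair() split a 16-bit value between offset and length, and the split point moves with @index: early in a chunk (index 0) the offset gets 4 bits and the length 12, which is exactly why s_max_off[0] == 0x10 and s_max_len[0] == 0x1002. A userspace round trip of the same arithmetic:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t pack_pair(size_t offset, size_t len, size_t index)
{
	return ((offset - 1) << (12 - index)) |
	       ((len - 3) & ((1 << (12 - index)) - 1));
}

static size_t unpack_pair(uint16_t pair, size_t *offset, size_t index)
{
	*offset = 1 + (pair >> (12 - index));
	return 3 + (pair & ((1 << (12 - index)) - 1));
}

int main(void)
{
	size_t off;
	uint16_t p = pack_pair(9, 100, 0);	/* offset 9, length 100, index 0 */
	size_t len = unpack_pair(p, &off, 0);

	printf("pair=0x%04x offset=%zu length=%zu\n", p, off, len);	/* 0x8061, 9, 100 */
	return 0;
}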
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * compress_chunk
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * returns one of the three values:
Kmods SIG d83023
+ * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data
Kmods SIG d83023
+ * 1 - input buffer is all zeros
Kmods SIG d83023
+ * -2 - the compressed buffer is too small to hold the compressed data
Kmods SIG d83023
+ */
Kmods SIG d83023
+static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *),
Kmods SIG d83023
+				 const u8 *unc, const u8 *unc_end, u8 *cmpr,
Kmods SIG d83023
+				 u8 *cmpr_end, size_t *cmpr_chunk_size,
Kmods SIG d83023
+				 struct lznt *ctx)
Kmods SIG d83023
+{
Kmods SIG d83023
+	size_t cnt = 0;
Kmods SIG d83023
+	size_t idx = 0;
Kmods SIG d83023
+	const u8 *up = unc;
Kmods SIG d83023
+	u8 *cp = cmpr + 3;
Kmods SIG d83023
+	u8 *cp2 = cmpr + 2;
Kmods SIG d83023
+	u8 not_zero = 0;
Kmods SIG d83023
+	/* Control byte, one bit per value: 0 - byte as is, 1 - short pair */
Kmods SIG d83023
+	u8 ohdr = 0;
Kmods SIG d83023
+	u8 *last;
Kmods SIG d83023
+	u16 t16;
Kmods SIG d83023
+
Kmods SIG d83023
+	if (unc + LZNT_CHUNK_SIZE < unc_end)
Kmods SIG d83023
+		unc_end = unc + LZNT_CHUNK_SIZE;
Kmods SIG d83023
+
Kmods SIG d83023
+	last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end);
Kmods SIG d83023
+
Kmods SIG d83023
+	ctx->unc = unc;
Kmods SIG d83023
+	ctx->unc_end = unc_end;
Kmods SIG d83023
+	ctx->max_len = s_max_len[0];
Kmods SIG d83023
+
Kmods SIG d83023
+	while (up < unc_end) {
Kmods SIG d83023
+		size_t max_len;
Kmods SIG d83023
+
Kmods SIG d83023
+		while (unc + s_max_off[idx] < up)
Kmods SIG d83023
+			ctx->max_len = s_max_len[++idx];
Kmods SIG d83023
+
Kmods SIG d83023
+		// Find match
Kmods SIG d83023
+		max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (!max_len) {
Kmods SIG d83023
+			if (cp >= last)
Kmods SIG d83023
+				goto NotCompressed;
Kmods SIG d83023
+			not_zero |= *cp++ = *up++;
Kmods SIG d83023
+		} else if (cp + 1 >= last) {
Kmods SIG d83023
+			goto NotCompressed;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			t16 = make_pair(up - ctx->best_match, max_len, idx);
Kmods SIG d83023
+			*cp++ = t16;
Kmods SIG d83023
+			*cp++ = t16 >> 8;
Kmods SIG d83023
+
Kmods SIG d83023
+			ohdr |= 1 << cnt;
Kmods SIG d83023
+			up += max_len;
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		cnt = (cnt + 1) & 7;
Kmods SIG d83023
+		if (!cnt) {
Kmods SIG d83023
+			*cp2 = ohdr;
Kmods SIG d83023
+			ohdr = 0;
Kmods SIG d83023
+			cp2 = cp;
Kmods SIG d83023
+			cp += 1;
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	if (cp2 < last)
Kmods SIG d83023
+		*cp2 = ohdr;
Kmods SIG d83023
+	else
Kmods SIG d83023
+		cp -= 1;
Kmods SIG d83023
+
Kmods SIG d83023
+	*cmpr_chunk_size = cp - cmpr;
Kmods SIG d83023
+
Kmods SIG d83023
+	t16 = (*cmpr_chunk_size - 3) | 0xB000;
Kmods SIG d83023
+	cmpr[0] = t16;
Kmods SIG d83023
+	cmpr[1] = t16 >> 8;
Kmods SIG d83023
+
Kmods SIG d83023
+	return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS;
Kmods SIG d83023
+
Kmods SIG d83023
+NotCompressed:
Kmods SIG d83023
+
Kmods SIG d83023
+	if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last)
Kmods SIG d83023
+		return -2;
Kmods SIG d83023
+
Kmods SIG d83023
+	/*
Kmods SIG d83023
+	 * Copy non cmpr data
Kmods SIG d83023
+	 * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000)
Kmods SIG d83023
+	 */
Kmods SIG d83023
+	cmpr[0] = 0xff;
Kmods SIG d83023
+	cmpr[1] = 0x3f;
Kmods SIG d83023
+
Kmods SIG d83023
+	memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE);
Kmods SIG d83023
+	*cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short);
Kmods SIG d83023
+
Kmods SIG d83023
+	return 0;
Kmods SIG d83023
+}
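
The two header bytes written at the end of compress_chunk() carry the chunk size minus 3 in the low 12 bits, a 0x3000 signature, and 0x8000 when the payload is actually compressed; decompress_lznt() later recovers the chunk size as 3 + (hdr & 0xFFF). A small userspace check of that round trip (the size value is arbitrary):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	size_t cmpr_chunk_size = 0x123;			/* 2 header bytes + payload */
	uint16_t hdr = (cmpr_chunk_size - 3) | 0xB000;	/* 0x8000: compressed, 0x3000: signature */

	/* The decompressor recovers the chunk size as 3 + (hdr & 0xFFF). */
	size_t cmpr_use = 3 + (hdr & 0x0FFF);

	printf("hdr=0x%04x cmpr_use=0x%zx compressed=%d\n",
	       hdr, cmpr_use, !!(hdr & 0x8000));	/* hdr=0xb120 cmpr_use=0x123 compressed=1 */
	return 0;
}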
Kmods SIG d83023
+
Kmods SIG d83023
+static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr,
Kmods SIG d83023
+				       const u8 *cmpr_end)
Kmods SIG d83023
+{
Kmods SIG d83023
+	u8 *up = unc;
Kmods SIG d83023
+	u8 ch = *cmpr++;
Kmods SIG d83023
+	size_t bit = 0;
Kmods SIG d83023
+	size_t index = 0;
Kmods SIG d83023
+	u16 pair;
Kmods SIG d83023
+	size_t offset, length;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Decompress while both pointers are inside their ranges */
Kmods SIG d83023
+	while (up < unc_end && cmpr < cmpr_end) {
Kmods SIG d83023
+		/* Correct index */
Kmods SIG d83023
+		while (unc + s_max_off[index] < up)
Kmods SIG d83023
+			index += 1;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check the current flag for zero */
Kmods SIG d83023
+		if (!(ch & (1 << bit))) {
Kmods SIG d83023
+			/* Just copy byte */
Kmods SIG d83023
+			*up++ = *cmpr++;
Kmods SIG d83023
+			goto next;
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check for boundary */
Kmods SIG d83023
+		if (cmpr + 1 >= cmpr_end)
Kmods SIG d83023
+			return -EINVAL;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Read a short from little endian stream */
Kmods SIG d83023
+		pair = cmpr[1];
Kmods SIG d83023
+		pair <<= 8;
Kmods SIG d83023
+		pair |= cmpr[0];
Kmods SIG d83023
+
Kmods SIG d83023
+		cmpr += 2;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Translate packed information into offset and length */
Kmods SIG d83023
+		length = parse_pair(pair, &offset, index);
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check offset for boundary */
Kmods SIG d83023
+		if (unc + offset > up)
Kmods SIG d83023
+			return -EINVAL;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Truncate the length if necessary */
Kmods SIG d83023
+		if (up + length >= unc_end)
Kmods SIG d83023
+			length = unc_end - up;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Now we copy bytes. This is the heart of the LZ algorithm. */
Kmods SIG d83023
+		for (; length > 0; length--, up++)
Kmods SIG d83023
+			*up = *(up - offset);
Kmods SIG d83023
+
Kmods SIG d83023
+next:
Kmods SIG d83023
+		/* Advance flag bit value */
Kmods SIG d83023
+		bit = (bit + 1) & 7;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (!bit) {
Kmods SIG d83023
+			if (cmpr >= cmpr_end)
Kmods SIG d83023
+				break;
Kmods SIG d83023
+
Kmods SIG d83023
+			ch = *cmpr++;
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Return the size of the uncompressed data */
Kmods SIG d83023
+	return up - unc;
Kmods SIG d83023
+}
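
The byte-at-a-time copy above is deliberate: LZNT, like other LZ77 variants, allows the match source to overlap the destination, so each copied byte may become the source of a later one, and a plain memcpy() would not reproduce that. A userspace sketch of the overlapping case:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t buf[8] = { 'A' };	/* one literal already decoded */
	uint8_t *up = buf + 1;
	size_t offset = 1, length = 5;	/* match referencing the byte just written */

	/* Same loop shape as decompress_chunk(): copied bytes feed later copies. */
	for (; length > 0; length--, up++)
		*up = *(up - offset);

	printf("%.6s\n", (const char *)buf);	/* prints "AAAAAA" */
	return 0;
}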
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * 0 - standard compression
Kmods SIG d83023
+ * !0 - best compression, requires a lot of CPU
Kmods SIG d83023
+ */
Kmods SIG d83023
+struct lznt *get_lznt_ctx(int level)
Kmods SIG d83023
+{
Kmods SIG d83023
+	struct lznt *r = ntfs_zalloc(level ? offsetof(struct lznt, hash)
Kmods SIG d83023
+					   : sizeof(struct lznt));
Kmods SIG d83023
+
Kmods SIG d83023
+	if (r)
Kmods SIG d83023
+		r->std = !level;
Kmods SIG d83023
+	return r;
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * compress_lznt
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Compresses "unc" into "cmpr"
Kmods SIG d83023
+ * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data
Kmods SIG d83023
+ * 0 - input buffer is all zeros
Kmods SIG d83023
+ */
Kmods SIG d83023
+size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr,
Kmods SIG d83023
+		     size_t cmpr_size, struct lznt *ctx)
Kmods SIG d83023
+{
Kmods SIG d83023
+	int err;
Kmods SIG d83023
+	size_t (*match)(const u8 *src, struct lznt *ctx);
Kmods SIG d83023
+	u8 *p = cmpr;
Kmods SIG d83023
+	u8 *end = p + cmpr_size;
Kmods SIG d83023
+	const u8 *unc_chunk = unc;
Kmods SIG d83023
+	const u8 *unc_end = unc_chunk + unc_size;
Kmods SIG d83023
+	bool is_zero = true;
Kmods SIG d83023
+
Kmods SIG d83023
+	if (ctx->std) {
Kmods SIG d83023
+		match = &longest_match_std;
Kmods SIG d83023
+		memset(ctx->hash, 0, sizeof(ctx->hash));
Kmods SIG d83023
+	} else {
Kmods SIG d83023
+		match = &longest_match_best;
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* compression cycle */
Kmods SIG d83023
+	for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) {
Kmods SIG d83023
+		cmpr_size = 0;
Kmods SIG d83023
+		err = compress_chunk(match, unc_chunk, unc_end, p, end,
Kmods SIG d83023
+				     &cmpr_size, ctx);
Kmods SIG d83023
+		if (err < 0)
Kmods SIG d83023
+			return unc_size;
Kmods SIG d83023
+
Kmods SIG d83023
+		if (is_zero && err != LZNT_ERROR_ALL_ZEROS)
Kmods SIG d83023
+			is_zero = false;
Kmods SIG d83023
+
Kmods SIG d83023
+		p += cmpr_size;
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	if (p <= end - 2)
Kmods SIG d83023
+		p[0] = p[1] = 0;
Kmods SIG d83023
+
Kmods SIG d83023
+	return is_zero ? 0 : PtrOffset(cmpr, p);
Kmods SIG d83023
+}
Kmods SIG d83023
+
Kmods SIG d83023
+/*
Kmods SIG d83023
+ * decompress_lznt
Kmods SIG d83023
+ *
Kmods SIG d83023
+ * Decompresses "cmpr" into "unc"
Kmods SIG d83023
+ */
Kmods SIG d83023
+ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc,
Kmods SIG d83023
+			size_t unc_size)
Kmods SIG d83023
+{
Kmods SIG d83023
+	const u8 *cmpr_chunk = cmpr;
Kmods SIG d83023
+	const u8 *cmpr_end = cmpr_chunk + cmpr_size;
Kmods SIG d83023
+	u8 *unc_chunk = unc;
Kmods SIG d83023
+	u8 *unc_end = unc_chunk + unc_size;
Kmods SIG d83023
+	u16 chunk_hdr;
Kmods SIG d83023
+
Kmods SIG d83023
+	if (cmpr_size < sizeof(short))
Kmods SIG d83023
+		return -EINVAL;
Kmods SIG d83023
+
Kmods SIG d83023
+	/* read chunk header */
Kmods SIG d83023
+	chunk_hdr = cmpr_chunk[1];
Kmods SIG d83023
+	chunk_hdr <<= 8;
Kmods SIG d83023
+	chunk_hdr |= cmpr_chunk[0];
Kmods SIG d83023
+
Kmods SIG d83023
+	/* loop through decompressing chunks */
Kmods SIG d83023
+	for (;;) {
Kmods SIG d83023
+		size_t chunk_size_saved;
Kmods SIG d83023
+		size_t unc_use;
Kmods SIG d83023
+		size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1));
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check that the chunk actually fits the supplied buffer */
Kmods SIG d83023
+		if (cmpr_chunk + cmpr_use > cmpr_end)
Kmods SIG d83023
+			return -EINVAL;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* First check whether the chunk contains compressed data */
Kmods SIG d83023
+		if (chunk_hdr & 0x8000) {
Kmods SIG d83023
+			/* Decompress a chunk and return if we get an error */
Kmods SIG d83023
+			ssize_t err =
Kmods SIG d83023
+				decompress_chunk(unc_chunk, unc_end,
Kmods SIG d83023
+						 cmpr_chunk + sizeof(chunk_hdr),
Kmods SIG d83023
+						 cmpr_chunk + cmpr_use);
Kmods SIG d83023
+			if (err < 0)
Kmods SIG d83023
+				return err;
Kmods SIG d83023
+			unc_use = err;
Kmods SIG d83023
+		} else {
Kmods SIG d83023
+			/* This chunk does not contain compressed data */
Kmods SIG d83023
+			unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end
Kmods SIG d83023
+					  ? unc_end - unc_chunk
Kmods SIG d83023
+					  : LZNT_CHUNK_SIZE;
Kmods SIG d83023
+
Kmods SIG d83023
+			if (cmpr_chunk + sizeof(chunk_hdr) + unc_use >
Kmods SIG d83023
+			    cmpr_end) {
Kmods SIG d83023
+				return -EINVAL;
Kmods SIG d83023
+			}
Kmods SIG d83023
+
Kmods SIG d83023
+			memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr),
Kmods SIG d83023
+			       unc_use);
Kmods SIG d83023
+		}
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Advance pointers */
Kmods SIG d83023
+		cmpr_chunk += cmpr_use;
Kmods SIG d83023
+		unc_chunk += unc_use;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check for the end of unc buffer */
Kmods SIG d83023
+		if (unc_chunk >= unc_end)
Kmods SIG d83023
+			break;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Proceed to the next chunk */
Kmods SIG d83023
+		if (cmpr_chunk > cmpr_end - 2)
Kmods SIG d83023
+			break;
Kmods SIG d83023
+
Kmods SIG d83023
+		chunk_size_saved = LZNT_CHUNK_SIZE;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* read chunk header */
Kmods SIG d83023
+		chunk_hdr = cmpr_chunk[1];
Kmods SIG d83023
+		chunk_hdr <<= 8;
Kmods SIG d83023
+		chunk_hdr |= cmpr_chunk[0];
Kmods SIG d83023
+
Kmods SIG d83023
+		if (!chunk_hdr)
Kmods SIG d83023
+			break;
Kmods SIG d83023
+
Kmods SIG d83023
+		/* Check the size of unc buffer */
Kmods SIG d83023
+		if (unc_use < chunk_size_saved) {
Kmods SIG d83023
+			size_t t1 = chunk_size_saved - unc_use;
Kmods SIG d83023
+			u8 *t2 = unc_chunk + t1;
Kmods SIG d83023
+
Kmods SIG d83023
+			/* 'Zero' memory */
Kmods SIG d83023
+			if (t2 >= unc_end)
Kmods SIG d83023
+				break;
Kmods SIG d83023
+
Kmods SIG d83023
+			memset(unc_chunk, 0, t1);
Kmods SIG d83023
+			unc_chunk = t2;
Kmods SIG d83023
+		}
Kmods SIG d83023
+	}
Kmods SIG d83023
+
Kmods SIG d83023
+	/* Check compression boundary */
Kmods SIG d83023
+	if (cmpr_chunk > cmpr_end)
Kmods SIG d83023
+		return -EINVAL;
Kmods SIG d83023
+
Kmods SIG d83023
+	/*
Kmods SIG d83023
+	 * The unc size is just the difference between the current
Kmods SIG d83023
+	 * pointer and the original one
Kmods SIG d83023
+	 */
Kmods SIG d83023
+	return PtrOffset(unc, unc_chunk);
Kmods SIG d83023
+}
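
For completeness, a hedged sketch of how the three LZNT entry points above fit together; the caller, the abbreviated error handling and the assumption that ntfs_zalloc() pairs with kfree() in this tree are mine, not the patch's:

#include <linux/errno.h>
#include <linux/slab.h>
#include "ntfs_fs.h"	/* assumed to declare get_lznt_ctx(), compress_lznt(), decompress_lznt() */

/* Hypothetical round trip of one buffer through LZNT compression. */
static int example_lznt_roundtrip(const void *unc, size_t unc_size,
				  void *cmpr, size_t cmpr_size,
				  void *out, size_t out_size)
{
	struct lznt *ctx = get_lznt_ctx(0);	/* 0 - standard compression */
	size_t cmpr_len;
	ssize_t unc_len;

	if (!ctx)
		return -ENOMEM;

	cmpr_len = compress_lznt(unc, unc_size, cmpr, cmpr_size, ctx);
	kfree(ctx);		/* assumes ntfs_zalloc() wraps kzalloc() in this tree */

	if (!cmpr_len)
		return 0;	/* input was all zeros */

	unc_len = decompress_lznt(cmpr, cmpr_len, out, out_size);
	return unc_len < 0 ? (int)unc_len : 0;
}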
Kmods SIG d83023
-- 
Kmods SIG d83023
2.31.1
Kmods SIG d83023