Blame SOURCES/file-5.39-regex-caching-1.patch

8a9804
diff --git a/src/apprentice.c b/src/apprentice.c
8a9804
index b609dd1..21eac1e 100644
8a9804
--- a/src/apprentice.c
8a9804
+++ b/src/apprentice.c
8a9804
@@ -423,7 +423,15 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
8a9804
 	ml->map = idx == 0 ? map : NULL;
8a9804
 	ml->magic = map->magic[idx];
8a9804
 	ml->nmagic = map->nmagic[idx];
8a9804
-
8a9804
+	if (ml->nmagic) {
8a9804
+		ml->magic_rxcomp = CAST(file_regex_t **,
8a9804
+		    calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
8a9804
+		if (ml->magic_rxcomp == NULL) {
8a9804
+			free(ml);
8a9804
+			return -1;
8a9804
+		}
8a9804
+	} else
8a9804
+		ml->magic_rxcomp = NULL;
8a9804
 	mlp->prev->next = ml;
8a9804
 	ml->prev = mlp->prev;
8a9804
 	ml->next = mlp;
8a9804
@@ -607,8 +615,19 @@ mlist_free_all(struct magic_set *ms)
8a9804
 private void
8a9804
 mlist_free_one(struct mlist *ml)
8a9804
 {
8a9804
+	size_t i;
8a9804
+
8a9804
 	if (ml->map)
8a9804
 		apprentice_unmap(CAST(struct magic_map *, ml->map));
8a9804
+
8a9804
+	for (i = 0; i < ml->nmagic; ++i) {
8a9804
+		if (ml->magic_rxcomp[i]) {
8a9804
+			file_regfree(ml->magic_rxcomp[i]);
8a9804
+			free(ml->magic_rxcomp[i]);
8a9804
+		}
8a9804
+	}
8a9804
+	free(ml->magic_rxcomp);
8a9804
+	ml->magic_rxcomp = NULL;
8a9804
 	free(ml);
8a9804
 }
8a9804
 
8a9804
@@ -3492,16 +3511,16 @@ file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
8a9804
 
8a9804
 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
8a9804
 		struct magic *ma = ml->magic;
8a9804
-		uint32_t nma = ml->nmagic;
8a9804
-		for (i = 0; i < nma; i++) {
8a9804
+		for (i = 0; i < ml->nmagic; i++) {
8a9804
 			if (ma[i].type != FILE_NAME)
8a9804
 				continue;
8a9804
 			if (strcmp(ma[i].value.s, name) == 0) {
8a9804
 				v->magic = &ma[i];
8a9804
-				for (j = i + 1; j < nma; j++)
8a9804
+				for (j = i + 1; j < ml->nmagic; j++)
8a9804
 				    if (ma[j].cont_level == 0)
8a9804
 					    break;
8a9804
 				v->nmagic = j - i;
8a9804
+				v->magic_rxcomp = ml->magic_rxcomp;
8a9804
 				return 0;
8a9804
 			}
8a9804
 		}
8a9804
diff --git a/src/file.h b/src/file.h
8a9804
index 48f4b69..c0b5a7c 100644
8a9804
--- a/src/file.h
8a9804
+++ b/src/file.h
8a9804
@@ -88,6 +88,10 @@
8a9804
 /* Do this here and now, because struct stat gets re-defined on solaris */
8a9804
 #include <sys/stat.h>
8a9804
 #include <stdarg.h>
8a9804
+#include <locale.h>
8a9804
+#if defined(HAVE_XLOCALE_H)
8a9804
+#include <xlocale.h>
8a9804
+#endif
8a9804
 
8a9804
 #define ENABLE_CONDITIONALS
8a9804
 
8a9804
@@ -167,6 +171,19 @@
8a9804
 #define FILE_COMPILE	2
8a9804
 #define FILE_LIST	3
8a9804
 
8a9804
+typedef struct {
8a9804
+	const char *pat;
8a9804
+#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
8a9804
+#define USE_C_LOCALE
8a9804
+	locale_t old_lc_ctype;
8a9804
+	locale_t c_lc_ctype;
8a9804
+#else
8a9804
+	char *old_lc_ctype;
8a9804
+#endif
8a9804
+	int rc;
8a9804
+	regex_t rx;
8a9804
+} file_regex_t;
8a9804
+
8a9804
 struct buffer {
8a9804
 	int fd;
8a9804
 	struct stat st;
8a9804
@@ -394,7 +411,8 @@ struct magic {
8a9804
 /* list of magic entries */
8a9804
 struct mlist {
8a9804
 	struct magic *magic;		/* array of magic entries */
8a9804
-	uint32_t nmagic;		/* number of entries in array */
8a9804
+	file_regex_t **magic_rxcomp;	/* array of compiled regexps */
8a9804
+	size_t nmagic;			/* number of entries in array */
8a9804
 	void *map;			/* internal resources used by entry */
8a9804
 	struct mlist *next, *prev;
8a9804
 };
8a9804
@@ -554,23 +572,7 @@ protected void buffer_init(struct buffer *, int, const struct stat *,
8a9804
 protected void buffer_fini(struct buffer *);
8a9804
 protected int buffer_fill(const struct buffer *);
8a9804
 
8a9804
-#include <locale.h>
8a9804
-#if defined(HAVE_XLOCALE_H)
8a9804
-#include <xlocale.h>
8a9804
-#endif
8a9804
 
8a9804
-typedef struct {
8a9804
-	const char *pat;
8a9804
-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
8a9804
-#define USE_C_LOCALE
8a9804
-	locale_t old_lc_ctype;
8a9804
-	locale_t c_lc_ctype;
8a9804
-#else
8a9804
-	char *old_lc_ctype;
8a9804
-#endif
8a9804
-	int rc;
8a9804
-	regex_t rx;
8a9804
-} file_regex_t;
8a9804
 
8a9804
 protected int file_regcomp(file_regex_t *, const char *, int);
8a9804
 protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
8a9804
diff --git a/src/softmagic.c b/src/softmagic.c
8a9804
index 95061e5..834dfe3 100644
8a9804
--- a/src/softmagic.c
8a9804
+++ b/src/softmagic.c
8a9804
@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $")
8a9804
 #include <time.h>
8a9804
 #include "der.h"
8a9804
 
8a9804
-private int match(struct magic_set *, struct magic *, uint32_t,
8a9804
+private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t,
8a9804
     const struct buffer *, size_t, int, int, int, uint16_t *,
8a9804
     uint16_t *, int *, int *, int *, int *);
8a9804
 private int mget(struct magic_set *, struct magic *, const struct buffer *,
8a9804
@@ -52,7 +52,7 @@ private int mget(struct magic_set *, struct magic *, const struct buffer *,
8a9804
     uint16_t *, int *, int *, int *, int *);
8a9804
 private int msetoffset(struct magic_set *, struct magic *, struct buffer *,
8a9804
     const struct buffer *, size_t, unsigned int);
8a9804
-private int magiccheck(struct magic_set *, struct magic *);
8a9804
+private int magiccheck(struct magic_set *, struct magic *, file_regex_t **);
8a9804
 private int32_t mprint(struct magic_set *, struct magic *);
8a9804
 private int moffset(struct magic_set *, struct magic *, const struct buffer *,
8a9804
     int32_t *);
8a9804
@@ -131,8 +131,8 @@ file_softmagic(struct magic_set *ms, const struct buffer *b,
8a9804
 	}
8a9804
 
8a9804
 	for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next)
8a9804
-		if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode,
8a9804
-		    text, 0, indir_count, name_count,
8a9804
+		if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b,
8a9804
+		    0, mode, text, 0, indir_count, name_count,
8a9804
 		    &printed_something, &need_separator, NULL, NULL)) != 0)
8a9804
 			return rv;
8a9804
 
8a9804
@@ -191,8 +191,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def,
8a9804
  *	so that higher-level continuations are processed.
8a9804
  */
8a9804
 private int
8a9804
-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
8a9804
-    const struct buffer *b, size_t offset, int mode, int text,
8a9804
+match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp,
8a9804
+    uint32_t nmagic, const struct buffer *b, size_t offset, int mode, int text,
8a9804
     int flip, uint16_t *indir_count, uint16_t *name_count,
8a9804
     int *printed_something, int *need_separator, int *returnval,
8a9804
     int *found_match)
8a9804
@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
8a9804
 	for (magindex = 0; magindex < nmagic; magindex++) {
8a9804
 		int flush = 0;
8a9804
 		struct magic *m = &magic[magindex];
8a9804
+		file_regex_t **m_rxcomp = &magic_rxcomp[magindex];
8a9804
 
8a9804
 		if (m->type != FILE_NAME)
8a9804
 		if ((IS_STRING(m->type) &&
8a9804
@@ -257,7 +258,7 @@ flush:
8a9804
 				*returnval = 1;
8a9804
 			}
8a9804
 
8a9804
-			switch (magiccheck(ms, m)) {
8a9804
+			switch (magiccheck(ms, m, m_rxcomp)) {
8a9804
 			case -1:
8a9804
 				return -1;
8a9804
 			case 0:
8a9804
@@ -317,6 +318,7 @@ flush:
8a9804
 		while (magindex + 1 < nmagic &&
8a9804
 		    magic[magindex + 1].cont_level != 0) {
8a9804
 			m = &magic[++magindex];
8a9804
+			m_rxcomp = &magic_rxcomp[magindex];
8a9804
 			ms->line = m->lineno; /* for messages */
8a9804
 
8a9804
 			if (cont_level < m->cont_level)
8a9804
@@ -370,7 +372,7 @@ flush:
8a9804
 				break;
8a9804
 			}
8a9804
 
8a9804
-			switch (flush ? 1 : magiccheck(ms, m)) {
8a9804
+			switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) {
8a9804
 			case -1:
8a9804
 				return -1;
8a9804
 			case 0:
8a9804
@@ -1880,8 +1882,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
8a9804
 		oneed_separator = *need_separator;
8a9804
 		if (m->flag & NOSPACE)
8a9804
 			*need_separator = 0;
8a9804
-		rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
8a9804
-		    mode, text, flip, indir_count, name_count,
8a9804
+		rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b,
8a9804
+		    offset + o, mode, text, flip, indir_count, name_count,
8a9804
 		    printed_something, need_separator, returnval, found_match);
8a9804
 		(*name_count)--;
8a9804
 		if (rv != 1)
8a9804
@@ -1989,8 +1991,31 @@ file_strncmp16(const char *a, const char *b, size_t len, size_t maxlen,
8a9804
 	return file_strncmp(a, b, len, maxlen, flags);
8a9804
 }
8a9804
 
8a9804
+private file_regex_t *
8a9804
+alloc_regex(struct magic_set *ms, struct magic *m)
8a9804
+{
8a9804
+	int rc;
8a9804
+	file_regex_t *rx = CAST(file_regex_t *, malloc(sizeof(*rx)));
8a9804
+
8a9804
+	if (rx == NULL) {
8a9804
+		file_error(ms, errno, "can't allocate %" SIZE_T_FORMAT
8a9804
+		    "u bytes", sizeof(*rx));
8a9804
+		return NULL;
8a9804
+	}
8a9804
+
8a9804
+	rc = file_regcomp(rx, m->value.s, REG_EXTENDED | REG_NEWLINE |
8a9804
+	    ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
8a9804
+	if (rc == 0)
8a9804
+		return rx;
8a9804
+
8a9804
+	file_regerror(rx, rc, ms);
8a9804
+	file_regfree(rx);
8a9804
+	free(rx);
8a9804
+	return NULL;
8a9804
+}
8a9804
+
8a9804
 private int
8a9804
-magiccheck(struct magic_set *ms, struct magic *m)
8a9804
+magiccheck(struct magic_set *ms, struct magic *m, file_regex_t **m_cache)
8a9804
 {
8a9804
 	uint64_t l = m->value.q;
8a9804
 	uint64_t v;
8a9804
@@ -2068,8 +2093,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
8a9804
 			break;
8a9804
 
8a9804
 		default:
8a9804
-			file_magerror(ms, "cannot happen with float: invalid relation `%c'",
8a9804
-			    m->reln);
8a9804
+			file_magerror(ms, "cannot happen with float: "
8a9804
+			    "invalid relation `%c'", m->reln);
8a9804
 			return -1;
8a9804
 		}
8a9804
 		return matched;
8a9804
@@ -2101,7 +2126,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
8a9804
 			break;
8a9804
 
8a9804
 		default:
8a9804
-			file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
8a9804
+			file_magerror(ms, "cannot happen with double: "
8a9804
+			    "invalid relation `%c'", m->reln);
8a9804
 			return -1;
8a9804
 		}
8a9804
 		return matched;
8a9804
@@ -2169,62 +2195,57 @@ magiccheck(struct magic_set *ms, struct magic *m)
8a9804
 	}
8a9804
 	case FILE_REGEX: {
8a9804
 		int rc;
8a9804
-		file_regex_t rx;
8a9804
+		file_regex_t *rx = *m_cache;
8a9804
 		const char *search;
8a9804
+		regmatch_t pmatch;
8a9804
+		size_t slen = ms->search.s_len;
8a9804
+		char *copy;
8a9804
 
8a9804
 		if (ms->search.s == NULL)
8a9804
 			return 0;
8a9804
 
8a9804
+		if (rx == NULL) {
8a9804
+			rx = *m_cache = alloc_regex(ms, m);
8a9804
+			if (rx == NULL)
8a9804
+				return -1;
8a9804
+		}
8a9804
 		l = 0;
8a9804
-		rc = file_regcomp(&rx, m->value.s,
8a9804
-		    REG_EXTENDED|REG_NEWLINE|
8a9804
-		    ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
8a9804
-		if (rc) {
8a9804
-			file_regerror(&rx, rc, ms);
8a9804
-			v = CAST(uint64_t, -1);
8a9804
+		if (slen != 0) {
8a9804
+		    copy = CAST(char *, malloc(slen));
8a9804
+		    if (copy == NULL)  {
8a9804
+			file_error(ms, errno,
8a9804
+			    "can't allocate %" SIZE_T_FORMAT "u bytes",
8a9804
+			    slen);
8a9804
+			return -1;
8a9804
+		    }
8a9804
+		    memcpy(copy, ms->search.s, slen);
8a9804
+		    copy[--slen] = '\0';
8a9804
+		    search = copy;
8a9804
 		} else {
8a9804
-			regmatch_t pmatch;
8a9804
-			size_t slen = ms->search.s_len;
8a9804
-			char *copy;
8a9804
-			if (slen != 0) {
8a9804
-			    copy = CAST(char *, malloc(slen));
8a9804
-			    if (copy == NULL)  {
8a9804
-				file_regfree(&rx);
8a9804
-				file_error(ms, errno,
8a9804
-				    "can't allocate %" SIZE_T_FORMAT "u bytes",
8a9804
-				    slen);
8a9804
-				return -1;
8a9804
-			    }
8a9804
-			    memcpy(copy, ms->search.s, slen);
8a9804
-			    copy[--slen] = '\0';
8a9804
-			    search = copy;
8a9804
-			} else {
8a9804
-			    search = CCAST(char *, "");
8a9804
-			    copy = NULL;
8a9804
-			}
8a9804
-			rc = file_regexec(&rx, RCAST(const char *, search),
8a9804
-			    1, &pmatch, 0);
8a9804
-			free(copy);
8a9804
-			switch (rc) {
8a9804
-			case 0:
8a9804
-				ms->search.s += CAST(int, pmatch.rm_so);
8a9804
-				ms->search.offset += CAST(size_t, pmatch.rm_so);
8a9804
-				ms->search.rm_len = CAST(size_t, 
8a9804
-				    pmatch.rm_eo - pmatch.rm_so);
8a9804
-				v = 0;
8a9804
-				break;
8a9804
+		    search = CCAST(char *, "");
8a9804
+		    copy = NULL;
8a9804
+		}
8a9804
+		rc = file_regexec(rx, RCAST(const char *, search),
8a9804
+		    1, &pmatch, 0);
8a9804
+		free(copy);
8a9804
+		switch (rc) {
8a9804
+		case 0:
8a9804
+			ms->search.s += CAST(int, pmatch.rm_so);
8a9804
+			ms->search.offset += CAST(size_t, pmatch.rm_so);
8a9804
+			ms->search.rm_len = CAST(size_t,
8a9804
+			    pmatch.rm_eo - pmatch.rm_so);
8a9804
+			v = 0;
8a9804
+			break;
8a9804
 
8a9804
-			case REG_NOMATCH:
8a9804
-				v = 1;
8a9804
-				break;
8a9804
+		case REG_NOMATCH:
8a9804
+			v = 1;
8a9804
+			break;
8a9804
 
8a9804
-			default:
8a9804
-				file_regerror(&rx, rc, ms);
8a9804
-				v = CAST(uint64_t, -1);
8a9804
-				break;
8a9804
-			}
8a9804
+		default:
8a9804
+			file_regerror(rx, rc, ms);
8a9804
+			v = CAST(uint64_t, -1);
8a9804
+			break;
8a9804
 		}
8a9804
-		file_regfree(&rx);
8a9804
 		if (v == CAST(uint64_t, -1))
8a9804
 			return -1;
8a9804
 		break;