From 8a98047a4e571f136b29666ee801256fa628cb8a Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 15 2022 07:05:50 +0000 Subject: import file-5.39-10.el9 --- diff --git a/SOURCES/file-5.33-fix-compression.patch b/SOURCES/file-5.33-fix-compression.patch new file mode 100644 index 0000000..f5751b1 --- /dev/null +++ b/SOURCES/file-5.33-fix-compression.patch @@ -0,0 +1,45 @@ +diff --git a/src/compress.c b/src/compress.c +index ba1e3d7..6846d66 100644 +--- a/src/compress.c ++++ b/src/compress.c +@@ -932,23 +932,23 @@ uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old, + rv = OKDATA; + r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); + if (r <= 0) { +- DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], +- r != -1 ? strerror(errno) : "no data"); +- +- rv = ERRDATA; +- if (r == 0 && +- (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) +- { +- r = filter_error(*newch, r); +- goto ok; +- } +- free(*newch); +- if (r == 0) +- rv = makeerror(newch, n, "Read failed, %s", +- strerror(errno)); +- else +- rv = makeerror(newch, n, "No data"); +- goto err; ++ if (r < 0) { ++ rv = ERRDATA; ++ DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], ++ strerror(errno)); ++ goto err; ++ } else if ((r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0){ ++ rv = ERRDATA; ++ r = filter_error(*newch, r); ++ goto ok; ++ } ++ if (r == 0) ++ goto ok; ++ free(*newch); ++ rv = ERRDATA; ++ rv = makeerror(newch, n, "Read stderr failed, %s", ++ strerror(errno)); ++ goto err; + } + ok: + *n = r; diff --git a/SOURCES/file-5.39-regex-caching-1.patch b/SOURCES/file-5.39-regex-caching-1.patch new file mode 100644 index 0000000..f1738fe --- /dev/null +++ b/SOURCES/file-5.39-regex-caching-1.patch @@ -0,0 +1,377 @@ +diff --git a/src/apprentice.c b/src/apprentice.c +index b609dd1..21eac1e 100644 +--- a/src/apprentice.c ++++ b/src/apprentice.c +@@ -423,7 +423,15 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx) + ml->map = idx == 0 ? map : NULL; + ml->magic = map->magic[idx]; + ml->nmagic = map->nmagic[idx]; +- ++ if (ml->nmagic) { ++ ml->magic_rxcomp = CAST(file_regex_t **, ++ calloc(ml->nmagic, sizeof(*ml->magic_rxcomp))); ++ if (ml->magic_rxcomp == NULL) { ++ free(ml); ++ return -1; ++ } ++ } else ++ ml->magic_rxcomp = NULL; + mlp->prev->next = ml; + ml->prev = mlp->prev; + ml->next = mlp; +@@ -607,8 +615,19 @@ mlist_free_all(struct magic_set *ms) + private void + mlist_free_one(struct mlist *ml) + { ++ size_t i; ++ + if (ml->map) + apprentice_unmap(CAST(struct magic_map *, ml->map)); ++ ++ for (i = 0; i < ml->nmagic; ++i) { ++ if (ml->magic_rxcomp[i]) { ++ file_regfree(ml->magic_rxcomp[i]); ++ free(ml->magic_rxcomp[i]); ++ } ++ } ++ free(ml->magic_rxcomp); ++ ml->magic_rxcomp = NULL; + free(ml); + } + +@@ -3492,16 +3511,16 @@ file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) + + for (ml = mlist->next; ml != mlist; ml = ml->next) { + struct magic *ma = ml->magic; +- uint32_t nma = ml->nmagic; +- for (i = 0; i < nma; i++) { ++ for (i = 0; i < ml->nmagic; i++) { + if (ma[i].type != FILE_NAME) + continue; + if (strcmp(ma[i].value.s, name) == 0) { + v->magic = &ma[i]; +- for (j = i + 1; j < nma; j++) ++ for (j = i + 1; j < ml->nmagic; j++) + if (ma[j].cont_level == 0) + break; + v->nmagic = j - i; ++ v->magic_rxcomp = ml->magic_rxcomp; + return 0; + } + } +diff --git a/src/file.h b/src/file.h +index 48f4b69..c0b5a7c 100644 +--- a/src/file.h ++++ b/src/file.h +@@ -88,6 +88,10 @@ + /* Do this here and now, because struct stat gets re-defined on solaris */ + #include + #include ++#include ++#if defined(HAVE_XLOCALE_H) ++#include ++#endif + + #define ENABLE_CONDITIONALS + +@@ -167,6 +171,19 @@ + #define FILE_COMPILE 2 + #define FILE_LIST 3 + ++typedef struct { ++ const char *pat; ++#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) ++#define USE_C_LOCALE ++ locale_t old_lc_ctype; ++ locale_t c_lc_ctype; ++#else ++ char *old_lc_ctype; ++#endif ++ int rc; ++ regex_t rx; ++} file_regex_t; ++ + struct buffer { + int fd; + struct stat st; +@@ -394,7 +411,8 @@ struct magic { + /* list of magic entries */ + struct mlist { + struct magic *magic; /* array of magic entries */ +- uint32_t nmagic; /* number of entries in array */ ++ file_regex_t **magic_rxcomp; /* array of compiled regexps */ ++ size_t nmagic; /* number of entries in array */ + void *map; /* internal resources used by entry */ + struct mlist *next, *prev; + }; +@@ -554,23 +572,7 @@ protected void buffer_init(struct buffer *, int, const struct stat *, + protected void buffer_fini(struct buffer *); + protected int buffer_fill(const struct buffer *); + +-#include +-#if defined(HAVE_XLOCALE_H) +-#include +-#endif + +-typedef struct { +- const char *pat; +-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) +-#define USE_C_LOCALE +- locale_t old_lc_ctype; +- locale_t c_lc_ctype; +-#else +- char *old_lc_ctype; +-#endif +- int rc; +- regex_t rx; +-} file_regex_t; + + protected int file_regcomp(file_regex_t *, const char *, int); + protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *, +diff --git a/src/softmagic.c b/src/softmagic.c +index 95061e5..834dfe3 100644 +--- a/src/softmagic.c ++++ b/src/softmagic.c +@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $") + #include + #include "der.h" + +-private int match(struct magic_set *, struct magic *, uint32_t, ++private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t, + const struct buffer *, size_t, int, int, int, uint16_t *, + uint16_t *, int *, int *, int *, int *); + private int mget(struct magic_set *, struct magic *, const struct buffer *, +@@ -52,7 +52,7 @@ private int mget(struct magic_set *, struct magic *, const struct buffer *, + uint16_t *, int *, int *, int *, int *); + private int msetoffset(struct magic_set *, struct magic *, struct buffer *, + const struct buffer *, size_t, unsigned int); +-private int magiccheck(struct magic_set *, struct magic *); ++private int magiccheck(struct magic_set *, struct magic *, file_regex_t **); + private int32_t mprint(struct magic_set *, struct magic *); + private int moffset(struct magic_set *, struct magic *, const struct buffer *, + int32_t *); +@@ -131,8 +131,8 @@ file_softmagic(struct magic_set *ms, const struct buffer *b, + } + + for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next) +- if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode, +- text, 0, indir_count, name_count, ++ if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b, ++ 0, mode, text, 0, indir_count, name_count, + &printed_something, &need_separator, NULL, NULL)) != 0) + return rv; + +@@ -191,8 +191,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def, + * so that higher-level continuations are processed. + */ + private int +-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, +- const struct buffer *b, size_t offset, int mode, int text, ++match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp, ++ uint32_t nmagic, const struct buffer *b, size_t offset, int mode, int text, + int flip, uint16_t *indir_count, uint16_t *name_count, + int *printed_something, int *need_separator, int *returnval, + int *found_match) +@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, + for (magindex = 0; magindex < nmagic; magindex++) { + int flush = 0; + struct magic *m = &magic[magindex]; ++ file_regex_t **m_rxcomp = &magic_rxcomp[magindex]; + + if (m->type != FILE_NAME) + if ((IS_STRING(m->type) && +@@ -257,7 +258,7 @@ flush: + *returnval = 1; + } + +- switch (magiccheck(ms, m)) { ++ switch (magiccheck(ms, m, m_rxcomp)) { + case -1: + return -1; + case 0: +@@ -317,6 +318,7 @@ flush: + while (magindex + 1 < nmagic && + magic[magindex + 1].cont_level != 0) { + m = &magic[++magindex]; ++ m_rxcomp = &magic_rxcomp[magindex]; + ms->line = m->lineno; /* for messages */ + + if (cont_level < m->cont_level) +@@ -370,7 +372,7 @@ flush: + break; + } + +- switch (flush ? 1 : magiccheck(ms, m)) { ++ switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) { + case -1: + return -1; + case 0: +@@ -1880,8 +1882,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b, + oneed_separator = *need_separator; + if (m->flag & NOSPACE) + *need_separator = 0; +- rv = match(ms, ml.magic, ml.nmagic, b, offset + o, +- mode, text, flip, indir_count, name_count, ++ rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b, ++ offset + o, mode, text, flip, indir_count, name_count, + printed_something, need_separator, returnval, found_match); + (*name_count)--; + if (rv != 1) +@@ -1989,8 +1991,31 @@ file_strncmp16(const char *a, const char *b, size_t len, size_t maxlen, + return file_strncmp(a, b, len, maxlen, flags); + } + ++private file_regex_t * ++alloc_regex(struct magic_set *ms, struct magic *m) ++{ ++ int rc; ++ file_regex_t *rx = CAST(file_regex_t *, malloc(sizeof(*rx))); ++ ++ if (rx == NULL) { ++ file_error(ms, errno, "can't allocate %" SIZE_T_FORMAT ++ "u bytes", sizeof(*rx)); ++ return NULL; ++ } ++ ++ rc = file_regcomp(rx, m->value.s, REG_EXTENDED | REG_NEWLINE | ++ ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); ++ if (rc == 0) ++ return rx; ++ ++ file_regerror(rx, rc, ms); ++ file_regfree(rx); ++ free(rx); ++ return NULL; ++} ++ + private int +-magiccheck(struct magic_set *ms, struct magic *m) ++magiccheck(struct magic_set *ms, struct magic *m, file_regex_t **m_cache) + { + uint64_t l = m->value.q; + uint64_t v; +@@ -2068,8 +2093,8 @@ magiccheck(struct magic_set *ms, struct magic *m) + break; + + default: +- file_magerror(ms, "cannot happen with float: invalid relation `%c'", +- m->reln); ++ file_magerror(ms, "cannot happen with float: " ++ "invalid relation `%c'", m->reln); + return -1; + } + return matched; +@@ -2101,7 +2126,8 @@ magiccheck(struct magic_set *ms, struct magic *m) + break; + + default: +- file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln); ++ file_magerror(ms, "cannot happen with double: " ++ "invalid relation `%c'", m->reln); + return -1; + } + return matched; +@@ -2169,62 +2195,57 @@ magiccheck(struct magic_set *ms, struct magic *m) + } + case FILE_REGEX: { + int rc; +- file_regex_t rx; ++ file_regex_t *rx = *m_cache; + const char *search; ++ regmatch_t pmatch; ++ size_t slen = ms->search.s_len; ++ char *copy; + + if (ms->search.s == NULL) + return 0; + ++ if (rx == NULL) { ++ rx = *m_cache = alloc_regex(ms, m); ++ if (rx == NULL) ++ return -1; ++ } + l = 0; +- rc = file_regcomp(&rx, m->value.s, +- REG_EXTENDED|REG_NEWLINE| +- ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); +- if (rc) { +- file_regerror(&rx, rc, ms); +- v = CAST(uint64_t, -1); ++ if (slen != 0) { ++ copy = CAST(char *, malloc(slen)); ++ if (copy == NULL) { ++ file_error(ms, errno, ++ "can't allocate %" SIZE_T_FORMAT "u bytes", ++ slen); ++ return -1; ++ } ++ memcpy(copy, ms->search.s, slen); ++ copy[--slen] = '\0'; ++ search = copy; + } else { +- regmatch_t pmatch; +- size_t slen = ms->search.s_len; +- char *copy; +- if (slen != 0) { +- copy = CAST(char *, malloc(slen)); +- if (copy == NULL) { +- file_regfree(&rx); +- file_error(ms, errno, +- "can't allocate %" SIZE_T_FORMAT "u bytes", +- slen); +- return -1; +- } +- memcpy(copy, ms->search.s, slen); +- copy[--slen] = '\0'; +- search = copy; +- } else { +- search = CCAST(char *, ""); +- copy = NULL; +- } +- rc = file_regexec(&rx, RCAST(const char *, search), +- 1, &pmatch, 0); +- free(copy); +- switch (rc) { +- case 0: +- ms->search.s += CAST(int, pmatch.rm_so); +- ms->search.offset += CAST(size_t, pmatch.rm_so); +- ms->search.rm_len = CAST(size_t, +- pmatch.rm_eo - pmatch.rm_so); +- v = 0; +- break; ++ search = CCAST(char *, ""); ++ copy = NULL; ++ } ++ rc = file_regexec(rx, RCAST(const char *, search), ++ 1, &pmatch, 0); ++ free(copy); ++ switch (rc) { ++ case 0: ++ ms->search.s += CAST(int, pmatch.rm_so); ++ ms->search.offset += CAST(size_t, pmatch.rm_so); ++ ms->search.rm_len = CAST(size_t, ++ pmatch.rm_eo - pmatch.rm_so); ++ v = 0; ++ break; + +- case REG_NOMATCH: +- v = 1; +- break; ++ case REG_NOMATCH: ++ v = 1; ++ break; + +- default: +- file_regerror(&rx, rc, ms); +- v = CAST(uint64_t, -1); +- break; +- } ++ default: ++ file_regerror(rx, rc, ms); ++ v = CAST(uint64_t, -1); ++ break; + } +- file_regfree(&rx); + if (v == CAST(uint64_t, -1)) + return -1; + break; diff --git a/SOURCES/file-5.39-regex-caching-2.patch b/SOURCES/file-5.39-regex-caching-2.patch new file mode 100644 index 0000000..37d5339 --- /dev/null +++ b/SOURCES/file-5.39-regex-caching-2.patch @@ -0,0 +1,226 @@ +diff --git a/src/apprentice.c b/src/apprentice.c +index 21eac1e..781c5e1 100644 +--- a/src/apprentice.c ++++ b/src/apprentice.c +@@ -513,6 +513,9 @@ file_ms_free(struct magic_set *ms) + free(ms->o.pbuf); + free(ms->o.buf); + free(ms->c.li); ++#ifdef USE_C_LOCALE ++ freelocale(ms->c_lc_ctype); ++#endif + free(ms); + } + +@@ -551,6 +554,10 @@ file_ms_alloc(int flags) + ms->elf_notes_max = FILE_ELF_NOTES_MAX; + ms->regex_max = FILE_REGEX_MAX; + ms->bytes_max = FILE_BYTES_MAX; ++#ifdef USE_C_LOCALE ++ ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); ++ assert(ms->c_lc_ctype != NULL); ++#endif + return ms; + free: + free(ms); +@@ -624,6 +631,7 @@ mlist_free_one(struct mlist *ml) + if (ml->magic_rxcomp[i]) { + file_regfree(ml->magic_rxcomp[i]); + free(ml->magic_rxcomp[i]); ++ ml->magic_rxcomp[i] = NULL; + } + } + free(ml->magic_rxcomp); +@@ -2714,7 +2722,8 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) + } + if (m->type == FILE_REGEX) { + file_regex_t rx; +- int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED); ++ int rc = file_regcomp(ms, &rx, m->value.s, ++ REG_EXTENDED); + if (rc) { + if (ms->flags & MAGIC_CHECK) + file_regerror(&rx, rc, ms); +diff --git a/src/file.h b/src/file.h +index c0b5a7c..f049446 100644 +--- a/src/file.h ++++ b/src/file.h +@@ -173,13 +173,6 @@ + + typedef struct { + const char *pat; +-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) +-#define USE_C_LOCALE +- locale_t old_lc_ctype; +- locale_t c_lc_ctype; +-#else +- char *old_lc_ctype; +-#endif + int rc; + regex_t rx; + } file_regex_t; +@@ -487,6 +480,10 @@ struct magic_set { + #define FILE_INDIR_MAX 50 + #define FILE_NAME_MAX 50 + #define FILE_REGEX_MAX 8192 ++#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE) ++#define USE_C_LOCALE ++ locale_t c_lc_ctype; ++#endif + }; + + /* Type for Unicode characters */ +@@ -574,9 +571,10 @@ protected int buffer_fill(const struct buffer *); + + + +-protected int file_regcomp(file_regex_t *, const char *, int); +-protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *, ++protected int file_regcomp(struct magic_set *, file_regex_t *, const char *, + int); ++protected int file_regexec(struct magic_set *, file_regex_t *, const char *, ++ size_t, regmatch_t *, int); + protected void file_regfree(file_regex_t *); + protected void file_regerror(file_regex_t *, int, struct magic_set *); + +diff --git a/src/funcs.c b/src/funcs.c +index 6320cf2..1391a44 100644 +--- a/src/funcs.c ++++ b/src/funcs.c +@@ -613,13 +613,13 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep) + file_regex_t rx; + int rc, rv = -1; + +- rc = file_regcomp(&rx, pat, REG_EXTENDED); ++ rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); + if (rc) { + file_regerror(&rx, rc, ms); + } else { + regmatch_t rm; + int nm = 0; +- while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) { ++ while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { + ms->o.buf[rm.rm_so] = '\0'; + if (file_printf(ms, "%s%s", rep, + rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1) +@@ -634,34 +634,52 @@ out: + } + + protected int +-file_regcomp(file_regex_t *rx, const char *pat, int flags) ++file_regcomp(struct magic_set *ms, file_regex_t *rx, const char *pat, int flags) + { + #ifdef USE_C_LOCALE +- rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0); +- assert(rx->c_lc_ctype != NULL); +- rx->old_lc_ctype = uselocale(rx->c_lc_ctype); +- assert(rx->old_lc_ctype != NULL); ++ locale_t old = uselocale(ms->c_lc_ctype); ++ assert(old != NULL); + #else +- rx->old_lc_ctype = setlocale(LC_CTYPE, NULL); +- assert(rx->old_lc_ctype != NULL); +- rx->old_lc_ctype = strdup(rx->old_lc_ctype); +- assert(rx->old_lc_ctype != NULL); ++ char old[1024]; ++ strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); + (void)setlocale(LC_CTYPE, "C"); + #endif + rx->pat = pat; + +- return rx->rc = regcomp(&rx->rx, pat, flags); ++ rx->rc = regcomp(&rx->rx, pat, flags); ++ ++#ifdef USE_C_LOCALE ++ uselocale(old); ++#else ++ (void)setlocale(LC_CTYPE, old); ++#endif ++ return rx->rc; + } + + protected int +-file_regexec(file_regex_t *rx, const char *str, size_t nmatch, +- regmatch_t* pmatch, int eflags) ++file_regexec(struct magic_set *ms, file_regex_t *rx, const char *str, ++ size_t nmatch, regmatch_t* pmatch, int eflags) + { ++#ifdef USE_C_LOCALE ++ locale_t old = uselocale(ms->c_lc_ctype); ++ assert(old != NULL); ++#else ++ char old[1024]; ++ strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); ++ (void)setlocale(LC_CTYPE, "C"); ++#endif ++ int rc; + assert(rx->rc == 0); + /* XXX: force initialization because glibc does not always do this */ + if (nmatch != 0) + memset(pmatch, 0, nmatch * sizeof(*pmatch)); +- return regexec(&rx->rx, str, nmatch, pmatch, eflags); ++ rc = regexec(&rx->rx, str, nmatch, pmatch, eflags); ++#ifdef USE_C_LOCALE ++ uselocale(old); ++#else ++ (void)setlocale(LC_CTYPE, old); ++#endif ++ return rc; + } + + protected void +@@ -669,13 +687,6 @@ file_regfree(file_regex_t *rx) + { + if (rx->rc == 0) + regfree(&rx->rx); +-#ifdef USE_C_LOCALE +- (void)uselocale(rx->old_lc_ctype); +- freelocale(rx->c_lc_ctype); +-#else +- (void)setlocale(LC_CTYPE, rx->old_lc_ctype); +- free(rx->old_lc_ctype); +-#endif + } + + protected void +diff --git a/src/softmagic.c b/src/softmagic.c +index 43338fc..b4052a6 100644 +--- a/src/softmagic.c ++++ b/src/softmagic.c +@@ -478,11 +478,11 @@ check_fmt(struct magic_set *ms, const char *fmt) + if (strchr(fmt, '%') == NULL) + return 0; + +- rc = file_regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); ++ rc = file_regcomp(ms, &rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); + if (rc) { + file_regerror(&rx, rc, ms); + } else { +- rc = file_regexec(&rx, fmt, 0, 0, 0); ++ rc = file_regexec(ms, &rx, fmt, 0, 0, 0); + rv = !rc; + } + file_regfree(&rx); +@@ -2003,11 +2003,12 @@ alloc_regex(struct magic_set *ms, struct magic *m) + return NULL; + } + +- rc = file_regcomp(rx, m->value.s, REG_EXTENDED | REG_NEWLINE | ++ rc = file_regcomp(ms, rx, m->value.s, REG_EXTENDED | REG_NEWLINE | + ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); + if (rc == 0) + return rx; + ++fprintf(stderr, "regcomp %s %d\n", m->value.s, rc); + file_regerror(rx, rc, ms); + file_regfree(rx); + free(rx); +@@ -2225,7 +2226,7 @@ magiccheck(struct magic_set *ms, struct magic *m, file_regex_t **m_cache) + search = CCAST(char *, ""); + copy = NULL; + } +- rc = file_regexec(rx, RCAST(const char *, search), ++ rc = file_regexec(ms, rx, RCAST(const char *, search), + 1, &pmatch, 0); + free(copy); + switch (rc) { diff --git a/SOURCES/file-5.39-regex-caching-3.patch b/SOURCES/file-5.39-regex-caching-3.patch new file mode 100644 index 0000000..0a9e0a0 --- /dev/null +++ b/SOURCES/file-5.39-regex-caching-3.patch @@ -0,0 +1,28 @@ +From d1a00ae92b2cf09298615cf3aba474d8fec7380f Mon Sep 17 00:00:00 2001 +From: Christos Zoulas +Date: Mon, 18 Apr 2022 21:46:43 +0000 +Subject: [PATCH] From Dirk Mueller: + +when name/use was used, the regex caching table was incorrectly +initialized, which led to false or missing matches. +--- + src/apprentice.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/apprentice.c b/src/apprentice.c +index 804c0e33e..992102b4e 100644 +--- a/src/apprentice.c ++++ b/src/apprentice.c +@@ -3678,11 +3678,11 @@ file_magicfind(struct magic_set *ms, const char *name, struct mlist *v) + continue; + if (strcmp(ma[i].value.s, name) == 0) { + v->magic = &ma[i]; ++ v->magic_rxcomp = &(ml->magic_rxcomp[i]); + for (j = i + 1; j < ml->nmagic; j++) + if (ma[j].cont_level == 0) + break; + v->nmagic = j - i; +- v->magic_rxcomp = ml->magic_rxcomp; + return 0; + } + } diff --git a/SOURCES/file-5.39-regex-caching-4.patch b/SOURCES/file-5.39-regex-caching-4.patch new file mode 100644 index 0000000..41b6fd0 --- /dev/null +++ b/SOURCES/file-5.39-regex-caching-4.patch @@ -0,0 +1,164 @@ +diff --git a/src/apprentice.c b/src/apprentice.c +index 781c5e1..50a91cf 100644 +--- a/src/apprentice.c ++++ b/src/apprentice.c +@@ -2724,11 +2724,9 @@ getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) + file_regex_t rx; + int rc = file_regcomp(ms, &rx, m->value.s, + REG_EXTENDED); +- if (rc) { +- if (ms->flags & MAGIC_CHECK) +- file_regerror(&rx, rc, ms); ++ if (rc == 0) { ++ file_regfree(&rx); + } +- file_regfree(&rx); + return rc ? -1 : 0; + } + return 0; +diff --git a/src/file.h b/src/file.h +index f049446..ee15855 100644 +--- a/src/file.h ++++ b/src/file.h +@@ -171,11 +171,7 @@ + #define FILE_COMPILE 2 + #define FILE_LIST 3 + +-typedef struct { +- const char *pat; +- int rc; +- regex_t rx; +-} file_regex_t; ++typedef regex_t file_regex_t; + + struct buffer { + int fd; +@@ -576,7 +572,6 @@ protected int file_regcomp(struct magic_set *, file_regex_t *, const char *, + protected int file_regexec(struct magic_set *, file_regex_t *, const char *, + size_t, regmatch_t *, int); + protected void file_regfree(file_regex_t *); +-protected void file_regerror(file_regex_t *, int, struct magic_set *); + + typedef struct { + char *buf; +diff --git a/src/funcs.c b/src/funcs.c +index 1391a44..df436eb 100644 +--- a/src/funcs.c ++++ b/src/funcs.c +@@ -614,9 +614,7 @@ file_replace(struct magic_set *ms, const char *pat, const char *rep) + int rc, rv = -1; + + rc = file_regcomp(ms, &rx, pat, REG_EXTENDED); +- if (rc) { +- file_regerror(&rx, rc, ms); +- } else { ++ if (rc == 0) { + regmatch_t rm; + int nm = 0; + while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) { +@@ -644,16 +642,22 @@ file_regcomp(struct magic_set *ms, file_regex_t *rx, const char *pat, int flags) + strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old)); + (void)setlocale(LC_CTYPE, "C"); + #endif +- rx->pat = pat; +- +- rx->rc = regcomp(&rx->rx, pat, flags); ++ int rc; ++ rc = regcomp(rx, pat, flags); + + #ifdef USE_C_LOCALE + uselocale(old); + #else + (void)setlocale(LC_CTYPE, old); + #endif +- return rx->rc; ++ if (rc > 0 && (ms->flags & MAGIC_CHECK)) { ++ char errmsg[512]; ++ ++ (void)regerror(rc, rx, errmsg, sizeof(errmsg)); ++ file_magerror(ms, "regex error %d for `%s', (%s)", rc, pat, ++ errmsg); ++ } ++ return rc; + } + + protected int +@@ -669,11 +673,10 @@ file_regexec(struct magic_set *ms, file_regex_t *rx, const char *str, + (void)setlocale(LC_CTYPE, "C"); + #endif + int rc; +- assert(rx->rc == 0); + /* XXX: force initialization because glibc does not always do this */ + if (nmatch != 0) + memset(pmatch, 0, nmatch * sizeof(*pmatch)); +- rc = regexec(&rx->rx, str, nmatch, pmatch, eflags); ++ rc = regexec(rx, str, nmatch, pmatch, eflags); + #ifdef USE_C_LOCALE + uselocale(old); + #else +@@ -685,18 +688,7 @@ file_regexec(struct magic_set *ms, file_regex_t *rx, const char *str, + protected void + file_regfree(file_regex_t *rx) + { +- if (rx->rc == 0) +- regfree(&rx->rx); +-} +- +-protected void +-file_regerror(file_regex_t *rx, int rc, struct magic_set *ms) +-{ +- char errmsg[512]; +- +- (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg)); +- file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat, +- errmsg); ++ regfree(rx); + } + + protected file_pushbuf_t * +diff --git a/src/softmagic.c b/src/softmagic.c +index b4052a6..f469a12 100644 +--- a/src/softmagic.c ++++ b/src/softmagic.c +@@ -474,14 +474,13 @@ check_fmt(struct magic_set *ms, const char *fmt) + { + file_regex_t rx; + int rc, rv = -1; ++ const char* pat = "%[-0-9\\.]*s"; + + if (strchr(fmt, '%') == NULL) + return 0; + +- rc = file_regcomp(ms, &rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); +- if (rc) { +- file_regerror(&rx, rc, ms); +- } else { ++ rc = file_regcomp(ms, &rx, pat, REG_EXTENDED|REG_NOSUB); ++ if (rc == 0) { + rc = file_regexec(ms, &rx, fmt, 0, 0, 0); + rv = !rc; + } +@@ -2008,9 +2007,6 @@ alloc_regex(struct magic_set *ms, struct magic *m) + if (rc == 0) + return rx; + +-fprintf(stderr, "regcomp %s %d\n", m->value.s, rc); +- file_regerror(rx, rc, ms); +- file_regfree(rx); + free(rx); + return NULL; + } +@@ -2243,12 +2239,9 @@ magiccheck(struct magic_set *ms, struct magic *m, file_regex_t **m_cache) + break; + + default: +- file_regerror(rx, rc, ms); +- v = CAST(uint64_t, -1); ++ return -1; + break; + } +- if (v == CAST(uint64_t, -1)) +- return -1; + break; + } + case FILE_INDIRECT: diff --git a/SOURCES/file-5.39-regex-combinations.patch b/SOURCES/file-5.39-regex-combinations.patch new file mode 100644 index 0000000..4852762 --- /dev/null +++ b/SOURCES/file-5.39-regex-combinations.patch @@ -0,0 +1,58 @@ +From a4b3abc4104d8a4eeb84a6242f2f1a830204a539 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas +Date: Sat, 12 Mar 2022 15:09:47 +0000 +Subject: [PATCH] Combine regex's to improve performance adjusting strength to + preserve ranking (Dirk Mueller) + +--- + magic/Magdir/make | 29 +++++++---------------------- + 1 file changed, 7 insertions(+), 22 deletions(-) + +diff --git a/magic/Magdir/make b/magic/Magdir/make +index f522b4f18..1abdf7a3e 100644 +--- a/magic/Magdir/make ++++ b/magic/Magdir/make +@@ -1,36 +1,21 @@ + #------------------------------------------------------------------------------ +-# $File: make,v 1.4 2018/05/29 17:26:02 christos Exp $ ++# $File: make,v 1.5 2022/03/12 15:09:47 christos Exp $ + # make: file(1) magic for makefiles + # + # URL: https://en.wikipedia.org/wiki/Make_(software) +-0 regex/100l \^CFLAGS makefile script text +-!:mime text/x-makefile +-0 regex/100l \^VPATH makefile script text +-!:mime text/x-makefile +-0 regex/100l \^LDFLAGS makefile script text +-!:mime text/x-makefile +-0 regex/100l \^all: makefile script text +-!:mime text/x-makefile +-0 regex/100l \^\\.PRECIOUS makefile script text ++0 regex/100l \^(CFLAGS|VPATH|LDFLAGS|all:|\\.PRECIOUS) makefile script text + !:mime text/x-makefile ++!:strength -15 + # Update: Joerg Jenderek + # Reference: https://www.freebsd.org/cgi/man.cgi?make(1) + # exclude grub-core\lib\libgcrypt\mpi\Makefile.am with "#BEGIN_ASM_LIST" + # by additional escaping point character +-0 regex/100l \^\\.BEGIN BSD makefile script text +-!:mime text/x-makefile +-!:ext /mk +-!:strength +10 + # exclude MS Windows help file CoNtenT with ":include FOOBAR.CNT" + # and NSIS script with "!include" by additional escaping point character +-0 regex/100l \^\\.include BSD makefile script text +-!:mime text/x-makefile +-!:ext /mk +-!:strength +10 +-0 regex/100l \^\\.endif BSD makefile script text ++0 regex/100l \^\\.(BEGIN|endif|include) BSD makefile script text + !:mime text/x-makefile + !:ext /mk +-!:strength +10 +-0 regex/100l \^SUBDIRS automake makefile script text ++!:strength -10 ++0 regex/100l \^SUBDIRS[[:space:]]+= automake makefile script text + !:mime text/x-makefile +-!:strength +10 ++!:strength -15 diff --git a/SOURCES/file-5.39-regex-escape.patch b/SOURCES/file-5.39-regex-escape.patch new file mode 100644 index 0000000..e6471b2 --- /dev/null +++ b/SOURCES/file-5.39-regex-escape.patch @@ -0,0 +1,86 @@ +diff --git a/magic/Magdir/archive b/magic/Magdir/archive +index 99798b0..945f536 100644 +--- a/magic/Magdir/archive ++++ b/magic/Magdir/archive +@@ -150,7 +150,7 @@ + # Incremental snapshot gnu-tar format from: + # https://www.gnu.org/software/tar/manual/html_node/Snapshot-Files.html + 0 string GNU\ tar- GNU tar incremental snapshot data +->&0 regex [0-9]\.[0-9]+-[0-9]+ version %s ++>&0 regex [0-9]\\.[0-9]+-[0-9]+ version %s + + # cpio archives + # +diff --git a/magic/Magdir/ctf b/magic/Magdir/ctf +index ebea8f3..d91684d 100644 +--- a/magic/Magdir/ctf ++++ b/magic/Magdir/ctf +@@ -20,4 +20,4 @@ + # CTF metadata (plain text) + 0 string /*\x20CTF\x20 Common Trace Format (CTF) plain text metadata + !:strength + 5 # this is to make sure we beat C +->&0 regex [0-9]+\.[0-9]+ \b, v%s ++>&0 regex [0-9]+\\.[0-9]+ \b, v%s +diff --git a/magic/Magdir/geo b/magic/Magdir/geo +index d72e514..dda5f73 100644 +--- a/magic/Magdir/geo ++++ b/magic/Magdir/geo +@@ -1,6 +1,6 @@ + + #------------------------------------------------------------------------------ +-# $File: geo,v 1.7 2019/04/19 00:42:27 christos Exp $ ++# $File: geo,v 1.8 2022/03/24 15:48:58 christos Exp $ + # Geo- files from Kurt Schwehr + + ###################################################################### +@@ -28,8 +28,8 @@ + # Knudsen subbottom chirp profiler - Binary File Format: B9 + # KEB D409-03167 V1.75 Huffman + 0 string KEB\ Knudsen seismic KEL binary (KEB) - +->4 regex [-A-Z0-9]* Software: %s +->>&1 regex V[0-9]*\.[0-9]* version %s ++>4 regex [-A-Z0-9]+ Software: %s ++>>&1 regex V[0-9]+\\.[0-9]+ version %s + + ###################################################################### + # +@@ -40,7 +40,7 @@ + + # Caris LIDAR format for LADS comes as two parts... ascii location file and binary waveform data + 0 string HCA LADS Caris Ascii Format (CAF) bathymetric lidar +->4 regex [0-9]*\.[0-9]* version %s ++>4 regex [0-9]+\\.[0-9]+ version %s + + 0 string HCB LADS Caris Binary Format (CBF) bathymetric lidar waveform data + >3 byte x version %d . +@@ -69,7 +69,7 @@ + + # mb121 https://www.saic.com/maritime/gsf/ + 8 string GSF-v SAIC generic sensor format (GSF) sonar data, +->&0 regex [0-9]*\.[0-9]* version %s ++>&0 regex [0-9]+\\.[0-9]+ version %s + + # MGD77 - https://www.ngdc.noaa.gov/mgg/dat/geodas/docs/mgd77.htm + # mb161 +diff --git a/magic/Magdir/windows b/magic/Magdir/windows +index 8a7923f..1247e03 100644 +--- a/magic/Magdir/windows ++++ b/magic/Magdir/windows +@@ -358,7 +358,7 @@ + # skip space at beginning + >0 string \040 + # name without extension and greater character or name with hlp extension +->>1 regex/c \^([^\xd>]*|.*\.hlp) MS Windows help file Content, based "%s" ++>>1 regex/c \^([^\xd>]*|.*\\.hlp) MS Windows help file Content, based "%s" + !:mime text/plain + !:apple ????TEXT + !:ext cnt +@@ -535,7 +535,7 @@ + # http://www.winfaq.de/faq_html/Content/tip2500/onlinefaq.php?h=tip2653.htm + # https://msdn.microsoft.com/en-us/library/windows/desktop/cc144102.aspx + # .ShellClassInfo DeleteOnCopy LocalizedFileNames ASCII coded case-independent +->>&0 regex/c \^(\.ShellClassInfo|DeleteOnCopy|LocalizedFileNames)] Windows desktop.ini ++>>&0 regex/1024c \^(\\.ShellClassInfo|DeleteOnCopy|LocalizedFileNames)] Windows desktop.ini + !:mime application/x-wine-extension-ini + #!:mime text/plain + # https://support.microsoft.com/kb/84709/ diff --git a/SOURCES/file-5.39-regex-optimalizations.patch b/SOURCES/file-5.39-regex-optimalizations.patch new file mode 100644 index 0000000..d7ccc4e --- /dev/null +++ b/SOURCES/file-5.39-regex-optimalizations.patch @@ -0,0 +1,79 @@ +From 14b5d7aa0b55275969809fdf84e8a8caee857c0f Mon Sep 17 00:00:00 2001 +From: Christos Zoulas +Date: Mon, 18 Apr 2022 21:38:10 +0000 +Subject: [PATCH] From Dirk Mueller: * regex rules need literal dots escaped, + otherwise they are considered any character * literal search strings can be + searched using search rather than the much more expensive regex * use + standard xml declaration search as used in other format matchers * only match + the first 1024 bytes, the information we look for should be in the very + first tag * remove unnecessary parentheses + +--- + magic/Magdir/dataone | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone +index 8ef3f7981..566633eff 100644 +--- a/magic/Magdir/dataone ++++ b/magic/Magdir/dataone +@@ -1,6 +1,6 @@ + + #------------------------------------------------------------------------------ +-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $ ++# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $ + # + # DataONE- files from Dave Vieglais & + # Pratik Shrivastava +@@ -9,39 +9,39 @@ + #------------------------------------------------------------------------------ + + # EML (Ecological Metadata Language Format) +-0 string &0 regex (eml)-[0-9].[0-9].[0-9]+ eml://ecoinformatics.org/%s ++0 string \&0 regex/1024 eml-[0-9]\\.[0-9]\\.[0-9]+ eml://ecoinformatics.org/%s + + # onedcx (DataONE Dublin Core Extended v1.0) +->&0 regex (onedcx/v)[0-9].[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0 ++>&0 regex/1024 onedcx/v[0-9]\\.[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0 + + # FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata, + # version 001-1998) +->&0 regex fgdc FGDC-STD-001-1998 ++>&0 search/1024 fgdc FGDC-STD-001-1998 + + # Mercury (Oak Ridge National Lab Mercury Metadata version 1.0) +->&0 regex (mercury/terms/v)[0-9].[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0 ++>&0 regex/1024 mercury/terms/v[0-9]\\.[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0 + + # ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language) +->&0 regex isotc211 +->>&0 regex eng;USA https://www.isotc211.org/2005/gmd ++>&0 search/1024 isotc211 ++>>&0 search/1024 eng;USA https://www.isotc211.org/2005/gmd + + # ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language) +->>&0 regex gov.noaa.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa ++>>&0 regex/1024 gov\\.noaa\\.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa + + # ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language +->>&0 regex pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea ++>>&0 regex/1024 pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea + !:mime text/xml + + + # Object Reuse and Exchange Vocabulary +-0 string &0 regex rdf +->>&0 regex openarchives https://www.openarchives.org/ore/terms ++0 string \&0 search/1024 rdf ++>>&0 search/1024 openarchives https://www.openarchives.org/ore/terms + !:mime application/rdf+xml + + + # Dryad Metadata Application Profile Version 3.1 + 0 string &0 regex (dryad-bibo/v)[0-9].[0-9] https://datadryad.org/profile/v3.1 ++>&0 regex/1024 dryad-bibo/v[0-9]\\.[0-9] https://datadryad.org/profile/v3.1 + !:mime text/xml diff --git a/SPECS/file.spec b/SPECS/file.spec index 2d6edf7..bd51b91 100644 --- a/SPECS/file.spec +++ b/SPECS/file.spec @@ -15,7 +15,7 @@ Summary: Utility for determining file types Name: file Version: 5.39 -Release: 8%{?dist} +Release: 10%{?dist} License: BSD Source0: http://ftp.astron.com/pub/file/file-%{version}.tar.gz @@ -32,6 +32,24 @@ Patch3: file-5.39-CLOEXEC.patch # Upstream commit 7d9b0f0d853957ad88dae0f440fecd58d2740ca7 (#1963892) Patch4: file-5.40-magic-python.patch +# not yet upstream +Patch5: file-5.33-fix-compression.patch + +# Upstream commit 4c8a4d8dbab1e73bfb30e391dcec49fcf269f84d (#2120692) +Patch6: file-5.39-regex-caching-1.patch +# Upstream commit 7d438e28c16773e28a3707935c8e5d9927a515a7 (#2120692) +Patch7: file-5.39-regex-caching-2.patch +# Upstream commit d1a00ae92b2cf09298615cf3aba474d8fec7380f (#2120692) +Patch8: file-5.39-regex-caching-3.patch +# Upstream commit 4254a0afecfd2ae200c694dfcf93f4b4ac21652e (#2120692) +Patch9: file-5.39-regex-caching-4.patch +# Upstream commit a4b3abc4104d8a4eeb84a6242f2f1a830204a539 (#2120692) +Patch10: file-5.39-regex-combinations.patch +# Upstream commit d17d8e9ff8ad8e95fdf66239ccdcc2133d1ce5ce (#2120692) +Patch11: file-5.39-regex-escape.patch +# Upstream commit 14b5d7aa0b55275969809fdf84e8a8caee857c0f (#2120692) +Patch12: file-5.39-regex-optimalizations.patch + URL: https://www.darwinsys.com/file/ Requires: file-libs%{?_isa} = %{version}-%{release} BuildRequires: zlib-devel @@ -210,6 +228,14 @@ cd %{py3dir} %endif %changelog +* Wed Aug 24 2022 Vincent Mihalkovic - 5.39-10 +- speedup magic matching + Resolves: #2120692 + +* Wed Aug 17 2022 Vincent Mihalkovic - 5.39-9 +- fix recognition (src/compress.c) of compressed empty files + Resolves: #2121694 + * Mon Aug 09 2021 Mohan Boddu - 5.39-8 - Rebuilt for IMA sigs, glibc 2.34, aarch64 flags Related: rhbz#1991688