From 14b5d7aa0b55275969809fdf84e8a8caee857c0f Mon Sep 17 00:00:00 2001
From: Christos Zoulas <christos@zoulas.com>
Date: Mon, 18 Apr 2022 21:38:10 +0000
Subject: [PATCH] From Dirk Mueller: * regex rules need literal dots escaped,
otherwise they are considered any character * literal search strings can be
searched using search rather than the much more expensive regex * use
standard xml declaration search as used in other format matchers * only match
the first 1024 bytes, the information we look for should be in the very
first tag * remove unnecessary parentheses
---
magic/Magdir/dataone | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone
index 8ef3f7981..566633eff 100644
--- a/magic/Magdir/dataone
+++ b/magic/Magdir/dataone
@@ -1,6 +1,6 @@
#------------------------------------------------------------------------------
-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $
+# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $
#
# DataONE- files from Dave Vieglais <dave.vieglais@gmail.com> &
# Pratik Shrivastava <pratikshrivastava23@gmail.com>
@@ -9,39 +9,39 @@
#------------------------------------------------------------------------------
# EML (Ecological Metadata Language Format)
-0 string <?xml
->&0 regex (eml)-[0-9].[0-9].[0-9]+ eml://ecoinformatics.org/%s
+0 string \<?xml\ version=
+>&0 regex/1024 eml-[0-9]\\.[0-9]\\.[0-9]+ eml://ecoinformatics.org/%s
# onedcx (DataONE Dublin Core Extended v1.0)
->&0 regex (onedcx/v)[0-9].[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0
+>&0 regex/1024 onedcx/v[0-9]\\.[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0
# FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata,
# version 001-1998)
->&0 regex fgdc FGDC-STD-001-1998
+>&0 search/1024 fgdc FGDC-STD-001-1998
# Mercury (Oak Ridge National Lab Mercury Metadata version 1.0)
->&0 regex (mercury/terms/v)[0-9].[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0
+>&0 regex/1024 mercury/terms/v[0-9]\\.[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0
# ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language)
->&0 regex isotc211
->>&0 regex eng;USA https://www.isotc211.org/2005/gmd
+>&0 search/1024 isotc211
+>>&0 search/1024 eng;USA https://www.isotc211.org/2005/gmd
# ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language)
->>&0 regex gov.noaa.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa
+>>&0 regex/1024 gov\\.noaa\\.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa
# ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language
->>&0 regex pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea
+>>&0 regex/1024 pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea
!:mime text/xml
# Object Reuse and Exchange Vocabulary
-0 string <?xml
->&0 regex rdf
->>&0 regex openarchives https://www.openarchives.org/ore/terms
+0 string \<?xml\ version=
+>&0 search/1024 rdf
+>>&0 search/1024 openarchives https://www.openarchives.org/ore/terms
!:mime application/rdf+xml
# Dryad Metadata Application Profile Version 3.1
0 string <DryadData
->&0 regex (dryad-bibo/v)[0-9].[0-9] https://datadryad.org/profile/v3.1
+>&0 regex/1024 dryad-bibo/v[0-9]\\.[0-9] https://datadryad.org/profile/v3.1
!:mime text/xml