8a9804
From 14b5d7aa0b55275969809fdf84e8a8caee857c0f Mon Sep 17 00:00:00 2001
8a9804
From: Christos Zoulas <christos@zoulas.com>
8a9804
Date: Mon, 18 Apr 2022 21:38:10 +0000
8a9804
Subject: [PATCH] From Dirk Mueller: * regex rules need literal dots escaped,
8a9804
 otherwise they are considered   any character * literal search strings can be
8a9804
 searched using search rather than the   much more expensive regex * use
8a9804
 standard xml declaration search as used in other format matchers * only match
8a9804
 the first 1024 bytes, the information we look for should   be in the very
8a9804
 first tag * remove unnecessary parentheses
8a9804
8a9804
---
8a9804
 magic/Magdir/dataone | 28 ++++++++++++++--------------
8a9804
 1 file changed, 14 insertions(+), 14 deletions(-)
8a9804
8a9804
diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone
8a9804
index 8ef3f7981..566633eff 100644
8a9804
--- a/magic/Magdir/dataone
8a9804
+++ b/magic/Magdir/dataone
8a9804
@@ -1,6 +1,6 @@
8a9804
 
8a9804
 #------------------------------------------------------------------------------
8a9804
-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $
8a9804
+# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $
8a9804
 #
8a9804
 # DataONE- files from Dave Vieglais <dave.vieglais@gmail.com> &
8a9804
 #                     Pratik Shrivastava <pratikshrivastava23@gmail.com>
8a9804
@@ -9,39 +9,39 @@
8a9804
 #------------------------------------------------------------------------------
8a9804
 
8a9804
 # EML (Ecological Metadata Language Format)
8a9804
-0	string	
8a9804
->&0	regex	(eml)-[0-9].[0-9].[0-9]+	eml://ecoinformatics.org/%s
8a9804
+0	string	\
8a9804
+>&0	regex/1024	eml-[0-9]\\.[0-9]\\.[0-9]+	eml://ecoinformatics.org/%s
8a9804
 
8a9804
 # onedcx (DataONE Dublin Core Extended v1.0)
8a9804
->&0	regex	(onedcx/v)[0-9].[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
8a9804
+>&0	regex/1024	onedcx/v[0-9]\\.[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
8a9804
 
8a9804
 # FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata,
8a9804
 # version 001-1998)
8a9804
->&0	regex	fgdc				FGDC-STD-001-1998
8a9804
+>&0	search/1024	fgdc				FGDC-STD-001-1998
8a9804
 
8a9804
 # Mercury (Oak Ridge National Lab Mercury Metadata version 1.0)
8a9804
->&0	regex	(mercury/terms/v)[0-9].[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
8a9804
+>&0	regex/1024	mercury/terms/v[0-9]\\.[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
8a9804
 
8a9804
 # ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language)
8a9804
->&0	regex	isotc211
8a9804
->>&0	regex	eng;USA				https://www.isotc211.org/2005/gmd
8a9804
+>&0	search/1024	isotc211
8a9804
+>>&0	search/1024	eng;USA				https://www.isotc211.org/2005/gmd
8a9804
 
8a9804
 # ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language)
8a9804
->>&0	regex	gov.noaa.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
8a9804
+>>&0	regex/1024	gov\\.noaa\\.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
8a9804
 
8a9804
 # ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language
8a9804
->>&0	regex	pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
8a9804
+>>&0	regex/1024	pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
8a9804
 !:mime	text/xml
8a9804
 
8a9804
 
8a9804
 # Object Reuse and Exchange Vocabulary
8a9804
-0	string	
8a9804
->&0	regex	rdf
8a9804
->>&0	regex	openarchives	https://www.openarchives.org/ore/terms
8a9804
+0	string	\
8a9804
+>&0	search/1024	rdf
8a9804
+>>&0	search/1024	openarchives	https://www.openarchives.org/ore/terms
8a9804
 !:mime application/rdf+xml
8a9804
 
8a9804
 
8a9804
 # Dryad Metadata Application Profile Version 3.1
8a9804
 0	string	
8a9804
->&0	regex	(dryad-bibo/v)[0-9].[0-9]	https://datadryad.org/profile/v3.1
8a9804
+>&0	regex/1024	dryad-bibo/v[0-9]\\.[0-9]	https://datadryad.org/profile/v3.1
8a9804
 !:mime	text/xml