From 12845ab14769f3d913db13105f72ca1dae502dce Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Mon, 15 Jul 2024 13:39:09 +0200 Subject: [PATCH] use sync.OnceValue for various regular expressions Using regexp.MustCompile consumes a significant amount of memory when importing the package, even if those regular expressions are not used. This changes compiling the regular expressions to use a sync.OnceValue so that they're only compiled the first time they're used. There are various regular expressions remaining that are still compiled on import, but these are exported, so changing them to a sync.OnceValue would be a breaking change; we can still decide to do so, but leaving that for a follow-up. It's worth noting that sync.OnceValue requires go1.21 or up, so raising the minimum version accordingly. Signed-off-by: Sebastiaan van Stijn --- normalize.go | 4 ++-- reference.go | 14 +++++++------- regexp.go | 25 ++++++++++++++++++------- regexp_test.go | 8 ++++---- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/normalize.go b/normalize.go index 4979eec..7c83bd4 100644 --- a/normalize.go +++ b/normalize.go @@ -64,7 +64,7 @@ type normalizedNamed interface { // _ "crypto/sha256" // ) func ParseNormalizedNamed(s string) (Named, error) { - if ok := anchoredIdentifierRegexp.MatchString(s); ok { + if ok := anchoredIdentifierRegexp().MatchString(s); ok { return nil, fmt.Errorf("invalid repository name (%s), cannot specify 64-byte hexadecimal strings", s) } domain, remainder := splitDockerDomain(s) @@ -274,7 +274,7 @@ func TagNameOnly(ref Named) Named { // _ "crypto/sha256" // ) func ParseAnyReference(ref string) (Reference, error) { - if ok := anchoredIdentifierRegexp.MatchString(ref); ok { + if ok := anchoredIdentifierRegexp().MatchString(ref); ok { return digestReference("sha256:" + ref), nil } if dgst, err := digest.Parse(ref); err == nil { diff --git a/reference.go b/reference.go index c343cf6..44a7261 100644 --- a/reference.go +++ b/reference.go @@ 
-207,7 +207,7 @@ func Path(named Named) (name string) { // If no valid hostname is found, the hostname is empty and the full value // is returned as name func splitDomain(name string) (string, string) { - match := anchoredNameRegexp.FindStringSubmatch(name) + match := anchoredNameRegexp().FindStringSubmatch(name) if len(match) != 3 { return "", name } @@ -232,9 +232,9 @@ func Parse(s string) (Reference, error) { return nil, ErrNameEmpty } - matches := referenceRegexp.FindStringSubmatch(s) + matches := referenceRegexp().FindStringSubmatch(s) if matches == nil { - if sl := strings.ToLower(s); sl != s && referenceRegexp.FindStringSubmatch(sl) != nil { + if sl := strings.ToLower(s); sl != s && referenceRegexp().FindStringSubmatch(sl) != nil { // Succeeds when lower-casing, so input contains an invalid repository name. return nil, ErrNameContainsUppercase } @@ -243,7 +243,7 @@ func Parse(s string) (Reference, error) { var repo repository - nameMatch := anchoredNameRegexp.FindStringSubmatch(matches[1]) + nameMatch := anchoredNameRegexp().FindStringSubmatch(matches[1]) if len(nameMatch) == 3 { repo.domain = nameMatch[1] repo.path = nameMatch[2] @@ -294,7 +294,7 @@ func ParseNamed(s string) (Named, error) { // WithName returns a named object representing the given string. If the input // is invalid ErrReferenceInvalidFormat will be returned. func WithName(name string) (Named, error) { - match := anchoredNameRegexp.FindStringSubmatch(name) + match := anchoredNameRegexp().FindStringSubmatch(name) if match == nil || len(match) != 3 { return nil, ErrReferenceInvalidFormat } @@ -312,7 +312,7 @@ func WithName(name string) (Named, error) { // WithTag combines the name from "name" and the tag from "tag" to form a // reference incorporating both the name and the tag. 
func WithTag(name Named, tag string) (NamedTagged, error) { - if !anchoredTagRegexp.MatchString(tag) { + if !anchoredTagRegexp().MatchString(tag) { return nil, ErrTagInvalidFormat } var repo repository @@ -338,7 +338,7 @@ func WithTag(name Named, tag string) (NamedTagged, error) { // WithDigest combines the name from "name" and the digest from "digest" to form // a reference incorporating both the name and the digest. func WithDigest(name Named, digest digest.Digest) (Canonical, error) { - if !anchoredDigestRegexp.MatchString(digest.String()) { + if !anchoredDigestRegexp().MatchString(digest.String()) { return nil, ErrDigestInvalidFormat } var repo repository diff --git a/regexp.go b/regexp.go index e7531df..850b8c3 100644 --- a/regexp.go +++ b/regexp.go @@ -3,6 +3,7 @@ package reference import ( "regexp" "strings" + "sync" ) // DigestRegexp matches well-formed digests, including algorithm (e.g. "sha256:"). @@ -31,7 +32,7 @@ var NameRegexp = regexp.MustCompile(namePat) // ReferenceRegexp is the full supported format of a reference. The regexp // is anchored and has capturing groups for name, tag, and digest // components. -var ReferenceRegexp = referenceRegexp +var ReferenceRegexp = referenceRegexp() // TagRegexp matches valid tag names. From [docker/docker:graph/tags.go]. // @@ -112,15 +113,21 @@ var ( // referenceRegexp is the full supported format of a reference. The regexp // is anchored and has capturing groups for name, tag, and digest // components. - referenceRegexp = regexp.MustCompile(referencePat) + referenceRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(referencePat) + }) // anchoredTagRegexp matches valid tag names, anchored at the start and // end of the matched string. 
- anchoredTagRegexp = regexp.MustCompile(anchored(tag)) + anchoredTagRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(tag)) + }) // anchoredDigestRegexp matches valid digests, anchored at the start and // end of the matched string. - anchoredDigestRegexp = regexp.MustCompile(anchored(digestPat)) + anchoredDigestRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(digestPat)) + }) // pathComponent restricts path-components to start with an alphanumeric // character, with following parts able to be separated by a separator @@ -136,14 +143,18 @@ var ( // anchoredNameRegexp is used to parse a name value, capturing the // domain and trailing components. - anchoredNameRegexp = regexp.MustCompile(anchoredNamePat) - anchoredNamePat = anchored(optional(capture(domainAndPort), `/`), capture(remoteName)) + anchoredNameRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchoredNamePat) + }) + anchoredNamePat = anchored(optional(capture(domainAndPort), `/`), capture(remoteName)) referencePat = anchored(capture(namePat), optional(`:`, capture(tag)), optional(`@`, capture(digestPat))) // anchoredIdentifierRegexp is used to check or match an // identifier value, anchored at start and end of string. 
- anchoredIdentifierRegexp = regexp.MustCompile(anchored(identifier)) + anchoredIdentifierRegexp = sync.OnceValue(func() *regexp.Regexp { + return regexp.MustCompile(anchored(identifier)) + }) ) // optional wraps the expression in a non-capturing group and makes the diff --git a/regexp_test.go b/regexp_test.go index 4f69965..303f2c7 100644 --- a/regexp_test.go +++ b/regexp_test.go @@ -179,7 +179,7 @@ func TestDomainRegexp(t *testing.T) { func TestFullNameRegexp(t *testing.T) { t.Parallel() - if n := anchoredNameRegexp.NumSubexp(); n != 2 { + if n := anchoredNameRegexp().NumSubexp(); n != 2 { t.Fatalf("anchored name regexp should have two submatches: %v, %v != 2", anchoredNamePat, n) } @@ -471,14 +471,14 @@ func TestFullNameRegexp(t *testing.T) { tc := tc t.Run(tc.input, func(t *testing.T) { t.Parallel() - checkRegexp(t, anchoredNameRegexp, tc) + checkRegexp(t, anchoredNameRegexp(), tc) }) } } func TestReferenceRegexp(t *testing.T) { t.Parallel() - if n := referenceRegexp.NumSubexp(); n != 3 { + if n := referenceRegexp().NumSubexp(); n != 3 { t.Fatalf("anchored name regexp should have three submatches: %v, %v != 3", referencePat, n) } @@ -581,7 +581,7 @@ func TestIdentifierRegexp(t *testing.T) { tc := tc t.Run(tc.input, func(t *testing.T) { t.Parallel() - match := anchoredIdentifierRegexp.MatchString(tc.input) + match := anchoredIdentifierRegexp().MatchString(tc.input) if match != tc.match { t.Errorf("Expected match=%t, got %t", tc.match, match) }