Optimize memory for error case

New option SimpleErrorMessages that avoids allocation in the error path. It's off by default to preserve backwards compatibility. Added benchmark BenchmarkBigParseAnyErrors that takes the big set of test cases, and injects errors to make them fail at pseudo-random places. This optimization speeds up the error path runtime by 4x and reduces error path allocation bytes by 13x!
2025-09-16 19:51:21 +08:00 · 2023-12-16 23:28:15 -07:00
parent d2e1443c4d
commit fbf07cc274
4 changed files with 330 additions and 215 deletions
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 Go Date Parser 
 ---------------------------
-Parse many date strings without knowing format in advance.  Uses a scanner to read bytes and use a state machine to find format.  Much faster than shotgun based parse methods.  See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison.
+Parse many date strings without knowing format in advance.  Uses a scanner to read bytes and use a state machine to find format.  Much faster than shotgun based parse methods.  See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones.
 [![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse)
@@ -9,7 +9,7 @@ Parse many date strings without knowing format in advance.  Uses a scanner to re
 [![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse)
 [![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse)
-**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option.
+**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. Some ambiguous formats can fail (e.g., trying to parse 31/03/2023 as the default month-first format `MM/DD/YYYY`), but can be automatically retried with `RetryAmbiguousDateWithSwap`.
 ```go
@@ -21,11 +21,24 @@ t, err := dateparse.ParseStrict("3/1/2014")
 > returns error 
 // Return a string that represents the layout to parse the given date-time.
 // For certain highly complex date formats, ParseFormat may not be accurate,
 // even if ParseAny is able to correctly parse it (e.g., anything that starts
 // with a weekday).
 layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
 > "Jan 2, 2006 3:04:05 PM"
 ```
 Performance Considerations
 ----------------------------------
 Internally a memory pool is used to minimize allocation overhead. If you could
 be frequently parsing text that does not match any format, consider turning on
 the `SimpleErrorMessages` option. This will make error messages have no
 contextual details, but will reduce allocation overhead 13x and will be 4x
 faster (most of the time is spent in generating a complex error message if the
 option is off (default)).
 Timezone Considerations
 ----------------------------------
--- a/bench_test.go
+++ b/bench_test.go
@@ -71,9 +71,11 @@ func BenchmarkBigParseIn(b *testing.B) {
 func BenchmarkBigParseRetryAmbiguous(b *testing.B) {
 	b.ReportAllocs()
 	opts := []ParserOption{RetryAmbiguousDateWithSwap(true)}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, t := range testInputs {
-			_, _ = ParseAny(t.in, RetryAmbiguousDateWithSwap(true))
+			_, _ = ParseAny(t.in, opts...)
 		}
 	}
 }
@@ -90,17 +92,48 @@ func BenchmarkShotgunParseErrors(b *testing.B) {
 func BenchmarkParseAnyErrors(b *testing.B) {
 	b.ReportAllocs()
 	opts := []ParserOption{SimpleErrorMessages(true)}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, t := range testParseErrors {
-			_, _ = ParseAny(t.in)
+			_, _ = ParseAny(t.in, opts...)
 		}
 	}
 }
 // BenchmarkBigParseAnyErrors benchmarks ParseAny on inputs that are expected
 // to fail, with the SimpleErrorMessages option turned on.
 //
 // The failing inputs are derived from testInputs: every input has some of its
 // bytes overwritten with '?' at a stride that depends on the input's index, so
 // the corruption lands at varied but deterministic positions. The benchmark
 // panics if any corrupted input parses without an error.
 func BenchmarkBigParseAnyErrors(b *testing.B) {
 	b.ReportAllocs()
 	opts := []ParserOption{SimpleErrorMessages(true)}
 	// Manufacture a bunch of different tests with errors injected into them.
 	// At most one corrupted string is produced per test input, so size the
 	// slice up front.
 	testBigErrorInputs := make([]string, 0, len(testInputs))
 	for index, t := range testInputs {
 		if len(t.in) == 0 {
 			// Nothing to corrupt, and the modulo below would divide by zero.
 			continue
 		}
 		// Work on a copy; named so it does not shadow the *testing.B parameter.
 		corrupted := []byte(t.in)
 		// Stride between injected '?' bytes cycles through 4, 5, 6, 7.
 		spread := 4 + (index % 4)
 		// Taking the start modulo the length guarantees that even inputs
 		// shorter than the stride get at least one byte replaced.
 		for i := spread % len(corrupted); i < len(corrupted); i += spread {
 			corrupted[i] = '?'
 		}
 		testBigErrorInputs = append(testBigErrorInputs, string(corrupted))
 	}
 	// Exclude the setup above from the measurement.
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		for _, in := range testBigErrorInputs {
 			_, err := ParseAny(in, opts...)
 			if err == nil {
 				panic(fmt.Sprintf("expected parsing to fail: %s", in))
 			}
 		}
 	}
 }
 func BenchmarkParseAmbiguous(b *testing.B) {
 	b.ReportAllocs()
 	opts := []ParserOption{RetryAmbiguousDateWithSwap(true)}
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
-		MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true))
+		MustParse("13/02/2014 04:08:09 +0000 UTC", opts...)
 	}
 }
--- a/parseany.go
+++ b/parseany.go
--- a/parseany_test.go
+++ b/parseany_test.go
@@ -756,46 +756,52 @@ func TestParse(t *testing.T) {
 		assert.NotEqual(t, nil, err)
 	})
-	for _, th := range testInputs {
+	for _, simpleErrorMessage := range []bool{false, true} {
-		t.Run(th.in, func(t *testing.T) {
+		for _, th := range testInputs {
-			var ts time.Time
+			t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) {
-			defer func() {
+				var ts time.Time
-				if r := recover(); r != nil {
+				defer func() {
-					t.Fatalf("error: %s", r)
+					if r := recover(); r != nil {
 						t.Fatalf("error: %s", r)
 					}
 				}()
 				parserOptions := []ParserOption{
 					PreferMonthFirst(!th.preferDayFirst),
 					RetryAmbiguousDateWithSwap(th.retryAmbiguous),
 					SimpleErrorMessages(simpleErrorMessage),
 				}
-			}()
+				if len(th.loc) > 0 {
-			parserOptions := []ParserOption{PreferMonthFirst(!th.preferDayFirst), RetryAmbiguousDateWithSwap(th.retryAmbiguous)}
+					loc, err := time.LoadLocation(th.loc)
-			if len(th.loc) > 0 {
+					if err != nil {
-				loc, err := time.LoadLocation(th.loc)
+						t.Fatalf("Expected to load location %q but got %v", th.loc, err)
-				if err != nil {
+					}
-					t.Fatalf("Expected to load location %q but got %v", th.loc, err)
+					ts, err = ParseIn(th.in, loc, parserOptions...)
 					if err != nil {
 						t.Fatalf("expected to parse %q but got %v", th.in, err)
 					}
 					got := fmt.Sprintf("%v", ts.In(time.UTC))
 					assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
 					if th.out != got {
 						t.Fatalf("whoops, got %s, expected %s", got, th.out)
 					}
 					if len(th.zname) > 0 {
 						gotZone, _ := ts.Zone()
 						assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
 					}
 				} else {
 					ts = MustParse(th.in, parserOptions...)
 					got := fmt.Sprintf("%v", ts.In(time.UTC))
 					assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
 					if th.out != got {
 						t.Fatalf("whoops, got %s, expected %s", got, th.out)
 					}
 					if len(th.zname) > 0 {
 						gotZone, _ := ts.Zone()
 						assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
 					}
 				}
-				ts, err = ParseIn(th.in, loc, parserOptions...)
+			})
-				if err != nil {
+		}
 					t.Fatalf("expected to parse %q but got %v", th.in, err)
 				}
 				got := fmt.Sprintf("%v", ts.In(time.UTC))
 				assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
 				if th.out != got {
 					t.Fatalf("whoops, got %s, expected %s", got, th.out)
 				}
 				if len(th.zname) > 0 {
 					gotZone, _ := ts.Zone()
 					assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
 				}
 			} else {
 				ts = MustParse(th.in, parserOptions...)
 				got := fmt.Sprintf("%v", ts.In(time.UTC))
 				assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
 				if th.out != got {
 					t.Fatalf("whoops, got %s, expected %s", got, th.out)
 				}
 				if len(th.zname) > 0 {
 					gotZone, _ := ts.Zone()
 					assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
 				}
 			}
 		})
 	}
 	// some errors