Optimize memory for error case

New option SimpleErrorMessages that avoids allocation in the error path. It's off by default to preserve backwards compatibility.

Added benchmark BenchmarkBigParseAnyErrors that takes the big set of test cases, and injects errors to make them fail at pseudo-random places.

This optimization speeds up the error path runtime by 4x and reduces error path allocation bytes by 13x!
This commit is contained in:
Klondike Dragon 2023-12-16 23:28:15 -07:00
parent d2e1443c4d
commit fbf07cc274
4 changed files with 330 additions and 215 deletions

View File

@ -1,7 +1,7 @@
Go Date Parser Go Date Parser
--------------------------- ---------------------------
Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones.
[![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse) [![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse)
@ -9,7 +9,7 @@ Parse many date strings without knowing format in advance. Uses a scanner to re
[![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse) [![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse)
[![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse) [![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse)
**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. **MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. Some ambiguous formats can fail (e.g., trying to parse 31/03/2023 as the default month-first format `MM/DD/YYYY`), but can be automatically retried with `RetryAmbiguousDateWithSwap`.
```go ```go
@ -21,11 +21,24 @@ t, err := dateparse.ParseStrict("3/1/2014")
> returns error > returns error
// Return a string that represents the layout to parse the given date-time. // Return a string that represents the layout to parse the given date-time.
// For certain highly complex date formats, ParseFormat may not be accurate,
// even if ParseAny is able to correctly parse it (e.g., anything that starts
// with a weekday).
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM") layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM" > "Jan 2, 2006 3:04:05 PM"
``` ```
Performance Considerations
----------------------------------
Internally a memory pool is used to minimize allocation overhead. If you expect
to frequently parse text that does not match any format, consider turning on
the `SimpleErrorMessages` option. Error messages will then have no
contextual details, but the error path will allocate 13x less memory and run 4x
faster (when the option is off, which is the default, most of the time is spent
generating a complex error message).
Timezone Considerations Timezone Considerations
---------------------------------- ----------------------------------

View File

@ -71,9 +71,11 @@ func BenchmarkBigParseIn(b *testing.B) {
func BenchmarkBigParseRetryAmbiguous(b *testing.B) { func BenchmarkBigParseRetryAmbiguous(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
opts := []ParserOption{RetryAmbiguousDateWithSwap(true)}
b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, t := range testInputs { for _, t := range testInputs {
_, _ = ParseAny(t.in, RetryAmbiguousDateWithSwap(true)) _, _ = ParseAny(t.in, opts...)
} }
} }
} }
@ -90,17 +92,48 @@ func BenchmarkShotgunParseErrors(b *testing.B) {
func BenchmarkParseAnyErrors(b *testing.B) { func BenchmarkParseAnyErrors(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
opts := []ParserOption{SimpleErrorMessages(true)}
b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
for _, t := range testParseErrors { for _, t := range testParseErrors {
_, _ = ParseAny(t.in) _, _ = ParseAny(t.in, opts...)
}
}
}
func BenchmarkBigParseAnyErrors(b *testing.B) {
b.ReportAllocs()
opts := []ParserOption{SimpleErrorMessages(true)}
// manufacture a bunch of different tests with random errors put in them
var testBigErrorInputs []string
for index, t := range testInputs {
b := []byte(t.in)
spread := 4 + (index % 4)
startingIndex := spread % len(b)
for i := startingIndex; i < len(b); i += spread {
b[i] = '?'
}
testBigErrorInputs = append(testBigErrorInputs, string(b))
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, in := range testBigErrorInputs {
_, err := ParseAny(in, opts...)
if err == nil {
panic(fmt.Sprintf("expected parsing to fail: %s", in))
}
} }
} }
} }
func BenchmarkParseAmbiguous(b *testing.B) { func BenchmarkParseAmbiguous(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()
opts := []ParserOption{RetryAmbiguousDateWithSwap(true)}
b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true)) MustParse("13/02/2014 04:08:09 +0000 UTC", opts...)
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -756,46 +756,52 @@ func TestParse(t *testing.T) {
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
}) })
for _, th := range testInputs { for _, simpleErrorMessage := range []bool{false, true} {
t.Run(th.in, func(t *testing.T) { for _, th := range testInputs {
var ts time.Time t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) {
defer func() { var ts time.Time
if r := recover(); r != nil { defer func() {
t.Fatalf("error: %s", r) if r := recover(); r != nil {
t.Fatalf("error: %s", r)
}
}()
parserOptions := []ParserOption{
PreferMonthFirst(!th.preferDayFirst),
RetryAmbiguousDateWithSwap(th.retryAmbiguous),
SimpleErrorMessages(simpleErrorMessage),
} }
}() if len(th.loc) > 0 {
parserOptions := []ParserOption{PreferMonthFirst(!th.preferDayFirst), RetryAmbiguousDateWithSwap(th.retryAmbiguous)} loc, err := time.LoadLocation(th.loc)
if len(th.loc) > 0 { if err != nil {
loc, err := time.LoadLocation(th.loc) t.Fatalf("Expected to load location %q but got %v", th.loc, err)
if err != nil { }
t.Fatalf("Expected to load location %q but got %v", th.loc, err) ts, err = ParseIn(th.in, loc, parserOptions...)
if err != nil {
t.Fatalf("expected to parse %q but got %v", th.in, err)
}
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
} else {
ts = MustParse(th.in, parserOptions...)
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
} }
ts, err = ParseIn(th.in, loc, parserOptions...) })
if err != nil { }
t.Fatalf("expected to parse %q but got %v", th.in, err)
}
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
} else {
ts = MustParse(th.in, parserOptions...)
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
}
})
} }
// some errors // some errors