From 23f8fa1af098ae3824a5dcb1ab3ad398b179356f Mon Sep 17 00:00:00 2001 From: Klondike Dragon Date: Sat, 16 Dec 2023 13:52:00 -0700 Subject: [PATCH] Further optimize ambiguous parsing Optimize the common special case where mm and dd are both two digits long: swap them in place, which avoids having to reparse the entire string. For this case, parsing is about 30% faster and allocations are reduced by about 15%. This format is especially common, hence the reason to optimize for it. Also fix the parsing of ambiguous date/time values in the mm:dd:yyyy format. --- bench_test.go | 7 +++++ parseany.go | 82 ++++++++++++++++++++++++++++++++++++------------ parseany_test.go | 33 +++++++++++++++++-- 3 files changed, 100 insertions(+), 22 deletions(-) diff --git a/bench_test.go b/bench_test.go index f979d95..a46e5dd 100644 --- a/bench_test.go +++ b/bench_test.go @@ -97,6 +97,13 @@ func BenchmarkParseAnyErrors(b *testing.B) { } } +func BenchmarkParseAmbiguous(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true)) + } +} + /* func BenchmarkParseDateString(b *testing.B) { b.ReportAllocs() diff --git a/parseany.go b/parseany.go index d9cfd0a..307402f 100644 --- a/parseany.go +++ b/parseany.go @@ -342,6 +342,7 @@ iterRunes: // 03/31/2005 // 31/03/2005 p.ambiguousMD = true + p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { // 03/31/2005 @@ -364,8 +365,8 @@ iterRunes: } case ':': - // 03/31/2005 - // 2014/02/24 + // 03:31:2005 + // 2014:02:24 p.stateDate = dateDigitColon if i == 4 { p.yearlen = i @@ -375,6 +376,7 @@ iterRunes: } } else { p.ambiguousMD = true + p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { p.molen = i @@ -383,6 +385,14 @@ iterRunes: } p.dayi = i + 1 } + } else { + if p.daylen == 0 { + p.daylen = i + if !p.setDay() { + return p, unknownErr(datestr) + } + p.moi = i + 1 + } } } @@ -399,6 +409,7 @@ iterRunes: } } else if i <= 2 { p.ambiguousMD = true + 
p.ambiguousRetryable = true if p.preferMonthFirst { if p.molen == 0 { // 03.31.2005 @@ -641,7 +652,7 @@ iterRunes: // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // which is a horrible assumption, but seems to be the convention for // dates that are formatted in this way. - p.ambiguousMD = true + p.ambiguousMD = true // not retryable p.yearlen = 2 p.set(p.yeari, "06") // We now also know that part1 was the day @@ -786,6 +797,11 @@ iterRunes: if !p.setDay() { return p, unknownErr(datestr) } + } else if p.molen == 0 { + p.molen = i - p.moi + if !p.setMonth() { + return p, unknownErr(datestr) + } } break iterRunes case ':': @@ -806,6 +822,14 @@ iterRunes: } p.yeari = i + 1 } + } else { + if p.molen == 0 { + p.molen = i - p.moi + if !p.setMonth() { + return p, unknownErr(datestr) + } + p.yeari = i + 1 + } } } @@ -2260,7 +2284,7 @@ iterRunes: // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // which is a horrible assumption, but seems to be the convention for // dates that are formatted in this way. 
- p.ambiguousMD = true + p.ambiguousMD = true // not retryable p.yearlen = 2 p.set(p.yeari, "06") // We now also know that part1 was the day @@ -2417,6 +2441,7 @@ type parser struct { preferMonthFirst bool retryAmbiguousDateWithSwap bool ambiguousMD bool + ambiguousRetryable bool allowPartialStringMatch bool stateDate dateState stateTime timeState @@ -2774,7 +2799,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) p.setFullMonth(p.fullMonth) } - if p.retryAmbiguousDateWithSwap && p.ambiguousMD { + if p.retryAmbiguousDateWithSwap && p.ambiguousMD && p.ambiguousRetryable { // month out of range signifies that a day/month swap is the correct solution to an ambiguous date // this is because it means that a day is being interpreted as a month and overflowing the valid value for that // by retrying in this case, we can fix a common situation with no assumptions @@ -2782,19 +2807,35 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) // if actual time parsing errors out with the following error, swap before we // get out of this function to reduce scope it needs to be applied on if err != nil && strings.Contains(err.Error(), "month out of range") { - // create the option to reverse the preference - preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) - // turn off the retry to avoid endless recursion - retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) - modifiedOpts := append(originalOpts, preferMonthFirst, retryAmbiguousDateWithSwap) - var newParser *parser - newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...) - defer putBackParser(newParser) - if err == nil { - t, err = newParser.parse(originalLoc, modifiedOpts...) 
- // The caller might use the format and datestr, so copy that back to the original parser - p.setEntireFormat(newParser.format) - p.datestr = newParser.datestr + // simple optimized case where mm and dd can be swapped directly + if p.molen == 2 && p.daylen == 2 { + moi := p.moi + p.moi = p.dayi + p.dayi = moi + if !p.setDay() || !p.setMonth() { + err = unknownErr(p.datestr) + } else { + if p.loc == nil { + t, err = time.Parse(bytesToString(p.format), p.datestr) + } else { + t, err = time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) + } + } + } else { + // create the option to reverse the preference + preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) + // turn off the retry to avoid endless recursion + retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false) + modifiedOpts := append(originalOpts, preferMonthFirst, retryAmbiguousDateWithSwap) + var newParser *parser + newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...) + defer putBackParser(newParser) + if err == nil { + t, err = newParser.parse(originalLoc, modifiedOpts...) 
+ // The caller might use the format and datestr, so copy that back to the original parser + p.setEntireFormat(newParser.format) + p.datestr = newParser.datestr + } } } }() @@ -2824,9 +2865,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) if p.loc == nil { // gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr) return time.Parse(bytesToString(p.format), p.datestr) + } else { + //gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) + return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) } - //gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) - return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) } func isDay(alpha string) bool { for _, day := range days { diff --git a/parseany_test.go b/parseany_test.go index 9c9b6fa..e2940e7 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -254,6 +254,7 @@ var testInputs = []dateTest{ {in: "04:02:2014 04:08:09.123", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "04:02:2014 04:08:09.12312", out: "2014-04-02 04:08:09.12312 +0000 UTC"}, {in: "04:02:2014 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, + {in: "04:01:2014 04:08:09", out: "2014-01-04 04:08:09 +0000 UTC", preferDayFirst: true}, // mm/dd/yy hh:mm:ss AM {in: "04/02/2014 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, @@ -537,8 +538,10 @@ var testInputs = []dateTest{ {in: "2017-07-19 03:21:51+00:00", out: "2017-07-19 03:21:51 +0000 UTC"}, // yyyy:mm:dd hh:mm:ss+00:00 {in: "2012:08:03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, - // dd:mm:yyyy hh:mm:ss+00:00 + // mm:dd:yyyy hh:mm:ss+00:00 {in: "08:03:2012 18:31:59+00:00", out: 
"2012-08-03 18:31:59 +0000 UTC"}, + {in: "08:04:2012 18:31:59+00:00", out: "2012-04-08 18:31:59 +0000 UTC", preferDayFirst: true}, + {in: "24:03:2012 18:31:59+00:00", out: "2012-03-24 18:31:59 +0000 UTC", retryAmbiguous: true}, // yyyy-mm-dd hh:mm:ss.000+00:00 PST {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, @@ -1068,37 +1071,63 @@ func TestPreferMonthFirst(t *testing.T) { ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC") assert.Equal(t, nil, err) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC") + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC") + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) preferMonthFirstTrue := PreferMonthFirst(true) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) assert.Equal(t, nil, err) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) // allows the day to be preferred before the month, when completely ambiguous preferMonthFirstFalse := PreferMonthFirst(false) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) assert.Equal(t, nil, err) assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + 
ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } func TestRetryAmbiguousDateWithSwap(t *testing.T) { // default is false _, err := ParseAny("13/02/2014 04:08:09 +0000 UTC") assert.NotEqual(t, nil, err) + _, err = ParseAny("13/2/2014 04:08:09 +0000 UTC") + assert.NotEqual(t, nil, err) // will fail error if the month preference cannot work due to the value being larger than 12 retryAmbiguousDateWithSwapFalse := RetryAmbiguousDateWithSwap(false) _, err = ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse) assert.NotEqual(t, nil, err) + _, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse) + assert.NotEqual(t, nil, err) // will retry with the other month preference if this error is detected retryAmbiguousDateWithSwapTrue := RetryAmbiguousDateWithSwap(true) ts, err := ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue) assert.Equal(t, nil, err) assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue) + assert.Equal(t, nil, err) + assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } // Convenience function for debugging a particular broken test case func TestDebug(t *testing.T) { - MustParse("8-Mar-2018::14:09:27") + ts := MustParse("03:08:2012 18:31:59+00:00", PreferMonthFirst(false)) + assert.Equal(t, "2012-08-03 18:31:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) }