Further optimize ambiguous parsing

Optimize the common special case where mm and dd are the same length by swapping them in place, which avoids having to reparse the entire string.

For this case, it's about 30% faster and reduces allocations by about 15%.

This format is especially common, which is why it is worth optimizing.

Also fix the case for ambiguous date/time in the mm:dd:yyyy format.
This commit is contained in:
Klondike Dragon 2023-12-16 13:52:00 -07:00
parent ed5310d0c1
commit 23f8fa1af0
3 changed files with 100 additions and 22 deletions

View File

@ -97,6 +97,13 @@ func BenchmarkParseAnyErrors(b *testing.B) {
} }
} }
// BenchmarkParseAmbiguous measures MustParse on a day-first date whose
// leading field (13) cannot be a month, with RetryAmbiguousDateWithSwap
// enabled. Allocation counts are reported alongside timings.
func BenchmarkParseAmbiguous(b *testing.B) {
	// Day-first input: 13 is out of range as a month.
	const ambiguousInput = "13/02/2014 04:08:09 +0000 UTC"

	b.ReportAllocs()
	for iter := 0; iter < b.N; iter++ {
		// The option is built inside the loop, as in the original, so its
		// cost stays part of what is measured.
		MustParse(ambiguousInput, RetryAmbiguousDateWithSwap(true))
	}
}
/* /*
func BenchmarkParseDateString(b *testing.B) { func BenchmarkParseDateString(b *testing.B) {
b.ReportAllocs() b.ReportAllocs()

View File

@ -342,6 +342,7 @@ iterRunes:
// 03/31/2005 // 03/31/2005
// 31/03/2005 // 31/03/2005
p.ambiguousMD = true p.ambiguousMD = true
p.ambiguousRetryable = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
// 03/31/2005 // 03/31/2005
@ -364,8 +365,8 @@ iterRunes:
} }
case ':': case ':':
// 03/31/2005 // 03:31:2005
// 2014/02/24 // 2014:02:24
p.stateDate = dateDigitColon p.stateDate = dateDigitColon
if i == 4 { if i == 4 {
p.yearlen = i p.yearlen = i
@ -375,6 +376,7 @@ iterRunes:
} }
} else { } else {
p.ambiguousMD = true p.ambiguousMD = true
p.ambiguousRetryable = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
p.molen = i p.molen = i
@ -383,6 +385,14 @@ iterRunes:
} }
p.dayi = i + 1 p.dayi = i + 1
} }
} else {
if p.daylen == 0 {
p.daylen = i
if !p.setDay() {
return p, unknownErr(datestr)
}
p.moi = i + 1
}
} }
} }
@ -399,6 +409,7 @@ iterRunes:
} }
} else if i <= 2 { } else if i <= 2 {
p.ambiguousMD = true p.ambiguousMD = true
p.ambiguousRetryable = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
// 03.31.2005 // 03.31.2005
@ -641,7 +652,7 @@ iterRunes:
// We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for // which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way. // dates that are formatted in this way.
p.ambiguousMD = true p.ambiguousMD = true // not retryable
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
// We now also know that part1 was the day // We now also know that part1 was the day
@ -786,6 +797,11 @@ iterRunes:
if !p.setDay() { if !p.setDay() {
return p, unknownErr(datestr) return p, unknownErr(datestr)
} }
} else if p.molen == 0 {
p.molen = i - p.moi
if !p.setMonth() {
return p, unknownErr(datestr)
}
} }
break iterRunes break iterRunes
case ':': case ':':
@ -806,6 +822,14 @@ iterRunes:
} }
p.yeari = i + 1 p.yeari = i + 1
} }
} else {
if p.molen == 0 {
p.molen = i - p.moi
if !p.setMonth() {
return p, unknownErr(datestr)
}
p.yeari = i + 1
}
} }
} }
@ -2260,7 +2284,7 @@ iterRunes:
// We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for // which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way. // dates that are formatted in this way.
p.ambiguousMD = true p.ambiguousMD = true // not retryable
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
// We now also know that part1 was the day // We now also know that part1 was the day
@ -2417,6 +2441,7 @@ type parser struct {
preferMonthFirst bool preferMonthFirst bool
retryAmbiguousDateWithSwap bool retryAmbiguousDateWithSwap bool
ambiguousMD bool ambiguousMD bool
ambiguousRetryable bool
allowPartialStringMatch bool allowPartialStringMatch bool
stateDate dateState stateDate dateState
stateTime timeState stateTime timeState
@ -2774,7 +2799,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
p.setFullMonth(p.fullMonth) p.setFullMonth(p.fullMonth)
} }
if p.retryAmbiguousDateWithSwap && p.ambiguousMD { if p.retryAmbiguousDateWithSwap && p.ambiguousMD && p.ambiguousRetryable {
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date // month out of range signifies that a day/month swap is the correct solution to an ambiguous date
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that // this is because it means that a day is being interpreted as a month and overflowing the valid value for that
// by retrying in this case, we can fix a common situation with no assumptions // by retrying in this case, we can fix a common situation with no assumptions
@ -2782,6 +2807,21 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
// if actual time parsing errors out with the following error, swap before we // if actual time parsing errors out with the following error, swap before we
// get out of this function to reduce scope it needs to be applied on // get out of this function to reduce scope it needs to be applied on
if err != nil && strings.Contains(err.Error(), "month out of range") { if err != nil && strings.Contains(err.Error(), "month out of range") {
// simple optimized case where mm and dd can be swapped directly
if p.molen == 2 && p.daylen == 2 {
moi := p.moi
p.moi = p.dayi
p.dayi = moi
if !p.setDay() || !p.setMonth() {
err = unknownErr(p.datestr)
} else {
if p.loc == nil {
t, err = time.Parse(bytesToString(p.format), p.datestr)
} else {
t, err = time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc)
}
}
} else {
// create the option to reverse the preference // create the option to reverse the preference
preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst) preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst)
// turn off the retry to avoid endless recursion // turn off the retry to avoid endless recursion
@ -2797,6 +2837,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
p.datestr = newParser.datestr p.datestr = newParser.datestr
} }
} }
}
}() }()
} }
@ -2824,10 +2865,11 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
if p.loc == nil { if p.loc == nil {
// gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr) // gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr)
return time.Parse(bytesToString(p.format), p.datestr) return time.Parse(bytesToString(p.format), p.datestr)
} } else {
//gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc) //gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc)
return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc) return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc)
} }
}
func isDay(alpha string) bool { func isDay(alpha string) bool {
for _, day := range days { for _, day := range days {
if alpha == day { if alpha == day {

View File

@ -254,6 +254,7 @@ var testInputs = []dateTest{
{in: "04:02:2014 04:08:09.123", out: "2014-04-02 04:08:09.123 +0000 UTC"}, {in: "04:02:2014 04:08:09.123", out: "2014-04-02 04:08:09.123 +0000 UTC"},
{in: "04:02:2014 04:08:09.12312", out: "2014-04-02 04:08:09.12312 +0000 UTC"}, {in: "04:02:2014 04:08:09.12312", out: "2014-04-02 04:08:09.12312 +0000 UTC"},
{in: "04:02:2014 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"}, {in: "04:02:2014 04:08:09.123123", out: "2014-04-02 04:08:09.123123 +0000 UTC"},
{in: "04:01:2014 04:08:09", out: "2014-01-04 04:08:09 +0000 UTC", preferDayFirst: true},
// mm/dd/yy hh:mm:ss AM // mm/dd/yy hh:mm:ss AM
{in: "04/02/2014 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09am", out: "2014-04-02 04:08:09 +0000 UTC"},
{in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"}, {in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"},
@ -537,8 +538,10 @@ var testInputs = []dateTest{
{in: "2017-07-19 03:21:51+00:00", out: "2017-07-19 03:21:51 +0000 UTC"}, {in: "2017-07-19 03:21:51+00:00", out: "2017-07-19 03:21:51 +0000 UTC"},
// yyyy:mm:dd hh:mm:ss+00:00 // yyyy:mm:dd hh:mm:ss+00:00
{in: "2012:08:03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, {in: "2012:08:03 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"},
// dd:mm:yyyy hh:mm:ss+00:00 // mm:dd:yyyy hh:mm:ss+00:00
{in: "08:03:2012 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"}, {in: "08:03:2012 18:31:59+00:00", out: "2012-08-03 18:31:59 +0000 UTC"},
{in: "08:04:2012 18:31:59+00:00", out: "2012-04-08 18:31:59 +0000 UTC", preferDayFirst: true},
{in: "24:03:2012 18:31:59+00:00", out: "2012-03-24 18:31:59 +0000 UTC", retryAmbiguous: true},
// yyyy-mm-dd hh:mm:ss.000+00:00 PST // yyyy-mm-dd hh:mm:ss.000+00:00 PST
{in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"}, {in: "2012-08-03 18:31:59.000+00:00 PST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "America/Los_Angeles", zname: "PST"},
{in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"}, {in: "2012-08-03 18:31:59.000+00:00 CEST", out: "2012-08-03 18:31:59 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"},
@ -1068,37 +1071,63 @@ func TestPreferMonthFirst(t *testing.T) {
ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC") ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC")
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC")
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC")
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
preferMonthFirstTrue := PreferMonthFirst(true) preferMonthFirstTrue := PreferMonthFirst(true)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
// allows the day to be preferred before the month, when completely ambiguous // allows the day to be preferred before the month, when completely ambiguous
preferMonthFirstFalse := PreferMonthFirst(false) preferMonthFirstFalse := PreferMonthFirst(false)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse) ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("4/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("04/2/2014 04:08:09 +0000 UTC", preferMonthFirstFalse)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
} }
func TestRetryAmbiguousDateWithSwap(t *testing.T) { func TestRetryAmbiguousDateWithSwap(t *testing.T) {
// default is false // default is false
_, err := ParseAny("13/02/2014 04:08:09 +0000 UTC") _, err := ParseAny("13/02/2014 04:08:09 +0000 UTC")
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
_, err = ParseAny("13/2/2014 04:08:09 +0000 UTC")
assert.NotEqual(t, nil, err)
// will fail error if the month preference cannot work due to the value being larger than 12 // will fail error if the month preference cannot work due to the value being larger than 12
retryAmbiguousDateWithSwapFalse := RetryAmbiguousDateWithSwap(false) retryAmbiguousDateWithSwapFalse := RetryAmbiguousDateWithSwap(false)
_, err = ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse) _, err = ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse)
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
_, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse)
assert.NotEqual(t, nil, err)
// will retry with the other month preference if this error is detected // will retry with the other month preference if this error is detected
retryAmbiguousDateWithSwapTrue := RetryAmbiguousDateWithSwap(true) retryAmbiguousDateWithSwapTrue := RetryAmbiguousDateWithSwap(true)
ts, err := ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue) ts, err := ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
ts, err = ParseAny("13/2/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
} }
// Convenience function for debugging a particular broken test case // Convenience function for debugging a particular broken test case
func TestDebug(t *testing.T) { func TestDebug(t *testing.T) {
MustParse("8-Mar-2018::14:09:27") ts := MustParse("03:08:2012 18:31:59+00:00", PreferMonthFirst(false))
assert.Equal(t, "2012-08-03 18:31:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
} }