diff --git a/README.md b/README.md index 0ca7b6a..8986738 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ Go Date Parser --------------------------- -Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. +Parse many date strings without knowing the format in advance. Uses a scanner to read bytes and a state machine to find the format. Much faster than shotgun-based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison. See the critical note below about timezones. [![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse) @@ -9,7 +9,7 @@ Parse many date strings without knowing format in advance. Uses a scanner to re [![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse) [![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse) -**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. +**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous. If this is not the desired behavior, use `ParseStrict`, which will fail on ambiguous date strings. This can be adjusted using the `PreferMonthFirst` parser option. Some ambiguous formats can fail (e.g., trying to parse 31/03/2023 as the default month-first format `MM/DD/YYYY`), but can be automatically retried with `RetryAmbiguousDateWithSwap`. 
```go @@ -21,11 +21,24 @@ t, err := dateparse.ParseStrict("3/1/2014") > returns error // Return a string that represents the layout to parse the given date-time. +// For certain highly complex date formats, ParseFormat may not be accurate, +// even if ParseAny is able to correctly parse it (e.g., anything that starts +// with a weekday). layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM") > "Jan 2, 2006 3:04:05 PM" ``` +Performance Considerations +---------------------------------- + +Internally a memory pool is used to minimize allocation overhead. If you could +be frequently parsing text that does not match any format, consider turning on +the `SimpleErrorMessages` option. This will make error messages have no +contextual details, but will reduce allocation overhead 13x and will be 4x +faster (most of the time is spent in generating a complex error message if the +option is off (default)). + Timezone Considerations ---------------------------------- diff --git a/bench_test.go b/bench_test.go index a46e5dd..db371c8 100644 --- a/bench_test.go +++ b/bench_test.go @@ -71,9 +71,11 @@ func BenchmarkBigParseIn(b *testing.B) { func BenchmarkBigParseRetryAmbiguous(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{RetryAmbiguousDateWithSwap(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { for _, t := range testInputs { - _, _ = ParseAny(t.in, RetryAmbiguousDateWithSwap(true)) + _, _ = ParseAny(t.in, opts...) } } } @@ -90,17 +92,48 @@ func BenchmarkShotgunParseErrors(b *testing.B) { func BenchmarkParseAnyErrors(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{SimpleErrorMessages(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { for _, t := range testParseErrors { - _, _ = ParseAny(t.in) + _, _ = ParseAny(t.in, opts...) 
+ } + } +} + +func BenchmarkBigParseAnyErrors(b *testing.B) { + b.ReportAllocs() + + opts := []ParserOption{SimpleErrorMessages(true)} + // manufacture a bunch of invalid inputs by overwriting bytes at deterministic, index-dependent positions + var testBigErrorInputs []string + for index, t := range testInputs { + buf := []byte(t.in) + spread := 4 + (index % 4) + startingIndex := spread % len(buf) + for i := startingIndex; i < len(buf); i += spread { + buf[i] = '?' + } + testBigErrorInputs = append(testBigErrorInputs, string(buf)) + } + b.ResetTimer() + + for i := 0; i < b.N; i++ { + for _, in := range testBigErrorInputs { + _, err := ParseAny(in, opts...) + if err == nil { + b.Fatalf("expected parsing to fail: %q", in) + } } } } func BenchmarkParseAmbiguous(b *testing.B) { b.ReportAllocs() + opts := []ParserOption{RetryAmbiguousDateWithSwap(true)} + b.ResetTimer() for i := 0; i < b.N; i++ { - MustParse("13/02/2014 04:08:09 +0000 UTC", RetryAmbiguousDateWithSwap(true)) + MustParse("13/02/2014 04:08:09 +0000 UTC", opts...) 
+ } } diff --git a/parseany.go b/parseany.go index 87333ba..d0c516e 100644 --- a/parseany.go +++ b/parseany.go @@ -137,14 +137,25 @@ var ( ErrAmbiguousMMDD = fmt.Errorf("this date has ambiguous mm/dd vs dd/mm type format") ErrCouldntFindFormat = fmt.Errorf("could not find format for") ErrUnexpectedTail = fmt.Errorf("unexpected content after date/time: ") + ErrUnknownTZOffset = fmt.Errorf("TZ offset not recognized") + ErrUnknownTimeZone = fmt.Errorf("timezone not recognized") + ErrFracSecTooLong = fmt.Errorf("fractional seconds too long") ) -func unknownErr(datestr string) error { - return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) +func (p *parser) unknownErr(datestr string) error { + if p == nil || !p.simpleErrorMessages { + return fmt.Errorf("%w %q", ErrCouldntFindFormat, datestr) + } else { + return ErrCouldntFindFormat + } } -func unexpectedTail(tail string) error { - return fmt.Errorf("%w %q", ErrUnexpectedTail, tail) +func (p *parser) unexpectedTail(tailStart int) error { + if p != nil && !p.simpleErrorMessages { + return fmt.Errorf("%w %q", ErrUnexpectedTail, p.datestr[tailStart:]) + } else { + return ErrUnexpectedTail + } } // go 1.20 allows us to convert a byte slice to a string without a memory allocation. @@ -283,12 +294,15 @@ iterRunes: // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr) switch p.stateDate { case dateStart: - if unicode.IsDigit(r) { + // NOTE: don't use unicode.IsDigit and unicode.IsLetter here because + // we don't expect non-ASCII chars to start a valid date/time format. + // This will let us quickly reject strings that begin with any non-ASCII char. 
+ if '0' <= r && r <= '9' { p.stateDate = dateDigit - } else if unicode.IsLetter(r) { + } else if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') { p.stateDate = dateAlpha } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigit: @@ -317,7 +331,7 @@ iterRunes: p.yearlen = i // since it was start of datestr, i=len p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitYearSlash } else { @@ -332,7 +346,7 @@ iterRunes: p.daylen = 2 p.dayi = 0 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } continue } @@ -346,21 +360,21 @@ iterRunes: // 03/31/2005 p.molen = i if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -373,7 +387,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { p.ambiguousMD = true @@ -382,21 +396,21 @@ iterRunes: if p.molen == 0 { p.molen = i if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -410,7 +424,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if i <= 2 { p.ambiguousMD = true @@ -420,21 +434,21 @@ iterRunes: // 03.31.2005 p.molen = i if 
!p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { if p.daylen == 0 { p.daylen = i if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -454,7 +468,7 @@ iterRunes: p.yearlen = i p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateYearWs } else if i == 6 { @@ -470,10 +484,10 @@ iterRunes: p.yearlen = i - 2 p.moi = i + 1 if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ',': - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 's', 'S', 'r', 'R', 't', 'T', 'n', 'N': // 1st January 2018 // 2nd Jan 2018 23:59 @@ -482,7 +496,7 @@ iterRunes: i-- default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } continue } @@ -503,18 +517,18 @@ iterRunes: // 2013-Feb-03 // 2013-February-03 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi p.dayi = i + 1 p.stateDate = dateYearDashDash if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if unicode.IsLetter(r) { p.stateDate = dateYearDashAlpha } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -532,14 +546,14 @@ iterRunes: p.daylen = i - p.dayi p.stateDate = dateYearDashDashOffset if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': p.daylen = i - p.dayi p.stateDate = dateYearDashDashWs p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes case 'T': @@ -547,12 +561,12 @@ iterRunes: p.stateDate = dateYearDashDashT p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + 
return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -561,7 +575,7 @@ iterRunes: // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 // (this state should never be reached, we break out when in this state) - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case dateYearDashDashOffset: // 2020-07-20+00:00 @@ -570,7 +584,7 @@ iterRunes: p.set(p.offseti, "-07:00") default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -579,7 +593,7 @@ iterRunes: // 2013-Feb-03 // 2013-February-03 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi // Must be a valid short or long month if p.molen == 3 { @@ -593,12 +607,12 @@ iterRunes: p.dayi = i + 1 p.stateDate = dateYearDashDash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -612,28 +626,28 @@ iterRunes: p.stateDate = dateDigitDashDigit p.moi = i } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigitDashAlpha: // 13-Feb-03 // 28-Feb-03 // 29-Jun-2016 switch r { - case '-': + case '-', '\u2212': p.molen = i - p.moi p.set(p.moi, "Jan") p.yeari = i + 1 p.stateDate = dateDigitDashAlphaDash default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateDigitDashDigit: // 29-06-2026 switch r { - case '-': + case '-', '\u2212': // X // 29-06-2026 p.molen = i - p.moi @@ -642,11 +656,11 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateDigitDashDigitDash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -673,11 +687,11 @@ iterRunes: } } if !doubleColonTimeConnector { - return p, unknownErr(datestr) 
+ return p, p.unknownErr(datestr) } } } else if p.link > 0 { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } if r == ' ' || doubleColonTimeConnector { // we need to find if this was 4 digits, aka year @@ -696,7 +710,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if length == 2 { // We have no idea if this is @@ -713,17 +727,17 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -739,7 +753,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes @@ -747,13 +761,13 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -778,15 +792,15 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateDigitSlashAlphaSlash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -799,13 +813,13 @@ iterRunes: if p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -827,7 +841,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, 
unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -835,7 +849,7 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -851,13 +865,13 @@ iterRunes: i++ } if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -878,17 +892,17 @@ iterRunes: if p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } break iterRunes @@ -898,7 +912,7 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.dayi = i + 1 } @@ -906,7 +920,7 @@ iterRunes: if p.daylen == 0 { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } @@ -914,14 +928,14 @@ iterRunes: if p.molen == 0 { p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } } default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -941,7 +955,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart if i > p.daylen+len(" Sep") { // November etc @@ -956,7 +970,7 @@ iterRunes: p.fullMonth = possibleFullMonth p.stateDate = dateDigitWsMoYear } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { // If len=3, the might be 
Feb or May? Ie ambigous abbreviated but @@ -970,7 +984,7 @@ iterRunes: } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -984,19 +998,19 @@ iterRunes: case ',': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } i++ break iterRunes case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1017,11 +1031,11 @@ iterRunes: p.dayi = i + 1 p.stateDate = dateYearWsMonthWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } else if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateYearWsMonthWs: // 2013 Jan 06 15:04:05 @@ -1040,7 +1054,7 @@ iterRunes: break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1055,24 +1069,24 @@ iterRunes: p.molen = i - p.moi - 2 p.dayi = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case '日': // day p.daylen = i - p.dayi - 2 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': if p.daylen <= 0 { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitChineseYearWs p.stateTime = timeStart break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateDigitDot: @@ -1088,7 +1102,7 @@ iterRunes: p.daylen = i - p.dayi p.yeari = i + 1 if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } else if p.dayi == 0 && p.yearlen == 0 { @@ -1096,7 +1110,7 @@ iterRunes: p.molen = i - p.moi p.yeari = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } else { @@ -1105,12 +1119,12 @@ iterRunes: p.molen = i - p.moi p.dayi = i + 1 if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateDigitDotDot } } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateDigitDotDot: @@ -1126,14 +1140,14 @@ iterRunes: p.daylen = i - p.dayi p.stateDate = dateDigitDotDotOffset if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case ' ': p.daylen = i - p.dayi p.stateDate = dateDigitDotDotWs p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes case 'T': @@ -1141,12 +1155,12 @@ iterRunes: p.stateDate = dateDigitDotDotT p.stateTime = timeStart if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1155,7 +1169,7 @@ iterRunes: // 2006-01-02T15:04:05Z07:00 // 2020-08-17T17:00:00:000+0100 // (should be unreachable, we break in this state) - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case dateDigitDotDotOffset: // 2020-07-20+00:00 @@ -1164,7 +1178,7 @@ iterRunes: p.set(p.offseti, "-07:00") default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1233,7 +1247,7 @@ iterRunes: p.dayi = i + 1 break } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if i == 3 { @@ -1245,7 +1259,7 @@ iterRunes: // May 8 17:57:51 2009 p.stateDate = dateAlphaWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == ',': @@ -1262,7 +1276,7 @@ iterRunes: p.skip = i + 2 i++ } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case r == '.': @@ -1278,7 +1292,7 @@ iterRunes: putBackParser(p) return 
parseTime(newDateStr, loc, opts...) } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == '/': // X @@ -1301,12 +1315,12 @@ iterRunes: p.fullMonth = possibleFullMonth p.stateDate = dateAlphaSlash } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: if !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1338,7 +1352,7 @@ iterRunes: case r == ' ': // continue default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigit: @@ -1353,13 +1367,13 @@ iterRunes: if r == ',' { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateAlphaWsDigitMore } else if r == ' ' { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 p.stateDate = dateAlphaWsDigitYearMaybe @@ -1368,7 +1382,7 @@ iterRunes: p.stateDate = dateVariousDaySuffix i-- } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitYearMaybe: // x @@ -1387,11 +1401,11 @@ iterRunes: // must be year format, not 15:04 p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } break iterRunes } else if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitMore: // x @@ -1404,7 +1418,7 @@ iterRunes: p.yeari = i + 1 p.stateDate = dateAlphaWsDigitMoreWs } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaWsDigitMoreWs: // x @@ -1425,13 +1439,13 @@ iterRunes: p.stateDate = dateAlphaWsDigitMoreWsYear p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes default: if r != '\'' && !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } } @@ -1448,7 +1462,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 'n', 'N': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1457,7 +1471,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 's', 'S': if p.nextIs(i, 't') || p.nextIs(i, 'T') { if len(p.datestr) > i+2 { @@ -1466,7 +1480,7 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) case 'r', 'R': if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if len(p.datestr) > i+2 { @@ -1475,9 +1489,9 @@ iterRunes: return parseTime(newDateStr, loc, opts...) } } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaFullMonthWs: @@ -1493,13 +1507,13 @@ iterRunes: if p.nextIs(i, ' ') { p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 2 p.stateDate = dateAlphaFullMonthWsDayWs i++ } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case r == ' ': @@ -1507,7 +1521,7 @@ iterRunes: // January 02 2006, 15:04:05 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 p.stateDate = dateAlphaFullMonthWsDayWs @@ -1520,12 +1534,12 @@ iterRunes: // January 2nd, 2006, 15:04:05 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateVariousDaySuffix i-- default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaFullMonthWsDayWs: // X @@ -1537,7 +1551,7 @@ iterRunes: case ',': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = 
timeStart i++ @@ -1545,13 +1559,13 @@ iterRunes: case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } @@ -1564,7 +1578,7 @@ iterRunes: p.stateDate = dateAlphaWsDigit p.dayi = i default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlash: @@ -1577,7 +1591,7 @@ iterRunes: p.stateDate = dateAlphaSlashDigit p.dayi = i default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlashDigit: @@ -1593,13 +1607,13 @@ iterRunes: p.yeari = i + 1 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateDate = dateAlphaSlashDigitSlash case unicode.IsDigit(r): // continue default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateAlphaSlashDigitSlash: @@ -1610,7 +1624,7 @@ iterRunes: p.stateTime = timeStart break iterRunes default: - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case dateWeekdayComma: @@ -1624,12 +1638,12 @@ iterRunes: switch r { case ' ': fallthrough - case '-': + case '-', '\u2212': if p.moi == 0 { p.moi = i + 1 p.daylen = i - p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if p.yeari == 0 { p.yeari = i + 1 @@ -1637,7 +1651,7 @@ iterRunes: if p.molen == 3 { p.set(p.moi, "Jan") } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { p.stateTime = timeStart @@ -1645,7 +1659,7 @@ iterRunes: } default: if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case dateWeekdayAbbrevComma: @@ -1663,13 +1677,13 @@ iterRunes: offset++ } fallthrough - case '-': + case '-', '\u2212': if p.dayi == 0 { p.dayi = i + 1 } else if p.moi == 0 { p.daylen = i - 
p.dayi if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.moi = i + 1 } else if p.yeari == 0 { @@ -1677,30 +1691,30 @@ iterRunes: if p.molen == 3 { p.set(p.moi, "Jan") } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.yeari = i + 1 } else { p.yearlen = i - p.yeari - offset if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } p.stateTime = timeStart break iterRunes } default: if !unicode.IsDigit(r) && !unicode.IsLetter(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } default: // Reaching an unhandled state unexpectedly should always fail parsing - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } if !p.coalesceDate(i) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } if p.stateTime == timeStart { // increment first one, since the i++ occurs at end of loop @@ -1811,7 +1825,7 @@ iterRunes: // skip 'M' i++ default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } case ' ': @@ -1900,14 +1914,14 @@ iterRunes: case ' ': p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case '+', '-': p.offseti = i p.stateTime = timeWsYearOffset default: if !unicode.IsDigit(r) { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case timeWsAlpha: @@ -1961,7 +1975,7 @@ iterRunes: if i+1 == len(p.datestr) { p.stateTime = timeWsAlphaRParen } else { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -2002,7 +2016,7 @@ iterRunes: p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } } @@ -2016,7 +2030,7 @@ iterRunes: isTwoLetterWord := ((i+1) == len(p.datestr) || p.nextIs(i, ' ')) if (r == 'm' || r == 'M') && isTwoLetterWord { if p.parsedAMPM { - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } // 
This isn't a time zone after all... p.tzi = 0 @@ -2042,7 +2056,7 @@ iterRunes: p.stateTime = timeWs } else { // unexpected garbage after AM/PM indicator, fail - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } case timeWsOffset: @@ -2092,7 +2106,7 @@ iterRunes: p.yearlen = i - p.yeari + 1 if p.yearlen == 4 { if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } case unicode.IsLetter(r): @@ -2178,11 +2192,11 @@ iterRunes: i++ p.stateTime = timePeriodAMPM default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } default: if !unicode.IsDigit(r) { - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } case timePeriodAMPM: @@ -2193,11 +2207,11 @@ iterRunes: p.offseti = i p.stateTime = timeOffset default: - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } case timeZ: // nothing expected can come after Z - return p, unexpectedTail(p.datestr[i:]) + return p, p.unexpectedTail(i) } } @@ -2210,12 +2224,20 @@ iterRunes: // may or may not have a space on the end if offsetLen == 7 { if p.datestr[p.offseti+6] != ' ' { - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + } } } p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:p.offseti+offsetLen]) + } } // process timezone switch len(p.datestr) - p.tzi { @@ -2225,7 +2247,11 @@ iterRunes: 
case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } case timeWsAlpha: switch len(p.datestr) - p.tzi { @@ -2235,7 +2261,11 @@ iterRunes: case 4: p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } case timeWsAlphaRParen: @@ -2244,12 +2274,12 @@ iterRunes: case timeWsAlphaWs: p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case timeWsYear: p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } case timeWsAlphaZoneOffsetWsExtra: p.trimExtra(false) @@ -2263,13 +2293,21 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:i]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:i]) + } } case timePeriod: p.mslen = i - p.msi if p.mslen >= 10 { - return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, p.datestr[p.msi:p.mslen]) + if p.simpleErrorMessages { + return p, ErrFracSecTooLong + } else { + return p, fmt.Errorf("%w in %q near %q", ErrFracSecTooLong, datestr, p.datestr[p.msi:p.msi+p.mslen]) + } } case timeOffset, timeWsOffset, timeWsYearOffset: switch len(p.datestr) - p.offseti { @@ -2280,7 
+2318,11 @@ iterRunes: // 19:55:00+0100 (or 19:55:00 +0100) p.set(p.offseti, "-0700") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (must be 2 or 4 digits optional colon)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) + } } case timeWsOffsetWs: @@ -2295,7 +2337,11 @@ iterRunes: // 13:31:51.999 +01:00 CEST p.set(p.tzi, "MST ") default: - return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:]) + if p.simpleErrorMessages { + return p, ErrUnknownTimeZone + } else { + return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:]) + } } } case timeOffsetColon, timeWsOffsetColon: @@ -2305,7 +2351,11 @@ iterRunes: case 6: p.set(p.offseti, "-07:00") default: - return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:]) + if p.simpleErrorMessages { + return p, ErrUnknownTZOffset + } else { + return p, fmt.Errorf("%w %q near %q (expected offset like -07:00)", ErrUnknownTZOffset, datestr, p.datestr[p.offseti:]) + } } } p.coalesceTime(i) @@ -2352,7 +2402,7 @@ iterRunes: p.setEntireFormat([]byte("2006")) return p, nil } else if len(p.datestr) < 4 { - return p, fmt.Errorf("unrecognized format, too short %v", datestr) + return p, p.unknownErr(datestr) } if !t.IsZero() { if loc == nil { @@ -2418,7 +2468,7 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else if length == 2 { // We have no idea if this is @@ -2435,10 +2485,10 @@ iterRunes: p.dayi = 0 p.daylen = p.part1Len if !p.setDay() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } else { - return p, unknownErr(datestr) + return p, 
p.unknownErr(datestr) } } @@ -2452,7 +2502,7 @@ iterRunes: // 2014.05 p.molen = i - p.moi if !p.setMonth() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } return p, nil } @@ -2495,7 +2545,7 @@ iterRunes: if p.stateTime == timeIgnore && p.yearlen == 0 { p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } } return p, nil @@ -2507,7 +2557,7 @@ iterRunes: // oct 1, 1970 p.yearlen = i - p.yeari if !p.setYear() { - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } return p, nil @@ -2579,7 +2629,7 @@ iterRunes: } - return p, unknownErr(datestr) + return p, p.unknownErr(datestr) } type parser struct { @@ -2589,6 +2639,7 @@ type parser struct { ambiguousMD bool ambiguousRetryable bool allowPartialStringMatch bool + simpleErrorMessages bool stateDate dateState stateTime timeState format []byte @@ -2690,11 +2741,22 @@ func AllowPartialStringMatch(allowPartialStringMatch bool) ParserOption { } } +// SimpleErrorMessages is an option that will cause returned error messages to contain less detail, +// but it will avoid allocating any memory for the custom error message. If you expect to attempt +// to parse a lot of text that is not valid, this could help reduce GC pressure. +func SimpleErrorMessages(simpleErrorMessages bool) ParserOption { + return func(p *parser) error { + p.simpleErrorMessages = simpleErrorMessages + return nil + } +} + // Creates a new parser. The caller must call putBackParser on the returned parser when done with it. 
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) { dateStrLen := len(dateStr) if dateStrLen > longestPossibleDateStr { - return nil, unknownErr(dateStr) + var nilParser *parser + return nil, nilParser.unknownErr(dateStr) } // Make sure to re-use the format byte slice from the pooled parser struct @@ -2936,7 +2998,8 @@ func (p *parser) trimExtra(onlyTrimFormat bool) { func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) (t time.Time, err error) { if p == nil { - return time.Time{}, unknownErr("") + var nilParser *parser + return time.Time{}, nilParser.unknownErr("") } if p.t != nil { return *p.t, nil @@ -2959,7 +3022,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) p.moi = p.dayi p.dayi = moi if !p.setDay() || !p.setMonth() { - err = unknownErr(p.datestr) + err = p.unknownErr(p.datestr) } else { if p.loc == nil { t, err = time.Parse(bytesToString(p.format), p.datestr) @@ -2993,7 +3056,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) // any numbers or letters in the format string. 
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true) if validFormatTo < len(p.format) { - return time.Time{}, unexpectedTail(p.datestr[p.formatSetLen:]) + return time.Time{}, p.unexpectedTail(p.formatSetLen) } } diff --git a/parseany_test.go b/parseany_test.go index e99904e..fa9b8d2 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -756,46 +756,52 @@ func TestParse(t *testing.T) { assert.NotEqual(t, nil, err) }) - for _, th := range testInputs { - t.Run(th.in, func(t *testing.T) { - var ts time.Time - defer func() { - if r := recover(); r != nil { - t.Fatalf("error: %s", r) + for _, simpleErrorMessage := range []bool{false, true} { + for _, th := range testInputs { + t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) { + var ts time.Time + defer func() { + if r := recover(); r != nil { + t.Fatalf("error: %s", r) + } + }() + parserOptions := []ParserOption{ + PreferMonthFirst(!th.preferDayFirst), + RetryAmbiguousDateWithSwap(th.retryAmbiguous), + SimpleErrorMessages(simpleErrorMessage), } - }() - parserOptions := []ParserOption{PreferMonthFirst(!th.preferDayFirst), RetryAmbiguousDateWithSwap(th.retryAmbiguous)} - if len(th.loc) > 0 { - loc, err := time.LoadLocation(th.loc) - if err != nil { - t.Fatalf("Expected to load location %q but got %v", th.loc, err) + if len(th.loc) > 0 { + loc, err := time.LoadLocation(th.loc) + if err != nil { + t.Fatalf("Expected to load location %q but got %v", th.loc, err) + } + ts, err = ParseIn(th.in, loc, parserOptions...) 
+ if err != nil { + t.Fatalf("expected to parse %q but got %v", th.in, err) + } + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } + } else { + ts = MustParse(th.in, parserOptions...) + got := fmt.Sprintf("%v", ts.In(time.UTC)) + assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) + if th.out != got { + t.Fatalf("whoops, got %s, expected %s", got, th.out) + } + if len(th.zname) > 0 { + gotZone, _ := ts.Zone() + assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) + } } - ts, err = ParseIn(th.in, loc, parserOptions...) - if err != nil { - t.Fatalf("expected to parse %q but got %v", th.in, err) - } - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) - } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) - } - } else { - ts = MustParse(th.in, parserOptions...) - got := fmt.Sprintf("%v", ts.In(time.UTC)) - assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) - if th.out != got { - t.Fatalf("whoops, got %s, expected %s", got, th.out) - } - if len(th.zname) > 0 { - gotZone, _ := ts.Zone() - assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in) - } - } - }) + }) + } } // some errors