Allow weekday prefix for most date formats

This is implemented now using the "skip" parser field, indicating
to skip the first N characters. This also avoids a recursive parse
in one more case (more efficient). This simplifies the state machine
a little bit, while the rest of the code needs to properly account
for the value of the skip field.

Also allow whitespace prefix without penalty.

Modify the test suite to pseudo-randomly add a weekday prefix
to the formats that allow it (all except the purely numeric ones).
This commit is contained in:
Klondike Dragon 2023-12-23 20:04:38 -07:00
parent 9f7bdf7101
commit fd21b1ee3e
2 changed files with 182 additions and 240 deletions

View File

@ -112,8 +112,6 @@ const (
dateAlphaSlash dateAlphaSlash
dateAlphaSlashDigit dateAlphaSlashDigit
dateAlphaSlashDigitSlash dateAlphaSlashDigitSlash
dateWeekdayComma
dateWeekdayAbbrevComma
dateYearWs dateYearWs
dateYearWsMonthWs dateYearWsMonthWs
) )
@ -308,10 +306,13 @@ iterRunes:
if bytesConsumed > 1 { if bytesConsumed > 1 {
i += bytesConsumed - 1 i += bytesConsumed - 1
} }
adjustedI := i - p.skip
// gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr) // gou.Debugf("i=%d r=%s state=%d %s", i, string(r), p.stateDate, p.datestr)
switch p.stateDate { switch p.stateDate {
case dateStart: case dateStart:
// Note that we can reach this state either at the very start of the string,
// or after skipping something (like a weekday, etc).
// NOTE: don't use unicode.IsDigit and unicode.IsLetter here because // NOTE: don't use unicode.IsDigit and unicode.IsLetter here because
// we don't expect non-ANSI chars to start a valid date/time format. // we don't expect non-ANSI chars to start a valid date/time format.
// This will let us quickly reject strings that begin with any non-ANSI char. // This will let us quickly reject strings that begin with any non-ANSI char.
@ -319,6 +320,10 @@ iterRunes:
p.stateDate = dateDigit p.stateDate = dateDigit
} else if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') { } else if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') {
p.stateDate = dateAlpha p.stateDate = dateAlpha
} else if r == ' ' {
// we can safely ignore whitespace at the start of strings (helps with
// situations where we skipped a weekday and came back to this state)
p.skip = i + 1
} else { } else {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -330,12 +335,12 @@ iterRunes:
// 2013-Feb-03 // 2013-Feb-03
// 13-Feb-03 // 13-Feb-03
// 29-Jun-2016 // 29-Jun-2016
if i == 4 { if adjustedI == 4 {
p.stateDate = dateYearDash p.stateDate = dateYearDash
p.yeari = 0 p.yeari = p.skip
p.yearlen = i p.yearlen = i - p.skip
p.moi = i + 1 p.moi = i + 1
p.set(0, "2006") p.set(p.skip, "2006")
} else { } else {
p.stateDate = dateDigitDash p.stateDate = dateDigitDash
} }
@ -344,9 +349,10 @@ iterRunes:
// 03/31/2005 // 03/31/2005
// 2014/02/24 // 2014/02/24
p.stateDate = dateDigitSlash p.stateDate = dateDigitSlash
if i == 4 { if adjustedI == 4 {
// 2014/02/24 - Year first / // 2014/02/24 - Year first /
p.yearlen = i // since it was start of datestr, i=len p.yeari = p.skip
p.yearlen = i - p.skip
p.moi = i + 1 p.moi = i + 1
if !p.setYear() { if !p.setYear() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -362,7 +368,7 @@ iterRunes:
p.stateDate = dateDigitSlashAlpha p.stateDate = dateDigitSlashAlpha
p.moi = i + 1 p.moi = i + 1
p.daylen = 2 p.daylen = 2
p.dayi = 0 p.dayi = p.skip
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -376,7 +382,8 @@ iterRunes:
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
// 03/31/2005 // 03/31/2005
p.molen = i p.moi = p.skip
p.molen = i - p.skip
if !p.setMonth() { if !p.setMonth() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -386,7 +393,8 @@ iterRunes:
} }
} else { } else {
if p.daylen == 0 { if p.daylen == 0 {
p.daylen = i p.dayi = p.skip
p.daylen = i - p.skip
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -401,8 +409,9 @@ iterRunes:
// 03:31:2005 // 03:31:2005
// 2014:02:24 // 2014:02:24
p.stateDate = dateDigitColon p.stateDate = dateDigitColon
if i == 4 { if adjustedI == 4 {
p.yearlen = i p.yeari = p.skip
p.yearlen = i - p.skip
p.moi = i + 1 p.moi = i + 1
if !p.setYear() { if !p.setYear() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -412,7 +421,8 @@ iterRunes:
p.ambiguousRetryable = true p.ambiguousRetryable = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
p.molen = i p.moi = p.skip
p.molen = i - p.skip
if !p.setMonth() { if !p.setMonth() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -422,7 +432,8 @@ iterRunes:
} }
} else { } else {
if p.daylen == 0 { if p.daylen == 0 {
p.daylen = i p.dayi = p.skip
p.daylen = i - p.skip
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -438,19 +449,21 @@ iterRunes:
// 08.21.71 // 08.21.71
// 2014.05 // 2014.05
p.stateDate = dateDigitDot p.stateDate = dateDigitDot
if i == 4 { if adjustedI == 4 {
p.yearlen = i p.yeari = p.skip
p.yearlen = i - p.skip
p.moi = i + 1 p.moi = i + 1
if !p.setYear() { if !p.setYear() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
} else if i <= 2 { } else if adjustedI <= 2 {
p.ambiguousMD = true p.ambiguousMD = true
p.ambiguousRetryable = true p.ambiguousRetryable = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
// 03.31.2005 // 03.31.2005
p.molen = i p.moi = p.skip
p.molen = i - p.skip
if !p.setMonth() { if !p.setMonth() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -460,7 +473,8 @@ iterRunes:
} }
} else { } else {
if p.daylen == 0 { if p.daylen == 0 {
p.daylen = i p.dayi = p.skip
p.daylen = i - p.skip
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
@ -482,24 +496,26 @@ iterRunes:
// 12 Feb 2006, 19:17 // 12 Feb 2006, 19:17
// 12 Feb 2006, 19:17:22 // 12 Feb 2006, 19:17:22
// 2013 Jan 06 15:04:05 // 2013 Jan 06 15:04:05
if i == 4 { if adjustedI == 4 {
p.yearlen = i p.yeari = p.skip
p.yearlen = i - p.skip
p.moi = i + 1 p.moi = i + 1
if !p.setYear() { if !p.setYear() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
p.stateDate = dateYearWs p.stateDate = dateYearWs
} else if i == 6 { } else if adjustedI == 6 {
p.stateDate = dateDigitSt p.stateDate = dateDigitSt
} else { } else {
p.stateDate = dateDigitWs p.stateDate = dateDigitWs
p.dayi = 0 p.dayi = p.skip
p.daylen = i p.daylen = i - p.skip
} }
case '年': case '年':
// Chinese Year // Chinese Year
p.stateDate = dateDigitChineseYear p.stateDate = dateDigitChineseYear
p.yearlen = i - 2 p.yeari = p.skip
p.yearlen = i - 2 - p.skip
p.moi = i + 1 p.moi = i + 1
if !p.setYear() { if !p.setYear() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -518,10 +534,10 @@ iterRunes:
} }
continue continue
} }
p.part1Len = i p.part1Len = i - p.skip
case dateDigitSt: case dateDigitSt:
p.set(0, "060102") p.set(p.skip, "060102")
i = i - 1 i = i - 1
p.stateTime = timeStart p.stateTime = timeStart
break iterRunes break iterRunes
@ -738,7 +754,7 @@ iterRunes:
p.yearlen = 4 p.yearlen = 4
p.set(p.yeari, "2006") p.set(p.yeari, "2006")
// We now also know that part1 was the day // We now also know that part1 was the day
p.dayi = 0 p.dayi = p.skip
p.daylen = p.part1Len p.daylen = p.part1Len
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -755,7 +771,7 @@ iterRunes:
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
// We now also know that part1 was the day // We now also know that part1 was the day
p.dayi = 0 p.dayi = p.skip
p.daylen = p.part1Len p.daylen = p.part1Len
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -983,13 +999,13 @@ iterRunes:
case ' ': case ' ':
p.yeari = i + 1 p.yeari = i + 1
//p.yearlen = 4 //p.yearlen = 4
p.dayi = 0 p.dayi = p.skip
p.daylen = p.part1Len p.daylen = p.part1Len
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
p.stateTime = timeStart p.stateTime = timeStart
if i > p.daylen+len(" Sep") { // November etc if adjustedI > p.daylen+len(" Sep") { // November etc
// If this is a legit full month, then change the string we're parsing // If this is a legit full month, then change the string we're parsing
// to compensate for the longest month, and do the same with the format string. We // to compensate for the longest month, and do the same with the format string. We
// must maintain a corresponding length/content and this is the easiest // must maintain a corresponding length/content and this is the easiest
@ -1252,18 +1268,6 @@ iterRunes:
// Oct/07/1970 // Oct/07/1970
// February/ 7/1970 // February/ 7/1970
// February/07/1970 // February/07/1970
//
// dateWeekdayComma
// Monday, 02 Jan 2006 15:04:05 MST
// Monday, 02-Jan-06 15:04:05 MST
// Monday, 02 Jan 2006 15:04:05 -0700
// Monday, 02 Jan 2006 15:04:05 +0100
// dateWeekdayAbbrevComma
// Mon, 02 Jan 2006 15:04:05 MST
// Mon, 02 Jan 2006 15:04:05 -0700
// Thu, 13 Jul 2017 08:58:40 +0100
// Tue, 11 Jul 2017 16:28:13 +0200 (CEST)
// Mon, 02-Jan-06 15:04:05 MST
switch { switch {
case r == ' ': case r == ' ':
// This could be a weekday or a month, detect and parse both cases. // This could be a weekday or a month, detect and parse both cases.
@ -1272,21 +1276,17 @@ iterRunes:
// Tuesday 05 May 2020, 05:05:05 // Tuesday 05 May 2020, 05:05:05
// Mon Jan 2 15:04:05 2006 // Mon Jan 2 15:04:05 2006
// Monday Jan 2 15:04:05 2006 // Monday Jan 2 15:04:05 2006
maybeDayOrMonth := strings.ToLower(p.datestr[0:i]) maybeDayOrMonth := strings.ToLower(p.datestr[p.skip:i])
if isDay(maybeDayOrMonth) { if isDay(maybeDayOrMonth) {
// using skip throws off indices used by other code; saner to restart p.skip = i + 1
newDateStr := p.datestr[i+1:] p.stateDate = dateStart
putBackParser(p) } else if adjustedI > 3 {
return parseTime(newDateStr, loc, opts...) // X
} // April 8, 2009
// X
// April 8, 2009
if i > 3 {
// Expecting a full month name at this point // Expecting a full month name at this point
if isMonthFull(maybeDayOrMonth) { if isMonthFull(maybeDayOrMonth) {
p.moi = 0 p.moi = p.skip
p.molen = i p.molen = i - p.skip
p.fullMonth = maybeDayOrMonth p.fullMonth = maybeDayOrMonth
p.stateDate = dateAlphaFullMonthWs p.stateDate = dateAlphaFullMonthWs
p.dayi = i + 1 p.dayi = i + 1
@ -1295,7 +1295,7 @@ iterRunes:
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
} else if i == 3 { } else if adjustedI == 3 {
// dateAlphaWs // dateAlphaWs
// May 05, 2005, 05:05:05 // May 05, 2005, 05:05:05
// May 05 2005, 05:05:05 // May 05 2005, 05:05:05
@ -1309,14 +1309,11 @@ iterRunes:
case r == ',': case r == ',':
// Mon, 02 Jan 2006 // Mon, 02 Jan 2006
// Monday, 02 Jan 2006
if i == 3 { if adjustedI >= 3 && p.nextIs(i, ' ') {
p.stateDate = dateWeekdayAbbrevComma maybeDay := strings.ToLower(p.datestr[p.skip:i])
p.set(0, "Mon")
} else {
maybeDay := strings.ToLower(p.datestr[0:i])
if isDay(maybeDay) { if isDay(maybeDay) {
p.stateDate = dateWeekdayComma p.stateDate = dateStart
// Just skip past the weekday, it contains no valuable info // Just skip past the weekday, it contains no valuable info
p.skip = i + 2 p.skip = i + 2
i++ i++
@ -1328,12 +1325,13 @@ iterRunes:
// sept. 28, 2017 // sept. 28, 2017
// jan. 28, 2017 // jan. 28, 2017
p.stateDate = dateAlphaPeriodWsDigit p.stateDate = dateAlphaPeriodWsDigit
if i == 3 { if adjustedI == 3 {
p.molen = i p.moi = p.skip
p.set(0, "Jan") p.molen = i - p.skip
} else if i == 4 { p.set(p.skip, "Jan")
} else if adjustedI == 4 {
// gross // gross
newDateStr := p.datestr[0:i-1] + p.datestr[i:] newDateStr := p.datestr[p.skip:i-1] + p.datestr[i:]
putBackParser(p) putBackParser(p)
return parseTime(newDateStr, loc, opts...) return parseTime(newDateStr, loc, opts...)
} else { } else {
@ -1347,15 +1345,15 @@ iterRunes:
// February/ 7/1970 // February/ 7/1970
// February/07/1970 // February/07/1970
// Must be a valid short or long month // Must be a valid short or long month
if i == 3 { if adjustedI == 3 {
p.moi = 0 p.moi = p.skip
p.molen = i - p.moi p.molen = i - p.moi
p.set(p.moi, "Jan") p.set(p.moi, "Jan")
p.stateDate = dateAlphaSlash p.stateDate = dateAlphaSlash
} else { } else {
possibleFullMonth := strings.ToLower(p.datestr[:i]) possibleFullMonth := strings.ToLower(p.datestr[p.skip:i])
if i > 3 && isMonthFull(possibleFullMonth) { if adjustedI > 3 && isMonthFull(possibleFullMonth) {
p.moi = 0 p.moi = p.skip
p.molen = i - p.moi p.molen = i - p.moi
p.fullMonth = possibleFullMonth p.fullMonth = possibleFullMonth
p.stateDate = dateAlphaSlash p.stateDate = dateAlphaSlash
@ -1385,13 +1383,24 @@ iterRunes:
// May 08 17:57:51 2009 // May 08 17:57:51 2009
// oct 1, 1970 // oct 1, 1970
// oct 7, '70 // oct 7, '70
// (this state is only entered if the skip-adjusted length is 3)
switch { switch {
case unicode.IsLetter(r): case unicode.IsLetter(r):
p.set(0, "Mon") // have to have a day of week and then at least a 3 digit month to follow
p.stateDate = dateAlphaWsAlpha if adjustedI >= 3 && (i+3) < len(p.datestr) {
p.set(i, "Jan") maybeDay := strings.ToLower(p.datestr[p.skip:i])
if isDay(maybeDay) {
p.skip = i
p.stateDate = dateAlphaWsAlpha
p.set(i, "Jan")
} else {
return p, p.unknownErr(datestr)
}
} else {
return p, p.unknownErr(datestr)
}
case unicode.IsDigit(r): case unicode.IsDigit(r):
p.set(0, "Jan") p.set(p.skip, "Jan")
p.stateDate = dateAlphaWsDigit p.stateDate = dateAlphaWsDigit
p.dayi = i p.dayi = i
case r == ' ': case r == ' ':
@ -1438,9 +1447,9 @@ iterRunes:
// Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) // Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)
if r == ':' { if r == ':' {
// Guessed wrong; was not a year // Guessed wrong; was not a year
p.yeari = 0
i = i - 3 i = i - 3
p.stateDate = dateAlphaWsDigit p.stateDate = dateAlphaWsDigit
p.yeari = 0
break iterRunes break iterRunes
} else if r == ' ' { } else if r == ' ' {
// must be year format, not 15:04 // must be year format, not 15:04
@ -1502,7 +1511,7 @@ iterRunes:
case 't', 'T': case 't', 'T':
if p.nextIs(i, 'h') || p.nextIs(i, 'H') { if p.nextIs(i, 'h') || p.nextIs(i, 'H') {
if len(p.datestr) > i+2 { if len(p.datestr) > i+2 {
newDateStr := p.datestr[0:i] + p.datestr[i+2:] newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:]
putBackParser(p) putBackParser(p)
return parseTime(newDateStr, loc, opts...) return parseTime(newDateStr, loc, opts...)
} }
@ -1511,7 +1520,7 @@ iterRunes:
case 'n', 'N': case 'n', 'N':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(p.datestr) > i+2 { if len(p.datestr) > i+2 {
newDateStr := p.datestr[0:i] + p.datestr[i+2:] newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:]
putBackParser(p) putBackParser(p)
return parseTime(newDateStr, loc, opts...) return parseTime(newDateStr, loc, opts...)
} }
@ -1520,7 +1529,7 @@ iterRunes:
case 's', 'S': case 's', 'S':
if p.nextIs(i, 't') || p.nextIs(i, 'T') { if p.nextIs(i, 't') || p.nextIs(i, 'T') {
if len(p.datestr) > i+2 { if len(p.datestr) > i+2 {
newDateStr := p.datestr[0:i] + p.datestr[i+2:] newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:]
putBackParser(p) putBackParser(p)
return parseTime(newDateStr, loc, opts...) return parseTime(newDateStr, loc, opts...)
} }
@ -1529,7 +1538,7 @@ iterRunes:
case 'r', 'R': case 'r', 'R':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(p.datestr) > i+2 { if len(p.datestr) > i+2 {
newDateStr := p.datestr[0:i] + p.datestr[i+2:] newDateStr := p.datestr[p.skip:i] + p.datestr[i+2:]
putBackParser(p) putBackParser(p)
return parseTime(newDateStr, loc, opts...) return parseTime(newDateStr, loc, opts...)
} }
@ -1672,87 +1681,6 @@ iterRunes:
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
} }
case dateWeekdayComma:
// Monday, 02 Jan 2006 15:04:05 MST
// Monday, 02 Jan 2006 15:04:05 -0700
// Monday, 02 Jan 2006 15:04:05 +0100
// Monday, 02-Jan-06 15:04:05 MST
if p.dayi == 0 {
p.dayi = i
}
switch r {
case ' ':
fallthrough
case '-', '\u2212':
if p.moi == 0 {
p.moi = i + 1
p.daylen = i - p.dayi
if !p.setDay() {
return p, p.unknownErr(datestr)
}
} else if p.yeari == 0 {
p.yeari = i + 1
p.molen = i - p.moi
if p.molen == 3 {
p.set(p.moi, "Jan")
} else {
return p, p.unknownErr(datestr)
}
} else {
p.stateTime = timeStart
break iterRunes
}
default:
if !unicode.IsDigit(r) && !unicode.IsLetter(r) {
return p, p.unknownErr(datestr)
}
}
case dateWeekdayAbbrevComma:
// Mon, 02 Jan 2006 15:04:05 MST
// Mon, 02 Jan 2006 15:04:05 -0700
// Thu, 13 Jul 2017 08:58:40 +0100
// Thu, 4 Jan 2018 17:53:36 +0000
// Tue, 11 Jul 2017 16:28:13 +0200 (CEST)
// Mon, 02-Jan-06 15:04:05 MST
var offset int
switch r {
case ' ':
for i+1 < len(p.datestr) && p.datestr[i+1] == ' ' {
i++
offset++
}
fallthrough
case '-', '\u2212':
if p.dayi == 0 {
p.dayi = i + 1
} else if p.moi == 0 {
p.daylen = i - p.dayi
if !p.setDay() {
return p, p.unknownErr(datestr)
}
p.moi = i + 1
} else if p.yeari == 0 {
p.molen = i - p.moi - offset
if p.molen == 3 {
p.set(p.moi, "Jan")
} else {
return p, p.unknownErr(datestr)
}
p.yeari = i + 1
} else {
p.yearlen = i - p.yeari - offset
if !p.setYear() {
return p, p.unknownErr(datestr)
}
p.stateTime = timeStart
break iterRunes
}
default:
if !unicode.IsDigit(r) && !unicode.IsLetter(r) {
return p, p.unknownErr(datestr)
}
}
default: default:
// Reaching an unhandled state unexpectedly should always fail parsing // Reaching an unhandled state unexpectedly should always fail parsing
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -2567,7 +2495,7 @@ iterRunes:
p.yearlen = 4 p.yearlen = 4
p.set(p.yeari, "2006") p.set(p.yeari, "2006")
// We now also know that part1 was the day // We now also know that part1 was the day
p.dayi = 0 p.dayi = p.skip
p.daylen = p.part1Len p.daylen = p.part1Len
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -2584,7 +2512,7 @@ iterRunes:
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
// We now also know that part1 was the day // We now also know that part1 was the day
p.dayi = 0 p.dayi = p.skip
p.daylen = p.part1Len p.daylen = p.part1Len
if !p.setDay() { if !p.setDay() {
return p, p.unknownErr(datestr) return p, p.unknownErr(datestr)
@ -2715,17 +2643,6 @@ iterRunes:
// February/07/1970 // February/07/1970
return p, nil return p, nil
case dateWeekdayComma:
// Monday, 02 Jan 2006 15:04:05 -0700
// Monday, 02 Jan 2006 15:04:05 +0100
// Monday, 02-Jan-06 15:04:05 MST
return p, nil
case dateWeekdayAbbrevComma:
// Mon, 02-Jan-06 15:04:05 MST
// Mon, 02 Jan 2006 15:04:05 MST
return p, nil
case dateYearWsMonthWs: case dateYearWsMonthWs:
// 2013 May 02 11:37:55 // 2013 May 02 11:37:55
// 2013 December 02 11:37:55 // 2013 December 02 11:37:55
@ -3129,8 +3046,9 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
if err != nil && strings.Contains(err.Error(), "month out of range") { if err != nil && strings.Contains(err.Error(), "month out of range") {
// simple optimized case where mm and dd can be swapped directly // simple optimized case where mm and dd can be swapped directly
if p.molen == 2 && p.daylen == 2 { if p.molen == 2 && p.daylen == 2 {
moi := p.moi // skipped bytes have already been removed, so compensate for that
p.moi = p.dayi moi := p.moi - p.skip
p.moi = p.dayi - p.skip
p.dayi = moi p.dayi = moi
if !p.setDay() || !p.setMonth() { if !p.setDay() || !p.setMonth() {
err = p.unknownErr(p.datestr) err = p.unknownErr(p.datestr)

View File

@ -19,6 +19,7 @@ type dateTest struct {
preferDayFirst bool preferDayFirst bool
retryAmbiguous bool retryAmbiguous bool
expectAmbiguous bool expectAmbiguous bool
allowWeekdayPrefix bool
} }
var testInputs = []dateTest{ var testInputs = []dateTest{
@ -81,6 +82,7 @@ var testInputs = []dateTest{
{in: "Mon 30 Sep 2018 09:09:09 PM CEST", out: "2018-09-30 21:09:09 +0000 UTC", zname: "CEST"}, {in: "Mon 30 Sep 2018 09:09:09 PM CEST", out: "2018-09-30 21:09:09 +0000 UTC", zname: "CEST"},
{in: "Mon 02 Jan 2006", out: "2006-01-02 00:00:00 +0000 UTC"}, {in: "Mon 02 Jan 2006", out: "2006-01-02 00:00:00 +0000 UTC"},
{in: "Monday 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"}, {in: "Monday 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC", zname: "UTC"},
{in: "SUNDAY, July 05 2015", out: "2015-07-05 00:00:00 +0000 UTC", zname: "UTC"},
// RubyDate = "Mon Jan 02 15:04:05 -0700 2006" // RubyDate = "Mon Jan 02 15:04:05 -0700 2006"
{in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"}, {in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"},
{in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, {in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"},
@ -222,8 +224,9 @@ var testInputs = []dateTest{
{in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"},
{in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"},
{in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"}, {in: "Fri, 03-Jul-15 08:08:08 PST", out: "2015-07-03 16:08:08 +0000 UTC", loc: "America/Los_Angeles", zname: "PDT"},
{in: "Fri, 03-Jul 2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"}, {in: "Fri, 03-Jul-2015", out: "2015-07-03 00:00:00 +0000 UTC"},
{in: "Fri, 03-Jul 2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 03-Jul-2015 08:08:08 PST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "PST"},
{in: "Fri, 03-Jul-2015 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"},
{in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 3-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"},
{in: "Fri, 3-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"}, {in: "Fri, 3-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"},
{in: "Fri, 03-Jul-15 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"}, {in: "Fri, 03-Jul-15 8:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"},
@ -697,22 +700,22 @@ var testInputs = []dateTest{
{in: "08/04/2014 22:05", out: "2014-04-08 22:05:00 +0000 UTC", preferDayFirst: true}, {in: "08/04/2014 22:05", out: "2014-04-08 22:05:00 +0000 UTC", preferDayFirst: true},
{in: "2/04/2014 03:00:51", out: "2014-02-04 03:00:51 +0000 UTC", preferDayFirst: false}, {in: "2/04/2014 03:00:51", out: "2014-02-04 03:00:51 +0000 UTC", preferDayFirst: false},
{in: "2/04/2014 03:00:51", out: "2014-04-02 03:00:51 +0000 UTC", preferDayFirst: true}, {in: "2/04/2014 03:00:51", out: "2014-04-02 03:00:51 +0000 UTC", preferDayFirst: true},
{in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, {in: "19/03/2012 10:11:56", out: "2012-03-19 10:11:56 +0000 UTC", retryAmbiguous: true},
{in: "19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, {in: "19/03/2012 10:11:57", out: "2012-03-19 10:11:57 +0000 UTC", preferDayFirst: true},
{in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", retryAmbiguous: true}, {in: "19/03/2012 10:11:58.3186369", out: "2012-03-19 10:11:58.3186369 +0000 UTC", retryAmbiguous: true},
{in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", preferDayFirst: true}, {in: "19/03/2012 10:11:59.3186369", out: "2012-03-19 10:11:59.3186369 +0000 UTC", preferDayFirst: true},
// For certain parse modes that restart parsing, make sure that parsing options are passed along! // For certain parse modes that restart parsing, make sure that parsing options are passed along!
{in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", retryAmbiguous: true}, {in: "Monday 19/03/2012 10:11:50", out: "2012-03-19 10:11:50 +0000 UTC", retryAmbiguous: true},
{in: "Monday 19/03/2012 10:11:59", out: "2012-03-19 10:11:59 +0000 UTC", preferDayFirst: true}, {in: "Monday 19/03/2012 10:11:51", out: "2012-03-19 10:11:51 +0000 UTC", preferDayFirst: true},
// https://github.com/araddon/dateparse/issues/105 // https://github.com/araddon/dateparse/issues/105
{in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", retryAmbiguous: true}, {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", retryAmbiguous: true},
{in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", preferDayFirst: true}, {in: "20/5/2006 19:51:45", out: "2006-05-20 19:51:45 +0000 UTC", preferDayFirst: true},
// yyyymmdd and similar // yyyymmdd and similar
{in: "2014", out: "2014-01-01 00:00:00 +0000 UTC"}, {in: "2014", out: "2014-01-01 00:00:00 +0000 UTC", allowWeekdayPrefix: false},
{in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC"}, {in: "20140601", out: "2014-06-01 00:00:00 +0000 UTC", allowWeekdayPrefix: false},
{in: "20140722105203", out: "2014-07-22 10:52:03 +0000 UTC"}, {in: "20140722105203", out: "2014-07-22 10:52:03 +0000 UTC", allowWeekdayPrefix: false},
// https://github.com/araddon/dateparse/issues/143 // https://github.com/araddon/dateparse/issues/143
{in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC"}, {in: "20140722105203.364", out: "2014-07-22 10:52:03.364 +0000 UTC", allowWeekdayPrefix: false},
// yymmdd hh:mm:yy mysql log https://github.com/araddon/dateparse/issues/119 // yymmdd hh:mm:yy mysql log https://github.com/araddon/dateparse/issues/119
// 080313 05:21:55 mysqld started // 080313 05:21:55 mysqld started
// 080313 5:21:55 InnoDB: Started; log sequence number 0 43655 // 080313 5:21:55 InnoDB: Started; log sequence number 0 43655
@ -721,11 +724,11 @@ var testInputs = []dateTest{
{in: "190910 11:51:49", out: "2019-09-10 11:51:49 +0000 UTC"}, {in: "190910 11:51:49", out: "2019-09-10 11:51:49 +0000 UTC"},
// all digits: unix secs, ms etc // all digits: unix secs, ms etc
{in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", zname: "UTC"}, {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", zname: "UTC", allowWeekdayPrefix: false},
{in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver", zname: "MDT"}, {in: "1332151919", out: "2012-03-19 10:11:59 +0000 UTC", loc: "America/Denver", zname: "MDT", allowWeekdayPrefix: false},
{in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC"}, {in: "1384216367111", out: "2013-11-12 00:32:47.111 +0000 UTC", allowWeekdayPrefix: false},
{in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC"}, {in: "1384216367111222", out: "2013-11-12 00:32:47.111222 +0000 UTC", allowWeekdayPrefix: false},
{in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC"}, {in: "1384216367111222333", out: "2013-11-12 00:32:47.111222333 +0000 UTC", allowWeekdayPrefix: false},
// other // other
{in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"}, {in: "Wed, 8 Feb 2023 19:00:46 +1100 (AEDT)", out: "2023-02-08 08:00:46 +0000 UTC"},
@ -742,6 +745,10 @@ var testInputs = []dateTest{
{in: "2014.02.13T08:33:44.555", out: "2014-02-13 08:33:44.555 +0000 UTC"}, {in: "2014.02.13T08:33:44.555", out: "2014-02-13 08:33:44.555 +0000 UTC"},
{in: "2014.02.13T08:33:44.555 PM -0700 MST", out: "2014-02-14 03:33:44.555 +0000 UTC", zname: "MST"}, {in: "2014.02.13T08:33:44.555 PM -0700 MST", out: "2014-02-14 03:33:44.555 +0000 UTC", zname: "MST"},
{in: "2014.02.13-0200", out: "2014-02-13 02:00:00 +0000 UTC"}, {in: "2014.02.13-0200", out: "2014-02-13 02:00:00 +0000 UTC"},
// Whitespace up front is now allowed
{in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"},
{in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"},
{in: " 2018-01-02 17:08:09 -07:00", out: "2018-01-03 00:08:09 +0000 UTC"},
} }
func TestParse(t *testing.T) { func TestParse(t *testing.T) {
@ -761,51 +768,70 @@ func TestParse(t *testing.T) {
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
}) })
allDays := make([]string, 0, len(knownDays))
for day := range knownDays {
allDays = append(allDays, day)
}
i := 0
for _, simpleErrorMessage := range []bool{false, true} { for _, simpleErrorMessage := range []bool{false, true} {
for _, th := range testInputs { for _, addWeekday := range []bool{false, true} {
t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, th.in), func(t *testing.T) { for _, th := range testInputs {
var ts time.Time i++
defer func() { prefix := ""
if r := recover(); r != nil { if addWeekday && th.allowWeekdayPrefix {
t.Fatalf("error: %s", r) prefix = allDays[i%len(allDays)]
if i%2 == 1 {
prefix += ","
} }
}() prefix += " "
parserOptions := []ParserOption{
PreferMonthFirst(!th.preferDayFirst),
RetryAmbiguousDateWithSwap(th.retryAmbiguous),
SimpleErrorMessages(simpleErrorMessage),
} }
if len(th.loc) > 0 { fullInput := prefix + th.in
loc, err := time.LoadLocation(th.loc)
if err != nil { t.Run(fmt.Sprintf("simpleerr-%v-%s", simpleErrorMessage, fullInput), func(t *testing.T) {
t.Fatalf("Expected to load location %q but got %v", th.loc, err) var ts time.Time
defer func() {
if r := recover(); r != nil {
t.Fatalf("error: %s", r)
}
}()
parserOptions := []ParserOption{
PreferMonthFirst(!th.preferDayFirst),
RetryAmbiguousDateWithSwap(th.retryAmbiguous),
SimpleErrorMessages(simpleErrorMessage),
} }
ts, err = ParseIn(th.in, loc, parserOptions...) if len(th.loc) > 0 {
if err != nil { loc, err := time.LoadLocation(th.loc)
t.Fatalf("expected to parse %q but got %v", th.in, err) if err != nil {
t.Fatalf("Expected to load location %q but got %v", th.loc, err)
}
ts, err = ParseIn(fullInput, loc, parserOptions...)
if err != nil {
t.Fatalf("expected to parse %q but got %v", fullInput, err)
}
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, fullInput)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, fullInput)
}
} else {
ts = MustParse(fullInput, parserOptions...)
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, fullInput)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, fullInput)
}
} }
got := fmt.Sprintf("%v", ts.In(time.UTC)) })
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in) }
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
} else {
ts = MustParse(th.in, parserOptions...)
got := fmt.Sprintf("%v", ts.In(time.UTC))
assert.Equal(t, th.out, got, "Expected %q but got %q from %q", th.out, got, th.in)
if th.out != got {
t.Fatalf("whoops, got %s, expected %s", got, th.out)
}
if len(th.zname) > 0 {
gotZone, _ := ts.Zone()
assert.Equal(t, th.zname, gotZone, "Expected zname %q but got %q from %q", th.zname, gotZone, th.in)
}
}
})
} }
} }
@ -868,12 +894,10 @@ var testParseErrors = []dateTest{
{in: `{"hello"}`, err: true}, {in: `{"hello"}`, err: true},
{in: "2009-15-12T22:15Z", err: true}, {in: "2009-15-12T22:15Z", err: true},
{in: "5,000-9,999", err: true}, {in: "5,000-9,999", err: true},
{in: "xyzq-baad"}, {in: "xyzq-baad", err: true},
{in: "oct.-7-1970", err: true}, {in: "oct.-7-1970", err: true},
{in: "septe. 7, 1970", err: true}, {in: "septe. 7, 1970", err: true},
{in: "SeptemberRR 7th, 1970", err: true}, {in: "SeptemberRR 7th, 1970", err: true},
// this is just testing the empty space up front
{in: " 2018-01-02 17:08:09 -07:00", err: true},
// a semantic version number should not be interpreted as a date // a semantic version number should not be interpreted as a date
{in: "1.22.3-78888", err: true}, {in: "1.22.3-78888", err: true},
// a semantic version number that starts with a date should not be interpreted as a date // a semantic version number that starts with a date should not be interpreted as a date
@ -1203,6 +1227,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) {
// Convenience function for debugging a particular broken test case // Convenience function for debugging a particular broken test case
func TestDebug(t *testing.T) { func TestDebug(t *testing.T) {
ts := MustParse("03.31.2014 10:11:59 MST-0700", PreferMonthFirst(true)) ts := MustParse("Monday 19/03/2012 00:00:00", RetryAmbiguousDateWithSwap(true))
assert.Equal(t, "2014-03-31 17:11:59 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2012-03-19 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
} }