From 661394b510884e7e415c95eab0370e3dbad87c8c Mon Sep 17 00:00:00 2001 From: Charles Duffy Date: Tue, 5 May 2020 21:10:21 -0500 Subject: [PATCH] Parse glibc strftime %c format (#102) This change trims day-of-week prefixes before entering into the parsing loop. This corrects parsing of glibc's strftime `%c` timestamps, and should allow significant simplification of the state machine in the future. --- parseany.go | 92 ++++++++++++++++++++++++++++++++++++++++++------ parseany_test.go | 2 ++ 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/parseany.go b/parseany.go index 5e66aa6..3ccd072 100644 --- a/parseany.go +++ b/parseany.go @@ -17,6 +17,23 @@ import ( // gou.SetColorOutput() // } +var days = []string{ + "mon", + "tue", + "wed", + "thu", + "fri", + "sat", + "sun", + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", +} + var months = []string{ "january", "february", @@ -60,6 +77,7 @@ const ( dateAlphaWsDigitMore dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear + dateAlphaWsDigitYearmaybe dateAlphaWsMonth dateAlphaWsMonthMore dateAlphaWsMonthSuffix @@ -587,9 +605,21 @@ iterRunes: } else { // This is possibly ambiguous? May will parse as either though. // So, it could return in-correct format. - // May 05, 2005, 05:05:05 - // May 05 2005, 05:05:05 - // Jul 05, 2005, 05:05:05 + // dateAlphaWs + // May 05, 2005, 05:05:05 + // May 05 2005, 05:05:05 + // Jul 05, 2005, 05:05:05 + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // skip & return to dateStart + // Tue 05 May 2020, 05:05:05 + // Mon Jan 2 15:04:05 2006 + + maybeDay := strings.ToLower(datestr[0:i]) + if isDay(maybeDay) { + // using skip throws off indices used by other code; saner to restart + return parseTime(datestr[i+1:], loc) + } p.stateDate = dateAlphaWs } @@ -631,11 +661,14 @@ iterRunes: // Mon Jan 02 15:04:05 -0700 2006 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) // Mon Aug 10 15:44:11 UTC+0100 2015 - // dateAlphaWsDigit - // May 8, 2009 5:57:51 PM - // May 8 2009 5:57:51 PM - // oct 1, 1970 - // oct 7, '70 + // dateAlphaWsDigit + // May 8, 2009 5:57:51 PM + // May 8 2009 5:57:51 PM + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 + // oct 1, 1970 + // oct 7, '70 switch { case unicode.IsLetter(r): p.set(0, "Mon") @@ -653,6 +686,9 @@ iterRunes: // oct 1, 1970 // oct 7, '70 // oct. 7, 1970 + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 if r == ',' { p.daylen = i - p.dayi p.setDay() @@ -661,11 +697,31 @@ iterRunes: p.daylen = i - p.dayi p.setDay() p.yeari = i + 1 - p.stateDate = dateAlphaWsDigitMoreWs + p.stateDate = dateAlphaWsDigitYearmaybe + p.stateTime = timeStart } else if unicode.IsLetter(r) { p.stateDate = dateAlphaWsMonthSuffix i-- } + case dateAlphaWsDigitYearmaybe: + // x + // May 8 2009 5:57:51 PM + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 + // Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) + if r == ':' { + // Guessed wrong; was not a year + i = i - 3 + p.stateDate = dateAlphaWsDigit + p.yeari = 0 + break iterRunes + } else if r == ' ' { + // must be year format, not 15:04 + p.yearlen = i - p.yeari + p.setYear() + break iterRunes + } case dateAlphaWsDigitMore: // x // May 8, 2009 5:57:51 PM @@ -1139,7 +1195,9 @@ iterRunes: switch r { case ' ': p.set(p.offseti, "-0700") - p.yeari = i + 1 + if p.yeari == 0 { + p.yeari = i + 1 + } p.stateTime = timeWsAlphaZoneOffsetWs } case timeWsAlphaZoneOffsetWs: @@ -1630,6 +1688,12 @@ iterRunes: case dateAlphaWsAlpha: return p, nil + case dateAlphaWsDigit: + return p, nil + + case dateAlphaWsDigitYearmaybe: + return p, nil + case dateAlphaWsAlphaYearmaybe: return p, nil @@ -1854,6 +1918,14 @@ func (p *parser) parse() (time.Time, error) { } return time.ParseInLocation(string(p.format), p.datestr, p.loc) } +func isDay(alpha string) bool { + for _, day := range days { + if alpha == day { + return true + } + } + return false +} func isMonthFull(alpha string) bool { for _, month := range months { if alpha == month { diff --git a/parseany_test.go b/parseany_test.go index 2d92f43..66bb14f 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -42,6 +42,8 @@ var testInputs = []dateTest{ {in: "7 September 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, // ANSIC = "Mon Jan _2 15:04:05 2006" {in: "Mon Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, + // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" + {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006"