From 661394b510884e7e415c95eab0370e3dbad87c8c Mon Sep 17 00:00:00 2001 From: Charles Duffy Date: Tue, 5 May 2020 21:10:21 -0500 Subject: [PATCH 1/3] Parse glibc strftime %c format (#102) This change trims day-of-week prefixes before entering into the parsing loop. This corrects parsing of glibc's strftime `%c` timestamps, and should allow significant simplification of the state machine in the future. --- parseany.go | 92 ++++++++++++++++++++++++++++++++++++++++++------ parseany_test.go | 2 ++ 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/parseany.go b/parseany.go index 5e66aa6..3ccd072 100644 --- a/parseany.go +++ b/parseany.go @@ -17,6 +17,23 @@ import ( // gou.SetColorOutput() // } +var days = []string{ + "mon", + "tue", + "wed", + "thu", + "fri", + "sat", + "sun", + "monday", + "tuesday", + "wednesday", + "thursday", + "friday", + "saturday", + "sunday", +} + var months = []string{ "january", "february", @@ -60,6 +77,7 @@ const ( dateAlphaWsDigitMore dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWsYear + dateAlphaWsDigitYearmaybe dateAlphaWsMonth dateAlphaWsMonthMore dateAlphaWsMonthSuffix @@ -587,9 +605,21 @@ iterRunes: } else { // This is possibly ambiguous? May will parse as either though. // So, it could return in-correct format. - // May 05, 2005, 05:05:05 - // May 05 2005, 05:05:05 - // Jul 05, 2005, 05:05:05 + // dateAlphaWs + // May 05, 2005, 05:05:05 + // May 05 2005, 05:05:05 + // Jul 05, 2005, 05:05:05 + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // skip & return to dateStart + // Tue 05 May 2020, 05:05:05 + // Mon Jan 2 15:04:05 2006 + + maybeDay := strings.ToLower(datestr[0:i]) + if isDay(maybeDay) { + // using skip throws off indices used by other code; saner to restart + return parseTime(datestr[i+1:], loc) + } p.stateDate = dateAlphaWs } @@ -631,11 +661,14 @@ iterRunes: // Mon Jan 02 15:04:05 -0700 2006 // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) // Mon Aug 10 15:44:11 UTC+0100 2015 - // dateAlphaWsDigit - // May 8, 2009 5:57:51 PM - // May 8 2009 5:57:51 PM - // oct 1, 1970 - // oct 7, '70 + // dateAlphaWsDigit + // May 8, 2009 5:57:51 PM + // May 8 2009 5:57:51 PM + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 + // oct 1, 1970 + // oct 7, '70 switch { case unicode.IsLetter(r): p.set(0, "Mon") @@ -653,6 +686,9 @@ iterRunes: // oct 1, 1970 // oct 7, '70 // oct. 7, 1970 + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 if r == ',' { p.daylen = i - p.dayi p.setDay() @@ -661,11 +697,31 @@ iterRunes: p.daylen = i - p.dayi p.setDay() p.yeari = i + 1 - p.stateDate = dateAlphaWsDigitMoreWs + p.stateDate = dateAlphaWsDigitYearmaybe + p.stateTime = timeStart } else if unicode.IsLetter(r) { p.stateDate = dateAlphaWsMonthSuffix i-- } + case dateAlphaWsDigitYearmaybe: + // x + // May 8 2009 5:57:51 PM + // May 8 17:57:51 2009 + // May 8 17:57:51 2009 + // May 08 17:57:51 2009 + // Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) + if r == ':' { + // Guessed wrong; was not a year + i = i - 3 + p.stateDate = dateAlphaWsDigit + p.yeari = 0 + break iterRunes + } else if r == ' ' { + // must be year format, not 15:04 + p.yearlen = i - p.yeari + p.setYear() + break iterRunes + } case dateAlphaWsDigitMore: // x // May 8, 2009 5:57:51 PM @@ -1139,7 +1195,9 @@ iterRunes: switch r { case ' ': p.set(p.offseti, "-0700") - p.yeari = i + 1 + if p.yeari == 0 { + p.yeari = i + 1 + } p.stateTime = timeWsAlphaZoneOffsetWs } case timeWsAlphaZoneOffsetWs: @@ -1630,6 +1688,12 @@ iterRunes: case dateAlphaWsAlpha: return p, nil + case dateAlphaWsDigit: + return p, nil + + case dateAlphaWsDigitYearmaybe: + return p, nil + case dateAlphaWsAlphaYearmaybe: return p, nil @@ -1854,6 +1918,14 @@ func (p *parser) parse() (time.Time, error) { } return time.ParseInLocation(string(p.format), p.datestr, p.loc) } +func isDay(alpha string) bool { + for _, day := range days { + if alpha == day { + return true + } + } + return false +} func isMonthFull(alpha string) bool { for _, month := range months { if alpha == month { diff --git a/parseany_test.go b/parseany_test.go index 2d92f43..66bb14f 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -42,6 +42,8 @@ var testInputs = []dateTest{ {in: "7 September 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, // ANSIC = "Mon Jan _2 15:04:05 2006" {in: "Mon Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, + // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" + {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006" From 293071ed832800fbda498b80146a5b035c8cb5bb Mon Sep 17 00:00:00 2001 From: Aaron Raddon Date: Wed, 30 Sep 2020 12:17:37 -0700 Subject: [PATCH 2/3] Code cleanup, remove un-used code after #103 --- parseany.go | 40 ---------------------------------------- parseany_test.go | 9 +++++---- 2 files changed, 5 insertions(+), 44 deletions(-) diff --git a/parseany.go b/parseany.go index 3ccd072..95fc011 100644 --- a/parseany.go +++ b/parseany.go @@ -84,7 +84,6 @@ const ( dateAlphaWsMore dateAlphaWsAtTime dateAlphaWsAlpha - dateAlphaWsAlphaYearmaybe dateAlphaPeriodWsDigit dateWeekdayComma dateWeekdayAbbrevComma @@ -754,42 +753,6 @@ iterRunes: break iterRunes } - case dateAlphaWsAlpha: - // Mon Jan _2 15:04:05 2006 - // Mon Jan 02 15:04:05 -0700 2006 - // Mon Jan _2 15:04:05 MST 2006 - // Mon Aug 10 15:44:11 UTC+0100 2015 - // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) - if r == ' ' { - if p.dayi > 0 { - p.daylen = i - p.dayi - p.setDay() - p.yeari = i + 1 - p.stateDate = dateAlphaWsAlphaYearmaybe - p.stateTime = timeStart - } - } else if unicode.IsDigit(r) { - if p.dayi == 0 { - p.dayi = i - } - } - - case dateAlphaWsAlphaYearmaybe: - // x - // Mon Jan _2 15:04:05 2006 - // Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) - if r == ':' { - i = i - 3 - p.stateDate = dateAlphaWsAlpha - p.yeari = 0 - break iterRunes - } else if r == ' ' { - // must be year format, not 15:04 - p.yearlen = i - p.yeari - p.setYear() - break iterRunes - } - case dateAlphaWsMonth: // April 8, 2009 // April 8 2009 @@ -1694,9 +1657,6 @@ iterRunes: case dateAlphaWsDigitYearmaybe: return p, nil - case dateAlphaWsAlphaYearmaybe: - return p, nil - case dateDigitSlash: // 3/1/2014 // 10/13/2014 diff --git a/parseany_test.go b/parseany_test.go index 66bb14f..3b37bf9 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -11,8 +11,8 @@ import ( func TestOne(t *testing.T) { time.Local = time.UTC var ts time.Time - ts = MustParse("2018.09.30") - assert.Equal(t, "2018-09-30 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) + ts = MustParse("Mon 30 Sep 2018 09:09:09 PM UTC") + assert.Equal(t, "2018-09-30 21:09:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) } type dateTest struct { @@ -42,10 +42,11 @@ var testInputs = []dateTest{ {in: "7 September 1970", out: "1970-09-07 00:00:00 +0000 UTC"}, // ANSIC = "Mon Jan _2 15:04:05 2006" {in: "Mon Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"}, - // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" - {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, {in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"}, + // ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC" + {in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"}, + {in: "Mon 30 Sep 2018 09:09:09 PM UTC", out: "2018-09-30 21:09:09 +0000 UTC"}, // RubyDate = "Mon Jan 02 15:04:05 -0700 2006" {in: "Mon Jan 02 15:04:05 -0700 2006", out: "2006-01-02 22:04:05 +0000 UTC"}, {in: "Thu May 08 11:57:51 -0700 2009", out: "2009-05-08 18:57:51 +0000 UTC"}, From 1c5dd97ab65e1ab32d738b0b1dbb8121c70ecbdd Mon Sep 17 00:00:00 2001 From: Aaron Raddon Date: Wed, 30 Sep 2020 12:27:10 -0700 Subject: [PATCH 3/3] doc cleanup from #103 --- README.md | 7 +++++-- example/main.go | 10 ++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2a62105..b67f874 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,9 @@ var examples = []string{ "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", "Mon, 02 Jan 2006 15:04:05 -0700", "Thu, 4 Jan 2018 17:53:36 +0000", + "Mon 30 Sep 2018 09:09:09 PM UTC", "Mon Aug 10 15:44:11 UTC+0100 2015", + "Thu, 4 Jan 2018 17:53:36 +0000", "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", "September 17, 2012 10:09am", "September 17, 2012 at 10:09am PST-08", @@ -199,8 +201,9 @@ func main() { | Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | | Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | | Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | | Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | | Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT | | September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | | September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | @@ -238,7 +241,7 @@ func main() { | 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | | 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | | 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | | 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | | 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | | 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC | diff --git a/example/main.go b/example/main.go index 8cdbbc9..56cec98 100644 --- a/example/main.go +++ b/example/main.go @@ -5,8 +5,8 @@ import ( "fmt" "time" - "github.com/scylladb/termtables" "github.com/araddon/dateparse" + "github.com/scylladb/termtables" ) var examples = []string{ @@ -22,8 +22,9 @@ var examples = []string{ "Mon, 02 Jan 2006 15:04:05 MST", "Tue, 11 Jul 2017 16:28:13 +0200 (CEST)", "Mon, 02 Jan 2006 15:04:05 -0700", - "Thu, 4 Jan 2018 17:53:36 +0000", + "Mon 30 Sep 2018 09:09:09 PM UTC", "Mon Aug 10 15:44:11 UTC+0100 2015", + "Thu, 4 Jan 2018 17:53:36 +0000", "Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)", "September 17, 2012 10:09am", "September 17, 2012 at 10:09am PST-08", @@ -156,8 +157,9 @@ func main() { | Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST | | Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 | | Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 | -| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | +| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC | | Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC | +| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC | | Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT | | September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC | | September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST | @@ -195,7 +197,7 @@ func main() { | 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC | | 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC | | 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC | -| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | +| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC | | 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC | | 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 | | 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC |