Parse glibc strftime %c format (#102)

This change trims day-of-week prefixes before entering into the parsing loop. This corrects parsing of glibc's strftime `%c` timestamps, and should allow significant simplification of the state machine in the future.
This commit is contained in:
Charles Duffy 2020-05-05 21:10:21 -05:00
parent d820a6159a
commit 661394b510
No known key found for this signature in database
GPG Key ID: 643F86964B84FB01
2 changed files with 84 additions and 10 deletions

View File

@ -17,6 +17,23 @@ import (
// gou.SetColorOutput()
// }
// days lists the lowercase day-of-week names, in both three-letter and full
// forms, that isDay compares a candidate prefix against.
var days = []string{
"mon",
"tue",
"wed",
"thu",
"fri",
"sat",
"sun",
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday",
}
var months = []string{
"january",
"february",
@ -60,6 +77,7 @@ const (
dateAlphaWsDigitMore
dateAlphaWsDigitMoreWs
dateAlphaWsDigitMoreWsYear
dateAlphaWsDigitYearmaybe
dateAlphaWsMonth
dateAlphaWsMonthMore
dateAlphaWsMonthSuffix
@ -587,9 +605,21 @@ iterRunes:
} else {
// This is possibly ambiguous? "May" will parse as either though.
// So, it could return an incorrect format.
// dateAlphaWs
// May 05, 2005, 05:05:05
// May 05 2005, 05:05:05
// Jul 05, 2005, 05:05:05
// May 8 17:57:51 2009
// May 8 17:57:51 2009
// skip & return to dateStart
// Tue 05 May 2020, 05:05:05
// Mon Jan 2 15:04:05 2006
maybeDay := strings.ToLower(datestr[0:i])
if isDay(maybeDay) {
// using skip throws off indices used by other code; saner to restart
return parseTime(datestr[i+1:], loc)
}
p.stateDate = dateAlphaWs
}
@ -634,6 +664,9 @@ iterRunes:
// dateAlphaWsDigit
// May 8, 2009 5:57:51 PM
// May 8 2009 5:57:51 PM
// May 8 17:57:51 2009
// May 8 17:57:51 2009
// May 08 17:57:51 2009
// oct 1, 1970
// oct 7, '70
switch {
@ -653,6 +686,9 @@ iterRunes:
// oct 1, 1970
// oct 7, '70
// oct. 7, 1970
// May 8 17:57:51 2009
// May 8 17:57:51 2009
// May 08 17:57:51 2009
if r == ',' {
p.daylen = i - p.dayi
p.setDay()
@ -661,11 +697,31 @@ iterRunes:
p.daylen = i - p.dayi
p.setDay()
p.yeari = i + 1
p.stateDate = dateAlphaWsDigitMoreWs
p.stateDate = dateAlphaWsDigitYearmaybe
p.stateTime = timeStart
} else if unicode.IsLetter(r) {
p.stateDate = dateAlphaWsMonthSuffix
i--
}
case dateAlphaWsDigitYearmaybe:
// x
// May 8 2009 5:57:51 PM
// May 8 17:57:51 2009
// May 8 17:57:51 2009
// May 08 17:57:51 2009
// Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)
if r == ':' {
// Guessed wrong; was not a year
i = i - 3
p.stateDate = dateAlphaWsDigit
p.yeari = 0
break iterRunes
} else if r == ' ' {
// must be year format, not 15:04
p.yearlen = i - p.yeari
p.setYear()
break iterRunes
}
case dateAlphaWsDigitMore:
// x
// May 8, 2009 5:57:51 PM
@ -1139,7 +1195,9 @@ iterRunes:
switch r {
case ' ':
p.set(p.offseti, "-0700")
if p.yeari == 0 {
p.yeari = i + 1
}
p.stateTime = timeWsAlphaZoneOffsetWs
}
case timeWsAlphaZoneOffsetWs:
@ -1630,6 +1688,12 @@ iterRunes:
case dateAlphaWsAlpha:
return p, nil
case dateAlphaWsDigit:
return p, nil
case dateAlphaWsDigitYearmaybe:
return p, nil
case dateAlphaWsAlphaYearmaybe:
return p, nil
@ -1854,6 +1918,14 @@ func (p *parser) parse() (time.Time, error) {
}
return time.ParseInLocation(string(p.format), p.datestr, p.loc)
}
// isDay reports whether alpha is exactly equal to one of the entries in days.
// The comparison is case-sensitive and the entries in days are lowercase, so
// the caller must lowercase the candidate before calling.
func isDay(alpha string) bool {
	for idx := 0; idx < len(days); idx++ {
		if days[idx] == alpha {
			return true
		}
	}
	return false
}
func isMonthFull(alpha string) bool {
for _, month := range months {
if alpha == month {

View File

@ -42,6 +42,8 @@ var testInputs = []dateTest{
{in: "7 September 1970", out: "1970-09-07 00:00:00 +0000 UTC"},
// ANSIC = "Mon Jan _2 15:04:05 2006"
{in: "Mon Jan 2 15:04:05 2006", out: "2006-01-02 15:04:05 +0000 UTC"},
// ANSIC_GLIBC = "Mon 02 Jan 2006 03:04:05 PM UTC"
{in: "Mon 02 Jan 2006 03:04:05 PM UTC", out: "2006-01-02 15:04:05 +0000 UTC"},
{in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"},
{in: "Thu May 8 17:57:51 2009", out: "2009-05-08 17:57:51 +0000 UTC"},
// RubyDate = "Mon Jan 02 15:04:05 -0700 2006"