Merge pull request #52 from araddon/parse_format

parse format refs #33
This commit is contained in:
Aaron Raddon 2018-03-24 15:57:37 -07:00 committed by GitHub
commit affe10976d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 123 additions and 41 deletions

View File

@ -90,7 +90,11 @@ var (
// ParseAny parses an unknown date format: it detects the layout, then parses. // ParseAny parses an unknown date format: it detects the layout, then parses.
// Normal parse. Uses the same timezone rules as time.Parse(). // Normal parse. Uses the same timezone rules as time.Parse().
func ParseAny(datestr string) (time.Time, error) { func ParseAny(datestr string) (time.Time, error) {
return parseTime(datestr, nil) p, err := parseTime(datestr, nil)
if err != nil {
return time.Time{}, err
}
return p.parse()
} }
// ParseIn with Location, equivalent to time.ParseInLocation() timezone/offset // ParseIn with Location, equivalent to time.ParseInLocation() timezone/offset
@ -99,7 +103,11 @@ func ParseAny(datestr string) (time.Time, error) {
// That is, MST means one thing when using America/Denver and something else // That is, MST means one thing when using America/Denver and something else
// in other locations. // in other locations.
func ParseIn(datestr string, loc *time.Location) (time.Time, error) { func ParseIn(datestr string, loc *time.Location) (time.Time, error) {
return parseTime(datestr, loc) p, err := parseTime(datestr, loc)
if err != nil {
return time.Time{}, err
}
return p.parse()
} }
// ParseLocal Given an unknown date format, detect the layout, // ParseLocal Given an unknown date format, detect the layout,
@ -118,29 +126,44 @@ func ParseIn(datestr string, loc *time.Location) (time.Time, error) {
// t, err := dateparse.ParseIn("3/1/2014", denverLoc) // t, err := dateparse.ParseIn("3/1/2014", denverLoc)
// //
func ParseLocal(datestr string) (time.Time, error) { func ParseLocal(datestr string) (time.Time, error) {
return parseTime(datestr, time.Local) p, err := parseTime(datestr, time.Local)
if err != nil {
return time.Time{}, err
}
return p.parse()
} }
// MustParse parses a date and panics if it can't be parsed. Used for testing. // MustParse parses a date and panics if it can't be parsed. Used for testing.
// Not recommended for most use-cases. // Not recommended for most use-cases.
func MustParse(datestr string) time.Time { func MustParse(datestr string) time.Time {
t, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil)
if err != nil {
panic(err.Error())
}
t, err := p.parse()
if err != nil { if err != nil {
panic(err.Error()) panic(err.Error())
} }
return t return t
} }
func parse(layout, datestr string, loc *time.Location) (time.Time, error) { // ParseFormat parse an unknown date format, detect the layout and return layout.
if loc == nil { func ParseFormat(datestr string) (string, error) {
return time.Parse(layout, datestr) p, err := parseTime(datestr, nil)
if err != nil {
return "", err
} }
return time.ParseInLocation(layout, datestr, loc) _, err = p.parse()
if err != nil {
return "", err
}
return string(p.format), nil
} }
type parser struct { type parser struct {
loc *time.Location loc *time.Location
preferMonthFirst bool preferMonthFirst bool
ambiguousMD bool
stateDate dateState stateDate dateState
stateTime timeState stateTime timeState
format []byte format []byte
@ -166,6 +189,7 @@ type parser struct {
offsetlen int offsetlen int
tzi int tzi int
tzlen int tzlen int
t *time.Time
} }
func newParser(dateStr string, loc *time.Location) *parser { func newParser(dateStr string, loc *time.Location) *parser {
@ -283,6 +307,9 @@ func (p *parser) trimExtra() {
} }
func (p *parser) parse() (time.Time, error) { func (p *parser) parse() (time.Time, error) {
if p.t != nil {
return *p.t, nil
}
if p.skip > 0 && len(p.format) > p.skip { if p.skip > 0 && len(p.format) > p.skip {
p.format = p.format[p.skip:] p.format = p.format[p.skip:]
p.datestr = p.datestr[p.skip:] p.datestr = p.datestr[p.skip:]
@ -293,7 +320,7 @@ func (p *parser) parse() (time.Time, error) {
} }
return time.ParseInLocation(string(p.format), p.datestr, p.loc) return time.ParseInLocation(string(p.format), p.datestr, p.loc)
} }
func parseTime(datestr string, loc *time.Location) (time.Time, error) { func parseTime(datestr string, loc *time.Location) (*parser, error) {
p := newParser(datestr, loc) p := newParser(datestr, loc)
i := 0 i := 0
@ -316,6 +343,8 @@ iterRunes:
p.stateDate = dateDigit p.stateDate = dateDigit
} else if unicode.IsLetter(r) { } else if unicode.IsLetter(r) {
p.stateDate = dateAlpha p.stateDate = dateAlpha
} else {
return nil, fmt.Errorf("unrecognized first character '%s' in %v", string(r), datestr)
} }
case dateDigit: case dateDigit:
@ -341,6 +370,7 @@ iterRunes:
p.moi = i + 1 p.moi = i + 1
p.setYear() p.setYear()
} else { } else {
p.ambiguousMD = true
if p.preferMonthFirst { if p.preferMonthFirst {
if p.molen == 0 { if p.molen == 0 {
p.molen = i p.molen = i
@ -352,6 +382,7 @@ iterRunes:
case '.': case '.':
// 3.31.2014 // 3.31.2014
p.ambiguousMD = true
p.moi = 0 p.moi = 0
p.molen = i p.molen = i
p.setMonth() p.setMonth()
@ -426,7 +457,6 @@ iterRunes:
// 2017-07-19 03:21:51+00:00 // 2017-07-19 03:21:51+00:00
// 2013-04-01 22:43:22 // 2013-04-01 22:43:22
// 2014-04-26 05:24:37 PM // 2014-04-26 05:24:37 PM
switch r { switch r {
case ' ': case ' ':
p.daylen = i - p.dayi p.daylen = i - p.dayi
@ -487,11 +517,6 @@ iterRunes:
p.yeari = i + 1 p.yeari = i + 1
} }
} }
default:
// if unicode.IsDigit(r) || r == '/' {
// continue
// }
} }
case dateDigitWs: case dateDigitWs:
@ -583,7 +608,6 @@ iterRunes:
// Thu, 13 Jul 2017 08:58:40 +0100 // Thu, 13 Jul 2017 08:58:40 +0100
// Tue, 11 Jul 2017 16:28:13 +0200 (CEST) // Tue, 11 Jul 2017 16:28:13 +0200 (CEST)
// Mon, 02-Jan-06 15:04:05 MST // Mon, 02-Jan-06 15:04:05 MST
switch { switch {
case r == ' ': case r == ' ':
if i > 4 { if i > 4 {
@ -591,7 +615,8 @@ iterRunes:
// This one doesn't follow the standard parse methodology: the month name "January" // This one doesn't follow the standard parse methodology: the month name "January"
// is hard to handle with the format-string replace method because its length varies (March, June), // is hard to handle with the format-string replace method because its length varies (March, June),
// so we just use this fixed format here. If we see more formats similar to this we will do something else. // so we just use this fixed format here. If we see more formats similar to this we will do something else.
return parse("January 02, 2006 at 3:04pm MST-07", datestr, loc) p.format = []byte("January 02, 2006 at 3:04pm MST-07")
return p, nil
} }
p.stateDate = dateAlphaWs p.stateDate = dateAlphaWs
case r == ',': case r == ',':
@ -609,7 +634,6 @@ iterRunes:
// the mon, monday, they are all superfluous and not needed // the mon, monday, they are all superfluous and not needed
// just lay down the skip, no need to fill and then skip // just lay down the skip, no need to fill and then skip
} }
} }
case dateWeekdayComma: case dateWeekdayComma:
// Monday, 02 Jan 2006 15:04:05 MST // Monday, 02 Jan 2006 15:04:05 MST
@ -1242,9 +1266,11 @@ iterRunes:
t = time.Unix(0, miliSecs*1000*1000) t = time.Unix(0, miliSecs*1000*1000)
} }
} else if len(datestr) == len("20140601") { } else if len(datestr) == len("20140601") {
return parse("20060102", datestr, loc) p.format = []byte("20060102")
return p, nil
} else if len(datestr) == len("2014") { } else if len(datestr) == len("2014") {
return parse("2006", datestr, loc) p.format = []byte("2006")
return p, nil
} }
if t.IsZero() { if t.IsZero() {
if secs, err := strconv.ParseInt(datestr, 10, 64); err == nil { if secs, err := strconv.ParseInt(datestr, 10, 64); err == nil {
@ -1253,41 +1279,45 @@ iterRunes:
// nothing before unix-epoch // nothing before unix-epoch
} else { } else {
t = time.Unix(secs, 0) t = time.Unix(secs, 0)
p.t = &t
} }
} }
} }
if !t.IsZero() { if !t.IsZero() {
if loc == nil { if loc == nil {
return t, nil p.t = &t
return p, nil
} }
return t.In(loc), nil t = t.In(loc)
p.t = &t
return p, nil
} }
case dateDigitDash: case dateDigitDash:
// 2006-01 // 2006-01
return p.parse() return p, nil
case dateDigitDashDash: case dateDigitDashDash:
// 2006-01-02 // 2006-01-02
// 2006-1-02 // 2006-1-02
// 2006-1-2 // 2006-1-2
// 2006-01-2 // 2006-01-2
return p.parse() return p, nil
case dateDigitDashDashAlpha: case dateDigitDashDashAlpha:
// 2013-Feb-03 // 2013-Feb-03
// 2013-Feb-3 // 2013-Feb-3
p.daylen = i - p.dayi p.daylen = i - p.dayi
p.setDay() p.setDay()
return p.parse() return p, nil
case dateDigitDashDashWs: // starts digit then dash 02- then whitespace 1 << 2 << 5 + 3 case dateDigitDashDashWs: // starts digit then dash 02- then whitespace 1 << 2 << 5 + 3
// 2013-04-01 22:43:22 // 2013-04-01 22:43:22
// 2013-04-01 22:43 // 2013-04-01 22:43
return p.parse() return p, nil
case dateDigitDashDashT: case dateDigitDashDashT:
return p.parse() return p, nil
case dateDigitDotDot: case dateDigitDotDot:
// 03.31.1981 // 03.31.1981
@ -1295,7 +1325,7 @@ iterRunes:
// 3.2.81 // 3.2.81
p.setYear() p.setYear()
p.yearlen = i - p.yeari p.yearlen = i - p.yeari
return p.parse() return p, nil
case dateDigitWsMoYear: case dateDigitWsMoYear:
// 2 Jan 2018 // 2 Jan 2018
@ -1305,59 +1335,63 @@ iterRunes:
// 02 Jan 2018 23:59:45 // 02 Jan 2018 23:59:45
// 12 Feb 2006, 19:17 // 12 Feb 2006, 19:17
// 12 Feb 2006, 19:17:22 // 12 Feb 2006, 19:17:22
return p.parse() return p, nil
case dateDigitWsMolong: case dateDigitWsMolong:
// 18 January 2018 // 18 January 2018
// 8 January 2018 // 8 January 2018
if p.daylen == 2 { if p.daylen == 2 {
return parse("02 January 2006", datestr, loc) p.format = []byte("02 January 2006")
return p, nil
} }
return parse("2 January 2006", datestr, loc) p.format = []byte("2 January 2006")
return p, nil // parse("2 January 2006", datestr, loc)
case dateAlphaWsDigitCommaWs: case dateAlphaWsDigitCommaWs:
// oct 1, 1970 // oct 1, 1970
p.yearlen = i - p.yeari p.yearlen = i - p.yeari
p.setYear() p.setYear()
return p.parse() return p, nil
case dateAlphaWsDigitCommaWsYear: case dateAlphaWsDigitCommaWsYear:
// May 8, 2009 5:57:51 PM // May 8, 2009 5:57:51 PM
return p.parse() return p, nil
case dateAlphaWsAlpha: case dateAlphaWsAlpha:
return p.parse() return p, nil
case dateAlphaWsAlphaYearmaybe: case dateAlphaWsAlphaYearmaybe:
return p.parse() return p, nil
case dateDigitSlash: case dateDigitSlash:
// 3/1/2014 // 3/1/2014
// 10/13/2014 // 10/13/2014
// 01/02/2006 // 01/02/2006
// 2014/10/13 // 2014/10/13
return p.parse() return p, nil
case dateDigitChineseYear: case dateDigitChineseYear:
// dateDigitChineseYear // dateDigitChineseYear
// 2014年04月08日 // 2014年04月08日
return parse("2006年01月02日", datestr, loc) p.format = []byte("2006年01月02日")
return p, nil
case dateDigitChineseYearWs: case dateDigitChineseYearWs:
return parse("2006年01月02日 15:04:05", datestr, loc) p.format = []byte("2006年01月02日 15:04:05")
return p, nil
case dateWeekdayComma: case dateWeekdayComma:
// Monday, 02 Jan 2006 15:04:05 -0700 // Monday, 02 Jan 2006 15:04:05 -0700
// Monday, 02 Jan 2006 15:04:05 +0100 // Monday, 02 Jan 2006 15:04:05 +0100
// Monday, 02-Jan-06 15:04:05 MST // Monday, 02-Jan-06 15:04:05 MST
return p.parse() return p, nil
case dateWeekdayAbbrevComma: case dateWeekdayAbbrevComma:
// Mon, 02-Jan-06 15:04:05 MST // Mon, 02-Jan-06 15:04:05 MST
// Mon, 02 Jan 2006 15:04:05 MST // Mon, 02 Jan 2006 15:04:05 MST
return p.parse() return p, nil
} }
return time.Time{}, fmt.Errorf("Could not find date format for %s", datestr) return nil, fmt.Errorf("Could not find date format for %s", datestr)
} }

View File

@ -111,6 +111,16 @@ func TestInLocation(t *testing.T) {
_, offset = ts.Zone() _, offset = ts.Zone()
assert.NotEqual(t, 0, offset, "Should have found offset %v", offset) assert.NotEqual(t, 0, offset, "Should have found offset %v", offset)
assert.Equal(t, "2006-01-02 22:04:05 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC))) assert.Equal(t, "2006-01-02 22:04:05 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
// Now some errors
zeroTime := time.Time{}.Unix()
ts, err = ParseIn("INVALID", denverLoc)
assert.Equal(t, zeroTime, ts.Unix())
assert.NotEqual(t, nil, err)
ts, err = ParseLocal("INVALID")
assert.Equal(t, zeroTime, ts.Unix())
assert.NotEqual(t, nil, err)
} }
func TestOne(t *testing.T) { func TestOne(t *testing.T) {
@ -461,6 +471,8 @@ func TestParse(t *testing.T) {
// some errors // some errors
assert.Equal(t, true, testDidPanic(`{"ts":"now"}`))
_, err = ParseAny("138421636711122233311111") // too many digits _, err = ParseAny("138421636711122233311111") // too many digits
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
@ -499,3 +511,39 @@ func TestPStruct(t *testing.T) {
assert.True(t, len(p.ds()) > 0) assert.True(t, len(p.ds()) > 0)
assert.True(t, len(p.ts()) > 0) assert.True(t, len(p.ts()) > 0)
} }
var testParseFormat = []dateTest{
{in: "oct 7, 1970", out: "Jan 2, 2006"},
// 03 February 2013
{in: "03 February 2013", out: "02 January 2006"},
// 13:31:51.999 -07:00 MST
// yyyy-mm-dd hh:mm:ss +00:00
{in: "2012-08-03 18:31:59 +00:00", out: "2006-01-02 15:04:05 -07:00"},
// yyyy-mm-dd hh:mm:ss +0000 TZ
// Golang Native Format
{in: "2012-08-03 18:31:59 +0000 UTC", out: "2006-01-02 15:04:05 -0700 UTC"},
// yyyy-mm-dd hh:mm:ss TZ
{in: "2012-08-03 18:31:59 UTC", out: "2006-01-02 15:04:05 UTC"},
// yyyy-mm-ddThh:mm:ss-07:00
{in: "2009-08-12T22:15:09-07:00", out: "2006-01-02T15:04:05-07:00"},
// yyyy-mm-ddThh:mm:ss-0700
{in: "2009-08-12T22:15:09-0700", out: "2006-01-02T15:04:05-0700"},
// yyyy-mm-ddThh:mmZ
{in: "2009-08-12T22:15Z", out: "2006-01-02T15:04Z"},
}
func TestParseLayout(t *testing.T) {
for _, th := range testParseFormat {
l, err := ParseFormat(th.in)
assert.Equal(t, nil, err)
assert.Equal(t, th.out, l, "for in=%v", th.in)
}
// errors
_, err := ParseFormat(`{"hello"}`)
assert.NotEqual(t, nil, err)
_, err = ParseFormat("2009-15-12T22:15Z")
assert.NotEqual(t, nil, err)
}