Merge pull request #89 from troyspencer/master

Options framework to allow more control over parsing
This commit is contained in:
Aaron Raddon 2020-09-30 12:45:49 -07:00 committed by GitHub
commit 9cb150cd12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 154 additions and 58 deletions

View File

@ -144,8 +144,8 @@ func unknownErr(datestr string) error {
// ParseAny parse an unknown date format, detect the layout. // ParseAny parse an unknown date format, detect the layout.
// Normal parse. Equivalent Timezone rules as time.Parse(). // Normal parse. Equivalent Timezone rules as time.Parse().
// NOTE: please see readme on mmdd vs ddmm ambiguous dates. // NOTE: please see readme on mmdd vs ddmm ambiguous dates.
func ParseAny(datestr string) (time.Time, error) { func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -157,8 +157,8 @@ func ParseAny(datestr string) (time.Time, error) {
// datestring, it uses the given location rules for any zone interpretation. // datestring, it uses the given location rules for any zone interpretation.
// That is, MST means one thing when using America/Denver and something else // That is, MST means one thing when using America/Denver and something else
// in other locations. // in other locations.
func ParseIn(datestr string, loc *time.Location) (time.Time, error) { func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, loc) p, err := parseTime(datestr, loc, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -180,8 +180,8 @@ func ParseIn(datestr string, loc *time.Location) (time.Time, error) {
// //
// t, err := dateparse.ParseIn("3/1/2014", denverLoc) // t, err := dateparse.ParseIn("3/1/2014", denverLoc)
// //
func ParseLocal(datestr string) (time.Time, error) { func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, time.Local) p, err := parseTime(datestr, time.Local, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -190,8 +190,8 @@ func ParseLocal(datestr string) (time.Time, error) {
// MustParse parse a date, and panic if it can't be parsed. Used for testing. // MustParse parse a date, and panic if it can't be parsed. Used for testing.
// Not recommended for most use-cases. // Not recommended for most use-cases.
func MustParse(datestr string) time.Time { func MustParse(datestr string, opts ...ParserOption) time.Time {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
panic(err.Error()) panic(err.Error())
} }
@ -208,8 +208,8 @@ func MustParse(datestr string) time.Time {
// layout, err := dateparse.ParseFormat("2013-02-01 00:00:00") // layout, err := dateparse.ParseFormat("2013-02-01 00:00:00")
// // layout = "2006-01-02 15:04:05" // // layout = "2006-01-02 15:04:05"
// //
func ParseFormat(datestr string) (string, error) { func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -222,8 +222,8 @@ func ParseFormat(datestr string) (string, error) {
// ParseStrict parse an unknown date format. If the date is ambiguous // ParseStrict parse an unknown date format. If the date is ambiguous
// mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc // mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc
func ParseStrict(datestr string) (time.Time, error) { func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -233,9 +233,31 @@ func ParseStrict(datestr string) (time.Time, error) {
return p.parse() return p.parse()
} }
func parseTime(datestr string, loc *time.Location) (*parser, error) { func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) {
p = newParser(datestr, loc, opts...)
if p.retryAmbiguousDateWithSwap {
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that
// by retrying in this case, we can fix a common situation with no assumptions
defer func() {
if p.ambiguousMD {
// if it errors out with the following error, swap before we
// get out of this function to reduce scope it needs to be applied on
_, err := p.parse()
if err != nil && strings.Contains(err.Error(), "month out of range") {
// create the option to reverse the preference
preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst)
// turn off the retry to avoid endless recursion
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap)
p, err = parseTime(datestr, time.Local, modifiedOpts...)
}
}
}()
}
p := newParser(datestr, loc)
i := 0 i := 0
// General strategy is to read rune by rune through the date looking for // General strategy is to read rune by rune through the date looking for
@ -293,6 +315,12 @@ iterRunes:
p.setMonth() p.setMonth()
p.dayi = i + 1 p.dayi = i + 1
} }
} else {
if p.daylen == 0 {
p.daylen = i
p.setDay()
p.moi = i + 1
}
} }
} }
@ -489,6 +517,12 @@ iterRunes:
p.setDay() p.setDay()
p.yeari = i + 1 p.yeari = i + 1
} }
} else {
if p.molen == 0 {
p.molen = i - p.moi
p.setMonth()
p.yeari = i + 1
}
} }
} }
@ -712,7 +746,7 @@ iterRunes:
} else if i == 4 { } else if i == 4 {
// gross // gross
datestr = datestr[0:i-1] + datestr[i:] datestr = datestr[0:i-1] + datestr[i:]
return parseTime(datestr, loc) return parseTime(datestr, loc, opts...)
} else { } else {
return nil, unknownErr(datestr) return nil, unknownErr(datestr)
} }
@ -867,25 +901,25 @@ iterRunes:
case 't', 'T': case 't', 'T':
if p.nextIs(i, 'h') || p.nextIs(i, 'H') { if p.nextIs(i, 'h') || p.nextIs(i, 'H') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 'n', 'N': case 'n', 'N':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 's', 'S': case 's', 'S':
if p.nextIs(i, 't') || p.nextIs(i, 'T') { if p.nextIs(i, 't') || p.nextIs(i, 'T') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 'r', 'R': case 'r', 'R':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
} }
@ -1059,7 +1093,7 @@ iterRunes:
// 2014-05-11 08:20:13,787 // 2014-05-11 08:20:13,787
ds := []byte(p.datestr) ds := []byte(p.datestr)
ds[i] = '.' ds[i] = '.'
return parseTime(string(ds), loc) return parseTime(string(ds), loc, opts...)
case '-', '+': case '-', '+':
// 03:21:51+00:00 // 03:21:51+00:00
p.stateTime = timeOffset p.stateTime = timeOffset
@ -1765,6 +1799,7 @@ iterRunes:
type parser struct { type parser struct {
loc *time.Location loc *time.Location
preferMonthFirst bool preferMonthFirst bool
retryAmbiguousDateWithSwap bool
ambiguousMD bool ambiguousMD bool
stateDate dateState stateDate dateState
stateTime timeState stateTime timeState
@ -1795,16 +1830,42 @@ type parser struct {
t *time.Time t *time.Time
} }
func newParser(dateStr string, loc *time.Location) *parser { // ParserOption defines a function signature implemented by options
p := parser{ // Options defined like this accept the parser and operate on the data within
// A ParserOption receives the parser after newParser has filled in its
// defaults and may overwrite any of its fields. newParser applies the options
// in the order they were passed and currently discards the returned error.
type ParserOption func(*parser) error
// PreferMonthFirst returns a ParserOption that overrides the parser's
// preferMonthFirst setting, which newParser initialises to true. Passing
// false makes the day be preferred ahead of the month when a date is
// completely ambiguous (for example 04/02/2014).
func PreferMonthFirst(preferMonthFirst bool) ParserOption {
	var opt ParserOption = func(target *parser) error {
		target.preferMonthFirst = preferMonthFirst
		return nil // setting the flag cannot fail
	}
	return opt
}
// RetryAmbiguousDateWithSwap returns a ParserOption that overrides the
// parser's retryAmbiguousDateWithSwap setting, which newParser initialises to
// false. When enabled, parseTime retries an ambiguous month/day date with the
// opposite month/day preference if the first attempt fails with a
// "month out of range" error.
func RetryAmbiguousDateWithSwap(retryAmbiguousDateWithSwap bool) ParserOption {
	var opt ParserOption = func(target *parser) error {
		target.retryAmbiguousDateWithSwap = retryAmbiguousDateWithSwap
		return nil // setting the flag cannot fail
	}
	return opt
}
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) *parser {
p := &parser{
stateDate: dateStart, stateDate: dateStart,
stateTime: timeIgnore, stateTime: timeIgnore,
datestr: dateStr, datestr: dateStr,
loc: loc, loc: loc,
preferMonthFirst: true, preferMonthFirst: true,
retryAmbiguousDateWithSwap: false,
} }
p.format = []byte(dateStr) p.format = []byte(dateStr)
return &p
// allow the options to mutate the parser fields from their defaults
for _, option := range opts {
option(p)
}
return p
} }
func (p *parser) nextIs(i int, b byte) bool { func (p *parser) nextIs(i int, b byte) bool {

View File

@ -686,3 +686,38 @@ func TestInLocation(t *testing.T) {
assert.Equal(t, zeroTime, ts.Unix()) assert.Equal(t, zeroTime, ts.Unix())
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
} }
func TestPreferMonthFirst(t *testing.T) {
// default case is true
ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC")
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
preferMonthFirstTrue := PreferMonthFirst(true)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
// allows the day to be preferred before the month, when completely ambiguous
preferMonthFirstFalse := PreferMonthFirst(false)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
}
// TestRetryAmbiguousDateWithSwap checks that a date whose leading field is too
// large to be a month only parses when the retry-with-swap option is enabled.
func TestRetryAmbiguousDateWithSwap(t *testing.T) {
	const input = "13/02/2014 04:08:09 +0000 UTC"

	// retry is off by default, so parsing is expected to fail
	_, err := ParseAny(input)
	assert.NotEqual(t, nil, err)

	// switching retry off explicitly fails too: the month preference cannot
	// work because the value is larger than 12
	_, err = ParseAny(input, RetryAmbiguousDateWithSwap(false))
	assert.NotEqual(t, nil, err)

	// with retry on, the parse is attempted again with the other month
	// preference once this error is detected
	ts, err := ParseAny(input, RetryAmbiguousDateWithSwap(true))
	assert.Equal(t, nil, err)
	assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
}