Merge pull request #89 from troyspencer/master

Options framework to allow more control over parsing
This commit is contained in:
Aaron Raddon 2020-09-30 12:45:49 -07:00 committed by GitHub
commit 9cb150cd12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 154 additions and 58 deletions

View File

@ -144,8 +144,8 @@ func unknownErr(datestr string) error {
// ParseAny parse an unknown date format, detect the layout. // ParseAny parse an unknown date format, detect the layout.
// Normal parse. Equivalent Timezone rules as time.Parse(). // Normal parse. Equivalent Timezone rules as time.Parse().
// NOTE: please see readme on mmdd vs ddmm ambiguous dates. // NOTE: please see readme on mmdd vs ddmm ambiguous dates.
func ParseAny(datestr string) (time.Time, error) { func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -157,8 +157,8 @@ func ParseAny(datestr string) (time.Time, error) {
// datestring, it uses the given location rules for any zone interpretation. // datestring, it uses the given location rules for any zone interpretation.
// That is, MST means one thing when using America/Denver and something else // That is, MST means one thing when using America/Denver and something else
// in other locations. // in other locations.
func ParseIn(datestr string, loc *time.Location) (time.Time, error) { func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, loc) p, err := parseTime(datestr, loc, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -180,8 +180,8 @@ func ParseIn(datestr string, loc *time.Location) (time.Time, error) {
// //
// t, err := dateparse.ParseIn("3/1/2014", denverLoc) // t, err := dateparse.ParseIn("3/1/2014", denverLoc)
// //
func ParseLocal(datestr string) (time.Time, error) { func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, time.Local) p, err := parseTime(datestr, time.Local, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -190,8 +190,8 @@ func ParseLocal(datestr string) (time.Time, error) {
// MustParse parse a date, and panic if it can't be parsed. Used for testing. // MustParse parse a date, and panic if it can't be parsed. Used for testing.
// Not recommended for most use-cases. // Not recommended for most use-cases.
func MustParse(datestr string) time.Time { func MustParse(datestr string, opts ...ParserOption) time.Time {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
panic(err.Error()) panic(err.Error())
} }
@ -208,8 +208,8 @@ func MustParse(datestr string) time.Time {
// layout, err := dateparse.ParseFormat("2013-02-01 00:00:00") // layout, err := dateparse.ParseFormat("2013-02-01 00:00:00")
// // layout = "2006-01-02 15:04:05" // // layout = "2006-01-02 15:04:05"
// //
func ParseFormat(datestr string) (string, error) { func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return "", err return "", err
} }
@ -222,8 +222,8 @@ func ParseFormat(datestr string) (string, error) {
// ParseStrict parse an unknown date format. If the date is ambiguous // ParseStrict parse an unknown date format. If the date is ambiguous
// mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc // mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc
func ParseStrict(datestr string) (time.Time, error) { func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
p, err := parseTime(datestr, nil) p, err := parseTime(datestr, nil, opts...)
if err != nil { if err != nil {
return time.Time{}, err return time.Time{}, err
} }
@ -233,9 +233,31 @@ func ParseStrict(datestr string) (time.Time, error) {
return p.parse() return p.parse()
} }
func parseTime(datestr string, loc *time.Location) (*parser, error) { func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) {
p = newParser(datestr, loc, opts...)
if p.retryAmbiguousDateWithSwap {
// A "month out of range" error signifies that a day/month swap is the correct solution to an ambiguous date:
// it means that a day is being interpreted as a month and overflowing the valid range for a month.
// By retrying in this case, we can fix a common situation with no assumptions.
defer func() {
if p.ambiguousMD {
// if it errors out with the following error, swap before we
// get out of this function to reduce scope it needs to be applied on
_, err := p.parse()
if err != nil && strings.Contains(err.Error(), "month out of range") {
// create the option to reverse the preference
preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst)
// turn off the retry to avoid endless recursion
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap)
p, err = parseTime(datestr, time.Local, modifiedOpts...)
}
}
}()
}
p := newParser(datestr, loc)
i := 0 i := 0
// General strategy is to read rune by rune through the date looking for // General strategy is to read rune by rune through the date looking for
@ -293,6 +315,12 @@ iterRunes:
p.setMonth() p.setMonth()
p.dayi = i + 1 p.dayi = i + 1
} }
} else {
if p.daylen == 0 {
p.daylen = i
p.setDay()
p.moi = i + 1
}
} }
} }
@ -489,6 +517,12 @@ iterRunes:
p.setDay() p.setDay()
p.yeari = i + 1 p.yeari = i + 1
} }
} else {
if p.molen == 0 {
p.molen = i - p.moi
p.setMonth()
p.yeari = i + 1
}
} }
} }
@ -712,7 +746,7 @@ iterRunes:
} else if i == 4 { } else if i == 4 {
// gross // gross
datestr = datestr[0:i-1] + datestr[i:] datestr = datestr[0:i-1] + datestr[i:]
return parseTime(datestr, loc) return parseTime(datestr, loc, opts...)
} else { } else {
return nil, unknownErr(datestr) return nil, unknownErr(datestr)
} }
@ -867,25 +901,25 @@ iterRunes:
case 't', 'T': case 't', 'T':
if p.nextIs(i, 'h') || p.nextIs(i, 'H') { if p.nextIs(i, 'h') || p.nextIs(i, 'H') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 'n', 'N': case 'n', 'N':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 's', 'S': case 's', 'S':
if p.nextIs(i, 't') || p.nextIs(i, 'T') { if p.nextIs(i, 't') || p.nextIs(i, 'T') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
case 'r', 'R': case 'r', 'R':
if p.nextIs(i, 'd') || p.nextIs(i, 'D') { if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
if len(datestr) > i+2 { if len(datestr) > i+2 {
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc) return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
} }
} }
} }
@ -1059,7 +1093,7 @@ iterRunes:
// 2014-05-11 08:20:13,787 // 2014-05-11 08:20:13,787
ds := []byte(p.datestr) ds := []byte(p.datestr)
ds[i] = '.' ds[i] = '.'
return parseTime(string(ds), loc) return parseTime(string(ds), loc, opts...)
case '-', '+': case '-', '+':
// 03:21:51+00:00 // 03:21:51+00:00
p.stateTime = timeOffset p.stateTime = timeOffset
@ -1763,48 +1797,75 @@ iterRunes:
} }
type parser struct { type parser struct {
loc *time.Location loc *time.Location
preferMonthFirst bool preferMonthFirst bool
ambiguousMD bool retryAmbiguousDateWithSwap bool
stateDate dateState ambiguousMD bool
stateTime timeState stateDate dateState
format []byte stateTime timeState
datestr string format []byte
fullMonth string datestr string
skip int fullMonth string
extra int skip int
part1Len int extra int
yeari int part1Len int
yearlen int yeari int
moi int yearlen int
molen int moi int
dayi int molen int
daylen int dayi int
houri int daylen int
hourlen int houri int
mini int hourlen int
minlen int mini int
seci int minlen int
seclen int seci int
msi int seclen int
mslen int msi int
offseti int mslen int
offsetlen int offseti int
tzi int offsetlen int
tzlen int tzi int
t *time.Time tzlen int
t *time.Time
} }
func newParser(dateStr string, loc *time.Location) *parser { // ParserOption defines a function signature implemented by options
p := parser{ // Options defined like this accept the parser and operate on the data within
stateDate: dateStart, type ParserOption func(*parser) error
stateTime: timeIgnore,
datestr: dateStr, // PreferMonthFirst is an option that allows preferMonthFirst to be changed from its default
loc: loc, func PreferMonthFirst(preferMonthFirst bool) ParserOption {
preferMonthFirst: true, return func(p *parser) error {
p.preferMonthFirst = preferMonthFirst
return nil
}
}
// RetryAmbiguousDateWithSwap returns a ParserOption that stores the given
// value in the parser's retryAmbiguousDateWithSwap field, overriding its
// default. The returned option always reports a nil error.
func RetryAmbiguousDateWithSwap(retryAmbiguousDateWithSwap bool) ParserOption {
	apply := func(target *parser) error {
		target.retryAmbiguousDateWithSwap = retryAmbiguousDateWithSwap
		return nil
	}
	return apply
}
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) *parser {
p := &parser{
stateDate: dateStart,
stateTime: timeIgnore,
datestr: dateStr,
loc: loc,
preferMonthFirst: true,
retryAmbiguousDateWithSwap: false,
} }
p.format = []byte(dateStr) p.format = []byte(dateStr)
return &p
// allow the options to mutate the parser fields from their defaults
for _, option := range opts {
option(p)
}
return p
} }
func (p *parser) nextIs(i int, b byte) bool { func (p *parser) nextIs(i int, b byte) bool {

View File

@ -686,3 +686,38 @@ func TestInLocation(t *testing.T) {
assert.Equal(t, zeroTime, ts.Unix()) assert.Equal(t, zeroTime, ts.Unix())
assert.NotEqual(t, nil, err) assert.NotEqual(t, nil, err)
} }
func TestPreferMonthFirst(t *testing.T) {
// default case is true
ts, err := ParseAny("04/02/2014 04:08:09 +0000 UTC")
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
preferMonthFirstTrue := PreferMonthFirst(true)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-04-02 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
// allows the day to be preferred before the month, when completely ambiguous
preferMonthFirstFalse := PreferMonthFirst(false)
ts, err = ParseAny("04/02/2014 04:08:09 +0000 UTC", preferMonthFirstFalse)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-04 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
}
func TestRetryAmbiguousDateWithSwap(t *testing.T) {
// default is false
_, err := ParseAny("13/02/2014 04:08:09 +0000 UTC")
assert.NotEqual(t, nil, err)
// will fail error if the month preference cannot work due to the value being larger than 12
retryAmbiguousDateWithSwapFalse := RetryAmbiguousDateWithSwap(false)
_, err = ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapFalse)
assert.NotEqual(t, nil, err)
// will retry with the other month preference if this error is detected
retryAmbiguousDateWithSwapTrue := RetryAmbiguousDateWithSwap(true)
ts, err := ParseAny("13/02/2014 04:08:09 +0000 UTC", retryAmbiguousDateWithSwapTrue)
assert.Equal(t, nil, err)
assert.Equal(t, "2014-02-13 04:08:09 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
}