mirror of
https://github.com/araddon/dateparse.git
synced 2025-01-19 19:26:09 +08:00
Optimize ambiguous date parsing
Previously, when RetryAmbiguousDateWithSwap was enabled, an ambiguous date string was always parsed twice, even when the first parse would have succeeded. This refactor re-attempts parsing (with the day/month preference swapped) only when the first parse of an ambiguous date fails with a "month out of range" error. Benchmark results show that with RetryAmbiguousDateWithSwap(true), parsing is now about 6.5% faster (ns/op) and allocates 3.4% fewer bytes.
This commit is contained in:
parent
f4307ef59d
commit
ed5310d0c1
72
parseany.go
72
parseany.go
@ -168,7 +168,7 @@ func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
return p.parse()
|
return p.parse(nil, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseIn with Location, equivalent to time.ParseInLocation() timezone/offset
|
// ParseIn with Location, equivalent to time.ParseInLocation() timezone/offset
|
||||||
@ -182,7 +182,7 @@ func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Tim
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
return p.parse()
|
return p.parse(loc, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParseLocal Given an unknown date format, detect the layout,
|
// ParseLocal Given an unknown date format, detect the layout,
|
||||||
@ -205,7 +205,7 @@ func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
return p.parse()
|
return p.parse(time.Local, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// MustParse parse a date, and panic if it can't be parsed. Used for testing.
|
// MustParse parse a date, and panic if it can't be parsed. Used for testing.
|
||||||
@ -216,7 +216,7 @@ func MustParse(datestr string, opts ...ParserOption) time.Time {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err.Error())
|
panic(err.Error())
|
||||||
}
|
}
|
||||||
t, err := p.parse()
|
t, err := p.parse(nil, opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err.Error())
|
panic(err.Error())
|
||||||
}
|
}
|
||||||
@ -234,7 +234,7 @@ func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
_, err = p.parse()
|
_, err = p.parse(nil, opts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@ -252,7 +252,7 @@ func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
if p.ambiguousMD {
|
if p.ambiguousMD {
|
||||||
return time.Time{}, ErrAmbiguousMMDD
|
return time.Time{}, ErrAmbiguousMMDD
|
||||||
}
|
}
|
||||||
return p.parse()
|
return p.parse(nil, opts...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Creates a new parser and parses the given datestr in the given loc with the given options.
|
// Creates a new parser and parses the given datestr in the given loc with the given options.
|
||||||
@ -264,29 +264,6 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.retryAmbiguousDateWithSwap {
|
|
||||||
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date
|
|
||||||
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that
|
|
||||||
// by retrying in this case, we can fix a common situation with no assumptions
|
|
||||||
defer func() {
|
|
||||||
if p != nil && p.ambiguousMD {
|
|
||||||
// if it errors out with the following error, swap before we
|
|
||||||
// get out of this function to reduce scope it needs to be applied on
|
|
||||||
_, err = p.parse()
|
|
||||||
if err != nil && strings.Contains(err.Error(), "month out of range") {
|
|
||||||
// create the option to reverse the preference
|
|
||||||
preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst)
|
|
||||||
// turn off the retry to avoid endless recursion
|
|
||||||
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
|
|
||||||
modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap)
|
|
||||||
putBackParser(p)
|
|
||||||
p, err = parseTime(datestr, time.Local, modifiedOpts...)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}()
|
|
||||||
}
|
|
||||||
|
|
||||||
// IMPORTANT: we may need to modify the datestr while we are parsing (e.g., to
|
// IMPORTANT: we may need to modify the datestr while we are parsing (e.g., to
|
||||||
// remove pieces of the string that should be ignored during golang parsing).
|
// remove pieces of the string that should be ignored during golang parsing).
|
||||||
// We will iterate over the modified datestr, and whenever we update datestr,
|
// We will iterate over the modified datestr, and whenever we update datestr,
|
||||||
@ -2584,6 +2561,12 @@ func (p *parser) nextIs(i int, b byte) bool {
|
|||||||
|
|
||||||
func (p *parser) setEntireFormat(format []byte) {
|
func (p *parser) setEntireFormat(format []byte) {
|
||||||
// Copy so that we don't lose this pooled format byte slice
|
// Copy so that we don't lose this pooled format byte slice
|
||||||
|
oldLen := len(p.format)
|
||||||
|
newLen := len(format)
|
||||||
|
if oldLen != newLen {
|
||||||
|
// guaranteed to work because of the allocated capacity for format buffers
|
||||||
|
p.format = p.format[:newLen]
|
||||||
|
}
|
||||||
copy(p.format, format)
|
copy(p.format, format)
|
||||||
p.formatSetLen = len(format)
|
p.formatSetLen = len(format)
|
||||||
}
|
}
|
||||||
@ -2780,7 +2763,10 @@ func (p *parser) trimExtra(onlyTrimFormat bool) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) parse() (time.Time, error) {
|
func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption) (t time.Time, err error) {
|
||||||
|
if p == nil {
|
||||||
|
return time.Time{}, unknownErr("")
|
||||||
|
}
|
||||||
if p.t != nil {
|
if p.t != nil {
|
||||||
return *p.t, nil
|
return *p.t, nil
|
||||||
}
|
}
|
||||||
@ -2788,6 +2774,32 @@ func (p *parser) parse() (time.Time, error) {
|
|||||||
p.setFullMonth(p.fullMonth)
|
p.setFullMonth(p.fullMonth)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if p.retryAmbiguousDateWithSwap && p.ambiguousMD {
|
||||||
|
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date
|
||||||
|
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that
|
||||||
|
// by retrying in this case, we can fix a common situation with no assumptions
|
||||||
|
defer func() {
|
||||||
|
// if actual time parsing errors out with the following error, swap before we
|
||||||
|
// get out of this function to reduce scope it needs to be applied on
|
||||||
|
if err != nil && strings.Contains(err.Error(), "month out of range") {
|
||||||
|
// create the option to reverse the preference
|
||||||
|
preferMonthFirst := PreferMonthFirst(!p.preferMonthFirst)
|
||||||
|
// turn off the retry to avoid endless recursion
|
||||||
|
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
|
||||||
|
modifiedOpts := append(originalOpts, preferMonthFirst, retryAmbiguousDateWithSwap)
|
||||||
|
var newParser *parser
|
||||||
|
newParser, err = parseTime(p.datestr, originalLoc, modifiedOpts...)
|
||||||
|
defer putBackParser(newParser)
|
||||||
|
if err == nil {
|
||||||
|
t, err = newParser.parse(originalLoc, modifiedOpts...)
|
||||||
|
// The caller might use the format and datestr, so copy that back to the original parser
|
||||||
|
p.setEntireFormat(newParser.format)
|
||||||
|
p.datestr = newParser.datestr
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
// Make sure that the entire string matched to a known format that was detected
|
// Make sure that the entire string matched to a known format that was detected
|
||||||
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
||||||
// We can always ignore punctuation at the end of a date/time, but do not allow
|
// We can always ignore punctuation at the end of a date/time, but do not allow
|
||||||
|
Loading…
Reference in New Issue
Block a user