mirror of
https://github.com/araddon/dateparse.git
synced 2025-01-19 11:16:12 +08:00
Heavily optimize memory allocations
Uses a memory pool for the parser struct and its format []byte. Uses a new Go 1.20 feature to avoid allocations for []byte-to-string conversions in allowable cases. Go 1.20 also fixes a Go bug in parsing fractional seconds after a comma, so we can eliminate a workaround.

The remaining allocations are mostly unavoidable (e.g., time.Parse constructing a FixedZone location, or passing part of the input to strings.ToLower).

Results show an 89% reduction in allocated bytes for the big benchmark cases, and for some formats an allocation can be avoided entirely. There is also a resulting 26% speedup in ns/op.

Details (cpu: 12th Gen Intel(R) Core(TM) i7-1255U):

BEFORE:
BenchmarkShotgunParse-12 19448 B/op 474 allocs/op
BenchmarkParseAny-12 4736 B/op 42 allocs/op
BenchmarkBigShotgunParse-12 1075049 B/op 24106 allocs/op
BenchmarkBigParseAny-12 241422 B/op 2916 allocs/op
BenchmarkBigParseIn-12 244195 B/op 2984 allocs/op
BenchmarkBigParseRetryAmbiguous-12 260751 B/op 3715 allocs/op
BenchmarkShotgunParseErrors-12 67080 B/op 1679 allocs/op
BenchmarkParseAnyErrors-12 15903 B/op 200 allocs/op

AFTER:
BenchmarkShotgunParse-12 19448 B/op 474 allocs/op
BenchmarkParseAny-12 48 B/op 2 allocs/op
BenchmarkBigShotgunParse-12 1075049 B/op 24106 allocs/op
BenchmarkBigParseAny-12 25394 B/op 824 allocs/op
BenchmarkBigParseIn-12 28165 B/op 892 allocs/op
BenchmarkBigParseRetryAmbiguous-12 37880 B/op 1502 allocs/op
BenchmarkShotgunParseErrors-12 67080 B/op 1679 allocs/op
BenchmarkParseAnyErrors-12 3851 B/op 117 allocs/op
This commit is contained in:
parent
0d2fd5e275
commit
f4307ef59d
2
go.mod
2
go.mod
@ -1,6 +1,6 @@
|
|||||||
module github.com/araddon/dateparse
|
module github.com/araddon/dateparse
|
||||||
|
|
||||||
go 1.19
|
go 1.20
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4
|
github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4
|
||||||
|
172
parseany.go
172
parseany.go
@ -7,9 +7,11 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
"unicode"
|
"unicode"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
|
||||||
// func init() {
|
// func init() {
|
||||||
@ -147,11 +149,22 @@ func unexpectedTail(tail string) error {
|
|||||||
return fmt.Errorf("%w %q", ErrUnexpectedTail, tail)
|
return fmt.Errorf("%w %q", ErrUnexpectedTail, tail)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// bytesToString returns a string view of b without copying or allocating,
// using unsafe.String (available since Go 1.20).
// See https://github.com/golang/go/issues/53003#issuecomment-1140276077.
//
// The returned string shares b's backing memory, so b must not be modified
// for as long as the string is in use. A nil or empty slice yields "".
func bytesToString(b []byte) string {
	// len of a nil slice is 0, so this single check covers both nil and empty
	// input; it also keeps &b[0] below from panicking.
	if len(b) == 0 {
		return ""
	}
	return unsafe.String(&b[0], len(b))
}
|
||||||
|
|
||||||
// ParseAny parse an unknown date format, detect the layout.
|
// ParseAny parse an unknown date format, detect the layout.
|
||||||
// Normal parse. Equivalent Timezone rules as time.Parse().
|
// Normal parse. Equivalent Timezone rules as time.Parse().
|
||||||
// NOTE: please see readme on mmdd vs ddmm ambiguous dates.
|
// NOTE: please see readme on mmdd vs ddmm ambiguous dates.
|
||||||
func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
|
func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
|
||||||
p, err := parseTime(datestr, nil, opts...)
|
p, err := parseTime(datestr, nil, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
@ -165,6 +178,7 @@ func ParseAny(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
// in other locations.
|
// in other locations.
|
||||||
func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) {
|
func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Time, error) {
|
||||||
p, err := parseTime(datestr, loc, opts...)
|
p, err := parseTime(datestr, loc, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
@ -187,6 +201,7 @@ func ParseIn(datestr string, loc *time.Location, opts ...ParserOption) (time.Tim
|
|||||||
// t, err := dateparse.ParseIn("3/1/2014", denverLoc)
|
// t, err := dateparse.ParseIn("3/1/2014", denverLoc)
|
||||||
func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
|
func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
|
||||||
p, err := parseTime(datestr, time.Local, opts...)
|
p, err := parseTime(datestr, time.Local, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
@ -197,6 +212,7 @@ func ParseLocal(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
// Not recommended for most use-cases.
|
// Not recommended for most use-cases.
|
||||||
func MustParse(datestr string, opts ...ParserOption) time.Time {
|
func MustParse(datestr string, opts ...ParserOption) time.Time {
|
||||||
p, err := parseTime(datestr, nil, opts...)
|
p, err := parseTime(datestr, nil, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err.Error())
|
panic(err.Error())
|
||||||
}
|
}
|
||||||
@ -214,6 +230,7 @@ func MustParse(datestr string, opts ...ParserOption) time.Time {
|
|||||||
// // layout = "2006-01-02 15:04:05"
|
// // layout = "2006-01-02 15:04:05"
|
||||||
func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
|
func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
|
||||||
p, err := parseTime(datestr, nil, opts...)
|
p, err := parseTime(datestr, nil, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@ -228,6 +245,7 @@ func ParseFormat(datestr string, opts ...ParserOption) (string, error) {
|
|||||||
// mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc
|
// mm/dd vs dd/mm then return an error. These return errors: 3.3.2014 , 8/8/71 etc
|
||||||
func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
|
func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
|
||||||
p, err := parseTime(datestr, nil, opts...)
|
p, err := parseTime(datestr, nil, opts...)
|
||||||
|
defer putBackParser(p)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return time.Time{}, err
|
return time.Time{}, err
|
||||||
}
|
}
|
||||||
@ -237,6 +255,8 @@ func ParseStrict(datestr string, opts ...ParserOption) (time.Time, error) {
|
|||||||
return p.parse()
|
return p.parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a new parser and parses the given datestr in the given loc with the given options.
|
||||||
|
// The caller must call putBackParser on the returned parser when done with it.
|
||||||
func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) {
|
func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *parser, err error) {
|
||||||
|
|
||||||
p, err = newParser(datestr, loc, opts...)
|
p, err = newParser(datestr, loc, opts...)
|
||||||
@ -244,12 +264,6 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// if this string is impossibly long, don't even try. longest date might be something like:
|
|
||||||
// 'Wednesday, 8 February 2023 19:00:46.999999999 +11:00 (AEDT) m=+0.000000001'
|
|
||||||
if len(datestr) > 75 {
|
|
||||||
return p, unknownErr(datestr)
|
|
||||||
}
|
|
||||||
|
|
||||||
if p.retryAmbiguousDateWithSwap {
|
if p.retryAmbiguousDateWithSwap {
|
||||||
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date
|
// month out of range signifies that a day/month swap is the correct solution to an ambiguous date
|
||||||
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that
|
// this is because it means that a day is being interpreted as a month and overflowing the valid value for that
|
||||||
@ -265,6 +279,7 @@ func parseTime(datestr string, loc *time.Location, opts ...ParserOption) (p *par
|
|||||||
// turn off the retry to avoid endless recursion
|
// turn off the retry to avoid endless recursion
|
||||||
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
|
retryAmbiguousDateWithSwap := RetryAmbiguousDateWithSwap(false)
|
||||||
modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap)
|
modifiedOpts := append(opts, preferMonthFirst, retryAmbiguousDateWithSwap)
|
||||||
|
putBackParser(p)
|
||||||
p, err = parseTime(datestr, time.Local, modifiedOpts...)
|
p, err = parseTime(datestr, time.Local, modifiedOpts...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1105,7 +1120,9 @@ iterRunes:
|
|||||||
maybeDay := strings.ToLower(p.datestr[0:i])
|
maybeDay := strings.ToLower(p.datestr[0:i])
|
||||||
if isDay(maybeDay) {
|
if isDay(maybeDay) {
|
||||||
// using skip throws off indices used by other code; saner to restart
|
// using skip throws off indices used by other code; saner to restart
|
||||||
return parseTime(p.datestr[i+1:], loc)
|
newDateStr := p.datestr[i+1:]
|
||||||
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc)
|
||||||
}
|
}
|
||||||
p.stateDate = dateAlphaWs
|
p.stateDate = dateAlphaWs
|
||||||
}
|
}
|
||||||
@ -1133,8 +1150,9 @@ iterRunes:
|
|||||||
p.set(0, "Jan")
|
p.set(0, "Jan")
|
||||||
} else if i == 4 {
|
} else if i == 4 {
|
||||||
// gross
|
// gross
|
||||||
newDatestr := p.datestr[0:i-1] + p.datestr[i:]
|
newDateStr := p.datestr[0:i-1] + p.datestr[i:]
|
||||||
return parseTime(newDatestr, loc, opts...)
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc, opts...)
|
||||||
} else {
|
} else {
|
||||||
return p, unknownErr(datestr)
|
return p, unknownErr(datestr)
|
||||||
}
|
}
|
||||||
@ -1332,25 +1350,33 @@ iterRunes:
|
|||||||
case 't', 'T':
|
case 't', 'T':
|
||||||
if p.nextIs(i, 'h') || p.nextIs(i, 'H') {
|
if p.nextIs(i, 'h') || p.nextIs(i, 'H') {
|
||||||
if len(p.datestr) > i+2 {
|
if len(p.datestr) > i+2 {
|
||||||
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
|
newDateStr := p.datestr[0:i] + p.datestr[i+2:]
|
||||||
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc, opts...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 'n', 'N':
|
case 'n', 'N':
|
||||||
if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
|
if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
|
||||||
if len(p.datestr) > i+2 {
|
if len(p.datestr) > i+2 {
|
||||||
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
|
newDateStr := p.datestr[0:i] + p.datestr[i+2:]
|
||||||
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc, opts...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 's', 'S':
|
case 's', 'S':
|
||||||
if p.nextIs(i, 't') || p.nextIs(i, 'T') {
|
if p.nextIs(i, 't') || p.nextIs(i, 'T') {
|
||||||
if len(p.datestr) > i+2 {
|
if len(p.datestr) > i+2 {
|
||||||
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
|
newDateStr := p.datestr[0:i] + p.datestr[i+2:]
|
||||||
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc, opts...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case 'r', 'R':
|
case 'r', 'R':
|
||||||
if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
|
if p.nextIs(i, 'd') || p.nextIs(i, 'D') {
|
||||||
if len(p.datestr) > i+2 {
|
if len(p.datestr) > i+2 {
|
||||||
return parseTime(fmt.Sprintf("%s%s", p.datestr[0:i], p.datestr[i+2:]), loc, opts...)
|
newDateStr := p.datestr[0:i] + p.datestr[i+2:]
|
||||||
|
putBackParser(p)
|
||||||
|
return parseTime(newDateStr, loc, opts...)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1575,13 +1601,6 @@ iterRunes:
|
|||||||
p.houri = i
|
p.houri = i
|
||||||
}
|
}
|
||||||
switch r {
|
switch r {
|
||||||
case ',':
|
|
||||||
// hm, lets just swap out comma for period. for some reason go
|
|
||||||
// won't parse it.
|
|
||||||
// 2014-05-11 08:20:13,787
|
|
||||||
ds := []byte(p.datestr)
|
|
||||||
ds[i] = '.'
|
|
||||||
return parseTime(string(ds), loc, opts...)
|
|
||||||
case '-', '+':
|
case '-', '+':
|
||||||
// 03:21:51+00:00
|
// 03:21:51+00:00
|
||||||
p.stateTime = timeOffset
|
p.stateTime = timeOffset
|
||||||
@ -1597,7 +1616,8 @@ iterRunes:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
p.offseti = i
|
p.offseti = i
|
||||||
case '.':
|
case '.', ',':
|
||||||
|
// NOTE: go 1.20 can now parse a string that has a comma delimiter properly
|
||||||
p.stateTime = timePeriod
|
p.stateTime = timePeriod
|
||||||
p.seclen = i - p.seci
|
p.seclen = i - p.seci
|
||||||
p.msi = i + 1
|
p.msi = i + 1
|
||||||
@ -2042,12 +2062,12 @@ iterRunes:
|
|||||||
// may or may not have a space on the end
|
// may or may not have a space on the end
|
||||||
if offsetLen == 7 {
|
if offsetLen == 7 {
|
||||||
if p.datestr[p.offseti+6] != ' ' {
|
if p.datestr[p.offseti+6] != ' ' {
|
||||||
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen]))
|
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p.set(p.offseti, "-07:00")
|
p.set(p.offseti, "-07:00")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:p.offseti+offsetLen]))
|
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:p.offseti+offsetLen])
|
||||||
}
|
}
|
||||||
// process timezone
|
// process timezone
|
||||||
switch len(p.datestr) - p.tzi {
|
switch len(p.datestr) - p.tzi {
|
||||||
@ -2057,7 +2077,7 @@ iterRunes:
|
|||||||
case 4:
|
case 4:
|
||||||
p.set(p.tzi, "MST ")
|
p.set(p.tzi, "MST ")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:]))
|
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:])
|
||||||
}
|
}
|
||||||
case timeWsAlpha:
|
case timeWsAlpha:
|
||||||
switch len(p.datestr) - p.tzi {
|
switch len(p.datestr) - p.tzi {
|
||||||
@ -2067,7 +2087,7 @@ iterRunes:
|
|||||||
case 4:
|
case 4:
|
||||||
p.set(p.tzi, "MST ")
|
p.set(p.tzi, "MST ")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:]))
|
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:])
|
||||||
}
|
}
|
||||||
|
|
||||||
case timeWsAlphaRParen:
|
case timeWsAlphaRParen:
|
||||||
@ -2095,13 +2115,13 @@ iterRunes:
|
|||||||
case 6:
|
case 6:
|
||||||
p.set(p.offseti, "-07:00")
|
p.set(p.offseti, "-07:00")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:i]))
|
return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:i])
|
||||||
}
|
}
|
||||||
|
|
||||||
case timePeriod:
|
case timePeriod:
|
||||||
p.mslen = i - p.msi
|
p.mslen = i - p.msi
|
||||||
if p.mslen >= 10 {
|
if p.mslen >= 10 {
|
||||||
return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, string(p.datestr[p.msi:p.mslen]))
|
return p, fmt.Errorf("fractional seconds in %q too long near %q", datestr, p.datestr[p.msi:p.mslen])
|
||||||
}
|
}
|
||||||
case timeOffset, timeWsOffset, timeWsYearOffset:
|
case timeOffset, timeWsOffset, timeWsYearOffset:
|
||||||
switch len(p.datestr) - p.offseti {
|
switch len(p.datestr) - p.offseti {
|
||||||
@ -2112,7 +2132,7 @@ iterRunes:
|
|||||||
// 19:55:00+0100 (or 19:55:00 +0100)
|
// 19:55:00+0100 (or 19:55:00 +0100)
|
||||||
p.set(p.offseti, "-0700")
|
p.set(p.offseti, "-0700")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, string(p.datestr[p.offseti:]))
|
return p, fmt.Errorf("TZ offset not recognized %q near %q (must be 2 or 4 digits optional colon)", datestr, p.datestr[p.offseti:])
|
||||||
}
|
}
|
||||||
|
|
||||||
case timeWsOffsetWs:
|
case timeWsOffsetWs:
|
||||||
@ -2127,7 +2147,7 @@ iterRunes:
|
|||||||
// 13:31:51.999 +01:00 CEST
|
// 13:31:51.999 +01:00 CEST
|
||||||
p.set(p.tzi, "MST ")
|
p.set(p.tzi, "MST ")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, string(p.datestr[p.tzi:]))
|
return p, fmt.Errorf("timezone not recognized %q near %q (must be 3 or 4 characters)", datestr, p.datestr[p.tzi:])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case timeOffsetColon, timeWsOffsetColon:
|
case timeOffsetColon, timeWsOffsetColon:
|
||||||
@ -2137,7 +2157,7 @@ iterRunes:
|
|||||||
case 6:
|
case 6:
|
||||||
p.set(p.offseti, "-07:00")
|
p.set(p.offseti, "-07:00")
|
||||||
default:
|
default:
|
||||||
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, string(p.datestr[p.offseti:]))
|
return p, fmt.Errorf("TZ offset not recognized %q near %q (expected offset like -07:00)", datestr, p.datestr[p.offseti:])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p.coalesceTime(i)
|
p.coalesceTime(i)
|
||||||
@ -2452,6 +2472,46 @@ type parser struct {
|
|||||||
t *time.Time
|
t *time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// something like: "Wednesday, 8 February 2023 19:00:46.999999999 +11:00 (AEDT) m=+0.000000001"
|
||||||
|
const longestPossibleDateStr = 78
|
||||||
|
|
||||||
|
// the format byte slice is always a little larger, in case we need to expand it to contain a full month
|
||||||
|
const formatExtraBufferBytes = 16
|
||||||
|
const formatBufferCapacity = longestPossibleDateStr + formatExtraBufferBytes
|
||||||
|
|
||||||
|
var parserPool = sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
// allocate a max-sized fixed-capacity format byte slice
|
||||||
|
// that will be re-used with this parser struct
|
||||||
|
return &parser{
|
||||||
|
format: make([]byte, 0, formatBufferCapacity),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var emptyString = ""
|
||||||
|
|
||||||
|
// Use to put a parser back into the pool in the right way
|
||||||
|
func putBackParser(p *parser) {
|
||||||
|
if p == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// we'll be reusing the backing memory for the format byte slice, put it back
|
||||||
|
// to maximum capacity
|
||||||
|
if cap(p.format) == longestPossibleDateStr {
|
||||||
|
p.format = p.format[:longestPossibleDateStr]
|
||||||
|
} else {
|
||||||
|
// the parsing process replaced this, get back a new one with the right cap
|
||||||
|
p.format = make([]byte, 0, longestPossibleDateStr)
|
||||||
|
}
|
||||||
|
// clear out pointers so we don't leak memory we don't need any longer
|
||||||
|
p.loc = nil
|
||||||
|
p.datestr = emptyString
|
||||||
|
p.fullMonth = emptyString
|
||||||
|
p.t = nil
|
||||||
|
parserPool.Put(p)
|
||||||
|
}
|
||||||
|
|
||||||
// ParserOption defines a function signature implemented by options
|
// ParserOption defines a function signature implemented by options
|
||||||
// Options defined like this accept the parser and operate on the data within
|
// Options defined like this accept the parser and operate on the data within
|
||||||
type ParserOption func(*parser) error
|
type ParserOption func(*parser) error
|
||||||
@ -2482,18 +2542,29 @@ func AllowPartialStringMatch(allowPartialStringMatch bool) ParserOption {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a new parser. The caller must call putBackParser on the returned parser when done with it.
|
||||||
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) {
|
func newParser(dateStr string, loc *time.Location, opts ...ParserOption) (*parser, error) {
|
||||||
p := &parser{
|
dateStrLen := len(dateStr)
|
||||||
|
if dateStrLen > longestPossibleDateStr {
|
||||||
|
return nil, unknownErr(dateStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure to re-use the format byte slice from the pooled parser struct
|
||||||
|
p := parserPool.Get().(*parser)
|
||||||
|
// This re-slicing is guaranteed to work because of the length check above
|
||||||
|
startingFormat := p.format[:dateStrLen]
|
||||||
|
copy(startingFormat, dateStr)
|
||||||
|
*p = parser{
|
||||||
stateDate: dateStart,
|
stateDate: dateStart,
|
||||||
stateTime: timeIgnore,
|
stateTime: timeIgnore,
|
||||||
datestr: dateStr,
|
datestr: dateStr,
|
||||||
loc: loc,
|
loc: loc,
|
||||||
preferMonthFirst: true,
|
preferMonthFirst: true,
|
||||||
retryAmbiguousDateWithSwap: false,
|
retryAmbiguousDateWithSwap: false,
|
||||||
|
format: startingFormat,
|
||||||
|
// this tracks how much of the format string has been set, to make sure all of it is set
|
||||||
|
formatSetLen: 0,
|
||||||
}
|
}
|
||||||
p.format = []byte(dateStr)
|
|
||||||
// this tracks how much of the format string has been set, to make sure all of it is set
|
|
||||||
p.formatSetLen = 0
|
|
||||||
|
|
||||||
// allow the options to mutate the parser fields from their defaults
|
// allow the options to mutate the parser fields from their defaults
|
||||||
for _, option := range opts {
|
for _, option := range opts {
|
||||||
@ -2512,7 +2583,8 @@ func (p *parser) nextIs(i int, b byte) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (p *parser) setEntireFormat(format []byte) {
|
func (p *parser) setEntireFormat(format []byte) {
|
||||||
p.format = format
|
// Copy so that we don't lose this pooled format byte slice
|
||||||
|
copy(p.format, format)
|
||||||
p.formatSetLen = len(format)
|
p.formatSetLen = len(format)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2665,8 +2737,22 @@ func (p *parser) coalesceTime(end int) {
|
|||||||
func (p *parser) setFullMonth(month string) {
|
func (p *parser) setFullMonth(month string) {
|
||||||
oldLen := len(p.format)
|
oldLen := len(p.format)
|
||||||
const fullMonth = "January"
|
const fullMonth = "January"
|
||||||
p.format = []byte(fmt.Sprintf("%s%s%s", p.format[0:p.moi], fullMonth, p.format[p.moi+len(month):]))
|
// Do an overlapping copy so we don't lose the pooled format buffer
|
||||||
newLen := len(p.format)
|
part1Len := p.moi
|
||||||
|
part3 := p.format[p.moi+len(month):]
|
||||||
|
newLen := part1Len + len(fullMonth) + len(part3)
|
||||||
|
if newLen > oldLen {
|
||||||
|
// We can re-slice this, because the capacity is guaranteed to be a little longer than any possible datestr
|
||||||
|
p.format = p.format[:newLen]
|
||||||
|
}
|
||||||
|
// first part will not change, we need to shift the third part
|
||||||
|
copy(p.format[part1Len+len(fullMonth):], part3)
|
||||||
|
copy(p.format[part1Len:], fullMonth)
|
||||||
|
// shorten the format slice now if needed
|
||||||
|
if newLen < oldLen {
|
||||||
|
p.format = p.format[:newLen]
|
||||||
|
}
|
||||||
|
|
||||||
if newLen > oldLen && p.formatSetLen >= p.moi {
|
if newLen > oldLen && p.formatSetLen >= p.moi {
|
||||||
p.formatSetLen += newLen - oldLen
|
p.formatSetLen += newLen - oldLen
|
||||||
} else if newLen < oldLen && p.formatSetLen >= p.moi {
|
} else if newLen < oldLen && p.formatSetLen >= p.moi {
|
||||||
@ -2706,14 +2792,16 @@ func (p *parser) parse() (time.Time, error) {
|
|||||||
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
||||||
// We can always ignore punctuation at the end of a date/time, but do not allow
|
// We can always ignore punctuation at the end of a date/time, but do not allow
|
||||||
// any numbers or letters in the format string.
|
// any numbers or letters in the format string.
|
||||||
validFormatTo := findProperEnd(string(p.format), p.formatSetLen, len(p.format), false, false, true)
|
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true)
|
||||||
if validFormatTo < len(p.format) {
|
if validFormatTo < len(p.format) {
|
||||||
return time.Time{}, unexpectedTail(string(p.format[p.formatSetLen:]))
|
return time.Time{}, unexpectedTail(p.datestr[p.formatSetLen:])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if p.skip > 0 && len(p.format) > p.skip {
|
if p.skip > 0 && len(p.format) > p.skip {
|
||||||
p.format = p.format[p.skip:]
|
// copy and then re-slice to shorten to avoid losing the header of the pooled format string
|
||||||
|
copy(p.format, p.format[p.skip:])
|
||||||
|
p.format = p.format[:len(p.format)-p.skip]
|
||||||
p.formatSetLen -= p.skip
|
p.formatSetLen -= p.skip
|
||||||
if p.formatSetLen < 0 {
|
if p.formatSetLen < 0 {
|
||||||
p.formatSetLen = 0
|
p.formatSetLen = 0
|
||||||
@ -2723,10 +2811,10 @@ func (p *parser) parse() (time.Time, error) {
|
|||||||
|
|
||||||
if p.loc == nil {
|
if p.loc == nil {
|
||||||
// gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr)
|
// gou.Debugf("parse layout=%q input=%q \ntx, err := time.Parse(%q, %q)", string(p.format), p.datestr, string(p.format), p.datestr)
|
||||||
return time.Parse(string(p.format), p.datestr)
|
return time.Parse(bytesToString(p.format), p.datestr)
|
||||||
}
|
}
|
||||||
//gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc)
|
//gou.Debugf("parse layout=%q input=%q \ntx, err := time.ParseInLocation(%q, %q, %v)", string(p.format), p.datestr, string(p.format), p.datestr, p.loc)
|
||||||
return time.ParseInLocation(string(p.format), p.datestr, p.loc)
|
return time.ParseInLocation(bytesToString(p.format), p.datestr, p.loc)
|
||||||
}
|
}
|
||||||
func isDay(alpha string) bool {
|
func isDay(alpha string) bool {
|
||||||
for _, day := range days {
|
for _, day := range days {
|
||||||
|
Loading…
Reference in New Issue
Block a user