diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index ea2b5de..ff43abf 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -11,9 +11,9 @@ jobs: name: lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache-Go - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: | ~/go/pkg/mod # Module download cache @@ -23,6 +23,6 @@ jobs: restore-keys: | ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - name: golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v7 with: version: latest diff --git a/.github/workflows/releaser.yml b/.github/workflows/releaser.yml index 8593dc3..fc1008a 100644 --- a/.github/workflows/releaser.yml +++ b/.github/workflows/releaser.yml @@ -10,15 +10,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v5 with: go-version: 1.20.x - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: fetch-depth: 0 - name: Cache-Go - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: | ~/go/pkg/mod # Module download cache @@ -30,7 +30,7 @@ jobs: - name: Test run: go test ./... 
- name: Run GoReleaser - uses: goreleaser/goreleaser-action@v5 + uses: goreleaser/goreleaser-action@v6 with: distribution: goreleaser version: latest diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index d266d8f..892cdf0 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -9,13 +9,13 @@ jobs: runs-on: ${{ matrix.os }} steps: - name: Install Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - name: Checkout code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Cache-Go - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: | ~/go/pkg/mod # Module download cache diff --git a/.gitignore b/.gitignore index 3503c41..386a0f3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pprof *.test dist -vendor \ No newline at end of file +vendor +dateparse/dateparse diff --git a/README.md b/README.md index 7656620..e7a71f6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Go Date Parser Parse date/time strings without knowing the format in advance. Supports 100+ formats. Validates comprehensively to avoid false positives. Very fast (~single-pass state-machine based approach). See [bench_test.go](https://github.com/itlightning/dateparse/blob/main/bench_test.go) for performance comparison. See the critical note below about timezones. -⚡ Maintained by [IT Lightning](https://itlightning.com/), a cloud-first logging platform that's uniquely powerful, super-easy (schemaless, point-and-shoot ingestion), and affordable. It automatically extracts and classifies structured data out of your unstructured log messages. Enjoy visual pattern-analysis and robust SQL-like search. It's unique architecture means you can log more and pay less. Check it out and give us feedback! ⚡ +⚡ Maintained by [SparkLogs](https://sparklogs.com/), a cloud-first logging platform that's uniquely powerful, super-easy (schemaless, point-and-shoot ingestion), and affordable. 
It automatically extracts and classifies structured data out of your unstructured log messages. Enjoy visual pattern-analysis and robust SQL-like search. Its unique architecture means you can log more and pay less. Check it out and give us feedback! SparkLogs is developed by [IT Lightning](https://itlightning.com/). ⚡ 🐛💡 Find a bug or have an idea with this package? [Issues](https://github.com/itlightning/dateparse/issues) and pull requests are welcome. @@ -75,6 +75,14 @@ cli tool for testing dateformats [Date Parse CLI](https://github.com/itlightning/dateparse/tree/main/dateparse) +Running the tests +---------------------------------- + +Make sure for your Linux distribution you've installed the relevant package that includes older timezone name links (e.g., `US/Pacific`). For example, on Ubuntu: + +```bash +sudo apt install tzdata-legacy +``` Extended example ------------------- diff --git a/dateparse/README.md b/dateparse/README.md index f6b99a6..128fa11 100644 --- a/dateparse/README.md +++ b/dateparse/README.md @@ -169,4 +169,44 @@ Your Using time.Local set to location=America/New_York EDT | ParseAny | time.Local = time.UTC | 2017-03-03 00:00:00 +0000 UTC | 2017-03-03 00:00:00 +0000 UTC day=5 | +-------------+---------------------------+----------------------------------------------------+----------------------------------------------------+ +# Automatically retry date formats that are ambiguous mm/dd vs dd/mm +$ ./dateparse --retry-ambiguous "28.09.2024" + +Your Current time.Local zone is MDT + +Layout String: dateparse.ParseFormat() => 02.01.2006 + ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ +| method | Zone Source | Parsed | Parsed: t.In(time.UTC) | ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ +| ParseIn | time.Local = nil | 2024-09-28 00:00:00 +0000 UTC | 
2024-09-28 00:00:00 +0000 UTC | +| ParseIn | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseLocal | time.Local = nil | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseLocal | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseStrict | time.Local = nil | this date has ambiguous mm/dd vs dd/mm type format | this date has ambiguous mm/dd vs dd/mm type format | +| ParseStrict | time.Local = time.UTC | this date has ambiguous mm/dd vs dd/mm type format | this date has ambiguous mm/dd vs dd/mm type format | +| ParseAny | time.Local = nil | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC day=6 | +| ParseAny | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC day=6 | ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ + +# Force dates to be interpreted as day-first instead of month-first +$ ./dateparse --prefer-day-first "28.09.2024" + +Your Current time.Local zone is MDT + +Layout String: dateparse.ParseFormat() => 02.01.2006 + ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ +| method | Zone Source | Parsed | Parsed: t.In(time.UTC) | ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ +| ParseAny | time.Local = nil | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC day=6 | +| ParseAny | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC day=6 | +| ParseIn | time.Local = nil | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseIn | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseLocal | time.Local = nil | 
2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseLocal | time.Local = time.UTC | 2024-09-28 00:00:00 +0000 UTC | 2024-09-28 00:00:00 +0000 UTC | +| ParseStrict | time.Local = nil | this date has ambiguous mm/dd vs dd/mm type format | this date has ambiguous mm/dd vs dd/mm type format | +| ParseStrict | time.Local = time.UTC | this date has ambiguous mm/dd vs dd/mm type format | this date has ambiguous mm/dd vs dd/mm type format | ++-------------+-----------------------+----------------------------------------------------+----------------------------------------------------+ + ``` \ No newline at end of file diff --git a/dateparse/main.go b/dateparse/main.go index ff532e5..9ec8bcb 100644 --- a/dateparse/main.go +++ b/dateparse/main.go @@ -11,12 +11,27 @@ import ( ) var ( - timezone = "" - datestr = "" + timezone = "" + datestr = "" + retryAmbiguousDateWithSwap = false + preferDayFirst = false + parserOptions = []dateparse.ParserOption{} ) +func buildParserOptions() { + parserOptions = []dateparse.ParserOption{} + if retryAmbiguousDateWithSwap { + parserOptions = append(parserOptions, dateparse.RetryAmbiguousDateWithSwap(true)) + } + if preferDayFirst { + parserOptions = append(parserOptions, dateparse.PreferMonthFirst(false)) + } +} + func main() { flag.StringVar(&timezone, "timezone", "", "Timezone aka `America/Los_Angeles` formatted time-zone") + flag.BoolVar(&retryAmbiguousDateWithSwap, "retry-ambiguous", false, "Retry ambiguous date/time formats (day-first vs month-first)") + flag.BoolVar(&preferDayFirst, "prefer-day-first", false, "Prefer day-first date format") flag.Parse() if len(flag.Args()) == 0 { @@ -25,13 +40,17 @@ func main() { ./dateparse "2009-08-12T22:15:09.99Z" ./dateparse --timezone="America/Denver" "2017-07-19 03:21:51+00:00" + ./dateparse --prefer-day-first "28.09.2024" + ./dateparse --retry-ambiguous "28.09.2024" `) return } + buildParserOptions() + datestr = flag.Args()[0] - layout, err := dateparse.ParseFormat(datestr) + 
layout, err := dateparse.ParseFormat(datestr, parserOptions...) if err != nil { fatal(err) } @@ -82,7 +101,7 @@ type parser func(datestr string, loc *time.Location, utc bool) string func parseLocal(datestr string, loc *time.Location, utc bool) string { time.Local = loc - t, err := dateparse.ParseLocal(datestr) + t, err := dateparse.ParseLocal(datestr, parserOptions...) if err != nil { return err.Error() } @@ -93,7 +112,7 @@ func parseLocal(datestr string, loc *time.Location, utc bool) string { } func parseIn(datestr string, loc *time.Location, utc bool) string { - t, err := dateparse.ParseIn(datestr, loc) + t, err := dateparse.ParseIn(datestr, loc, parserOptions...) if err != nil { return err.Error() } @@ -104,7 +123,7 @@ func parseIn(datestr string, loc *time.Location, utc bool) string { } func parseAny(datestr string, loc *time.Location, utc bool) string { - t, err := dateparse.ParseAny(datestr) + t, err := dateparse.ParseAny(datestr, parserOptions...) if err != nil { return err.Error() } @@ -115,7 +134,7 @@ func parseAny(datestr string, loc *time.Location, utc bool) string { } func parseStrict(datestr string, loc *time.Location, utc bool) string { - t, err := dateparse.ParseStrict(datestr) + t, err := dateparse.ParseStrict(datestr, parserOptions...) 
if err != nil { return err.Error() } diff --git a/go.mod b/go.mod index a4597b2..6cbdb32 100644 --- a/go.mod +++ b/go.mod @@ -4,12 +4,12 @@ go 1.20 require ( github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4 - github.com/stretchr/testify v1.8.4 + github.com/stretchr/testify v1.10.0 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect - github.com/mattn/go-runewidth v0.0.15 // indirect + github.com/mattn/go-runewidth v0.0.16 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 2d54eed..808b30d 100644 --- a/go.sum +++ b/go.sum @@ -1,28 +1,17 @@ -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg= -github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= -github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= -github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= +github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= +github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= -github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod 
h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4 h1:8qmTC5ByIXO3GP/IzBkxcZ/99VITvnIETDhdFz/om7A= github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/parseany.go b/parseany.go index 30188c8..fc2b2b5 100644 --- a/parseany.go +++ b/parseany.go @@ -171,7 +171,7 @@ func (p *parser) unexpectedTail(tailStart int) error { // go 1.20 allows us to convert a byte slice to a string without a memory allocation. // See https://github.com/golang/go/issues/53003#issuecomment-1140276077. 
func bytesToString(b []byte) string { - if b == nil || len(b) <= 0 { + if len(b) <= 0 { return "" } else { return unsafe.String(&b[0], len(b)) @@ -494,7 +494,8 @@ iterRunes: // 12 Feb 2006, 19:17 // 12 Feb 2006, 19:17:22 // 2013 Jan 06 15:04:05 - if adjustedI == 4 { + switch adjustedI { + case 4: p.yeari = p.skip p.yearlen = i - p.skip p.moi = i + 1 @@ -502,9 +503,9 @@ iterRunes: return p, p.unknownErr(datestr) } p.stateDate = dateYearWs - } else if adjustedI == 6 { + case 6: p.stateDate = dateDigitSt - } else { + default: p.stateDate = dateDigitWs p.dayi = p.skip p.daylen = i - p.skip @@ -743,7 +744,8 @@ iterRunes: sepLen = 1 } length := i - (p.moi + p.molen + sepLen) - if length == 4 { + switch length { + case 4: p.yearlen = 4 p.set(p.yeari, "2006") // We now also know that part1 was the day @@ -752,7 +754,7 @@ iterRunes: if !p.setDay() { return p, p.unknownErr(datestr) } - } else if length == 2 { + case 2: // We have no idea if this is // yy-mon-dd OR dd-mon-yy // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) @@ -769,7 +771,7 @@ iterRunes: if !p.setDay() { return p, p.unknownErr(datestr) } - } else { + default: return p, p.unknownErr(datestr) } p.stateTime = timeStart @@ -1257,8 +1259,8 @@ iterRunes: // Oct/07/1970 // February/ 7/1970 // February/07/1970 - switch { - case r == ' ': + switch r { + case ' ': // This could be a weekday or a month, detect and parse both cases. // skip & return to dateStart // Tue 05 May 2020, 05:05:05 @@ -1296,7 +1298,7 @@ iterRunes: return p, p.unknownErr(datestr) } - case r == ',': + case ',': // Mon, 02 Jan 2006 // Monday, 02 Jan 2006 if adjustedI >= 3 && p.nextIs(i, ' ') { @@ -1310,23 +1312,24 @@ iterRunes: return p, p.unknownErr(datestr) } } - case r == '.': + case '.': // sept. 28, 2017 // jan. 
28, 2017 p.stateDate = dateAlphaPeriodWsDigit - if adjustedI == 3 { + switch adjustedI { + case 3: p.moi = p.skip p.molen = i - p.skip p.set(p.skip, "Jan") - } else if adjustedI == 4 { + case 4: // gross newDateStr := p.datestr[p.skip:i-1] + p.datestr[i:] putBackParser(p) return parseTime(newDateStr, loc, opts...) - } else { + default: return p, p.unknownErr(datestr) } - case r == '/': + case '/': // X // Oct/ 7/1970 // Oct/07/1970 @@ -2032,9 +2035,10 @@ iterRunes: // timeWsAlphaZoneOffsetWsExtra // 18:04:07 GMT+0100 (GMT Daylight Time) // 18:04:07 GMT+01:00 (GMT Daylight Time) - if r == '(' { + switch r { + case '(': return p, p.unknownErr(datestr) - } else if r == ')' { + case ')': // must be the end if i != len(p.datestr)-1 { return p, p.unknownErr(datestr) @@ -2076,11 +2080,12 @@ iterRunes: p.set(i-1, "PM") } p.parsedAMPM = true - if p.hourlen == 2 { + switch p.hourlen { + case 2: p.set(p.houri, "03") - } else if p.hourlen == 1 { + case 1: p.set(p.houri, "3") - } else { + default: return p, p.unknownErr(datestr) } } else { @@ -2519,7 +2524,8 @@ iterRunes: // dateDigitDashDigitDash: // 29-06-2026 length := len(p.datestr) - (p.moi + p.molen + 1) - if length == 4 { + switch length { + case 4: p.yearlen = 4 p.set(p.yeari, "2006") // We now also know that part1 was the day @@ -2528,7 +2534,7 @@ iterRunes: if !p.setDay() { return p, p.unknownErr(datestr) } - } else if length == 2 { + case 2: // We have no idea if this is // yy-mon-dd OR dd-mon-yy // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) @@ -2545,7 +2551,7 @@ iterRunes: if !p.setDay() { return p, p.unknownErr(datestr) } - } else { + default: return p, p.unknownErr(datestr) } } @@ -2865,37 +2871,40 @@ func (p *parser) set(start int, val string) { } func (p *parser) setMonth() bool { - if p.molen == 2 { + switch p.molen { + case 2: p.set(p.moi, "01") return true - } else if p.molen == 1 { + case 1: p.set(p.moi, "1") return true - } else { + default: return false } } func (p *parser) setDay() bool { - if 
p.daylen == 2 { + switch p.daylen { + case 2: p.set(p.dayi, "02") return true - } else if p.daylen == 1 { + case 1: p.set(p.dayi, "2") return true - } else { + default: return false } } func (p *parser) setYear() bool { - if p.yearlen == 2 { + switch p.yearlen { + case 2: p.set(p.yeari, "06") return true - } else if p.yearlen == 4 { + case 4: p.set(p.yeari, "2006") return true - } else { + default: return false } } @@ -3016,11 +3025,12 @@ func (p *parser) coalesceTime(end int) bool { // 3:4:5 // 15:04:05.00 if p.houri > 0 { - if p.hourlen == 2 { + switch p.hourlen { + case 2: p.set(p.houri, "15") - } else if p.hourlen == 1 { + case 1: p.set(p.houri, "3") - } else { + default: return false } } @@ -3028,11 +3038,12 @@ func (p *parser) coalesceTime(end int) bool { if p.minlen == 0 { p.minlen = end - p.mini } - if p.minlen == 2 { + switch p.minlen { + case 2: p.set(p.mini, "04") - } else if p.minlen == 1 { + case 1: p.set(p.mini, "4") - } else { + default: return false } } @@ -3040,11 +3051,12 @@ func (p *parser) coalesceTime(end int) bool { if p.seclen == 0 { p.seclen = end - p.seci } - if p.seclen == 2 { + switch p.seclen { + case 2: p.set(p.seci, "05") - } else if p.seclen == 1 { + case 1: p.set(p.seci, "5") - } else { + default: return false } }