Support RabbitMQ log format (dd-mon-yyyy::hh:mm:ss)

Adapt https://github.com/araddon/dateparse/pull/122 by https://github.com/bizy01 to add support for RMQ log format. Refactor to avoid redundant code. Add format validations.

As a side note, this will also support the format dd-mm-yyyy::hh:mm:ss.
This commit is contained in:
Klondike Dragon 2023-12-15 20:22:47 -07:00
parent 249dd7368c
commit 0c3943eacd
2 changed files with 73 additions and 33 deletions

View File

@ -598,44 +598,76 @@ iterRunes:
// 13-Feb-03 ambiguous
// 28-Feb-03 ambiguous
// 29-Jun-2016 dd-month(alpha)-yyyy
// 8-Mar-2018::
// dateDigitDashDigitDash:
// 29-06-2026
// 08-03-18:: ambiguous (dd-mm-yy or yy-mm-dd)
switch r {
case ' ':
// we need to find if this was 4 digits, aka year
// or 2 digits which makes it ambiguous year/day
length := i - (p.moi + p.molen + 1)
if length == 4 {
p.yearlen = 4
p.set(p.yeari, "2006")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
if !p.setDay() {
return p, unknownErr(datestr)
case ' ', ':':
doubleColonTimeConnector := false
if r == ':' {
p.link++
if p.link == 2 {
if i+1 < len(p.datestr) {
// only legitimate content to follow "::" is the start of the time
nextChar, _ := utf8.DecodeRuneInString(p.datestr[i+1:])
if unicode.IsDigit(nextChar) {
doubleColonTimeConnector = true
}
}
if !doubleColonTimeConnector {
return p, unknownErr(datestr)
}
}
} else if length == 2 {
// We have no idea if this is
// yy-mon-dd OR dd-mon-yy
// (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy)
//
// We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way.
p.ambiguousMD = true
p.yearlen = 2
p.set(p.yeari, "06")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
if !p.setDay() {
return p, unknownErr(datestr)
}
} else {
} else if p.link > 0 {
return p, unknownErr(datestr)
}
if r == ' ' || doubleColonTimeConnector {
// we need to find if this was 4 digits, aka year
// or 2 digits which makes it ambiguous year/day
var sepLen int
if doubleColonTimeConnector {
sepLen = 2
} else {
sepLen = 1
}
length := i - (p.moi + p.molen + sepLen)
if length == 4 {
p.yearlen = 4
p.set(p.yeari, "2006")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
if !p.setDay() {
return p, unknownErr(datestr)
}
} else if length == 2 {
// We have no idea if this is
// yy-mon-dd OR dd-mon-yy
// (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy)
//
// We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way.
p.ambiguousMD = true
p.yearlen = 2
p.set(p.yeari, "06")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
if !p.setDay() {
return p, unknownErr(datestr)
}
} else {
return p, unknownErr(datestr)
}
p.stateTime = timeStart
break iterRunes
}
default:
if !unicode.IsDigit(r) && !unicode.IsLetter(r) && p.link > 0 {
return p, unknownErr(datestr)
}
p.stateTime = timeStart
break iterRunes
}
case dateDigitYearSlash:
@ -2397,6 +2429,7 @@ type parser struct {
fullMonth string
parsedAMPM bool
skip int
link int
extra int
part1Len int
yeari int

View File

@ -446,6 +446,9 @@ var testInputs = []dateTest{
// Git log default date format - https://github.com/araddon/dateparse/pull/92
{in: "Thu Apr 7 15:13:13 2005 -0700", out: "2005-04-07 22:13:13 +0000 UTC"},
{in: "Tue Dec 12 23:07:11 2023 -0700", out: "2023-12-13 06:07:11 +0000 UTC"},
// RabbitMQ log format - https://github.com/araddon/dateparse/pull/122
{in: "8-Mar-2018::14:09:27", out: "2018-03-08 14:09:27 +0000 UTC"},
{in: "08-03-2018::02:09:29 PM", out: "2018-03-08 14:09:29 +0000 UTC"},
// yyyy-mm-dd hh:mm:ss,000
{in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"},
// yyyy-mm-dd hh:mm:ss +0000
@ -827,6 +830,10 @@ var testParseErrors = []dateTest{
{in: "2014-02-13 00:00:00 utc", err: true}, // lowercase timezones are not valid
{in: "2014-02-13t00:00:00.0z", err: true}, // lowercase 't' separator is not supported
{in: "2014-02-13T00:00:00.0z", err: true}, // lowercase 'z' zulu timezone indicator not a valid format
// Invalid variants of RabbitMQ log format
{in: "8-Mar-2018:14:09:27", err: true},
{in: "8-Mar-2018: 14:09:27", err: true},
{in: "8-Mar-2018:::14:09:27", err: true},
}
func TestParseErrors(t *testing.T) {
@ -1093,5 +1100,5 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) {
// Convenience function for debugging a particular broken test case
func TestDebug(t *testing.T) {
MustParse("Tue Dec 12 23:07:11 2023 -0700")
MustParse("8-Mar-2018::14:09:27")
}