diff --git a/parseany.go b/parseany.go index b2f11aa..03cdad7 100644 --- a/parseany.go +++ b/parseany.go @@ -598,44 +598,76 @@ iterRunes: // 13-Feb-03 ambiguous // 28-Feb-03 ambiguous // 29-Jun-2016 dd-month(alpha)-yyyy + // 8-Mar-2018:: dd-month(alpha)-yyyy, where "::" joins the date to the time // dateDigitDashDigitDash: // 29-06-2026 + // 08-03-18:: ambiguous (dd-mm-yy or yy-mm-dd) switch r { - case ' ': - // we need to find if this was 4 digits, aka year - // or 2 digits which makes it ambiguous year/day - length := i - (p.moi + p.molen + 1) - if length == 4 { - p.yearlen = 4 - p.set(p.yeari, "2006") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { - return p, unknownErr(datestr) + case ' ', ':': + doubleColonTimeConnector := false + if r == ':' { + p.link++ + if p.link == 2 { + if i+1 < len(p.datestr) { + // the only legitimate content to follow "::" is the start of the time, so the next rune must be a digit + nextChar, _ := utf8.DecodeRuneInString(p.datestr[i+1:]) + if unicode.IsDigit(nextChar) { + doubleColonTimeConnector = true + } + } + if !doubleColonTimeConnector { + return p, unknownErr(datestr) + } } - } else if length == 2 { - // We have no idea if this is - // yy-mon-dd OR dd-mon-yy - // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) - // - // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), - // which is a horrible assumption, but seems to be the convention for - // dates that are formatted in this way. 
- p.ambiguousMD = true - p.yearlen = 2 - p.set(p.yeari, "06") - // We now also know that part1 was the day - p.dayi = 0 - p.daylen = p.part1Len - if !p.setDay() { - return p, unknownErr(datestr) - } - } else { + } else if p.link > 0 { + return p, unknownErr(datestr) + } + if r == ' ' || doubleColonTimeConnector { + // we need to find if this was 4 digits, aka year + // or 2 digits which makes it ambiguous year/day (with "::", i is on the second colon, so one extra byte is subtracted) + var sepLen int + if doubleColonTimeConnector { + sepLen = 2 + } else { + sepLen = 1 + } + length := i - (p.moi + p.molen + sepLen) + if length == 4 { + p.yearlen = 4 + p.set(p.yeari, "2006") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else if length == 2 { + // We have no idea if this is + // yy-mon-dd OR dd-mon-yy + // (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy) + // + // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy), + // which is a horrible assumption, but seems to be the convention for + // dates that are formatted in this way. 
+ p.ambiguousMD = true + p.yearlen = 2 + p.set(p.yeari, "06") + // We now also know that part1 was the day + p.dayi = 0 + p.daylen = p.part1Len + if !p.setDay() { + return p, unknownErr(datestr) + } + } else { + return p, unknownErr(datestr) + } + p.stateTime = timeStart + break iterRunes + } + default: + if !unicode.IsDigit(r) && !unicode.IsLetter(r) && p.link > 0 { return p, unknownErr(datestr) } - p.stateTime = timeStart - break iterRunes } case dateDigitYearSlash: @@ -2397,6 +2429,7 @@ type parser struct { fullMonth string parsedAMPM bool skip int + link int extra int part1Len int yeari int diff --git a/parseany_test.go b/parseany_test.go index 57a3998..9c9b6fa 100644 --- a/parseany_test.go +++ b/parseany_test.go @@ -446,6 +446,9 @@ var testInputs = []dateTest{ // Git log default date format - https://github.com/araddon/dateparse/pull/92 {in: "Thu Apr 7 15:13:13 2005 -0700", out: "2005-04-07 22:13:13 +0000 UTC"}, {in: "Tue Dec 12 23:07:11 2023 -0700", out: "2023-12-13 06:07:11 +0000 UTC"}, + // RabbitMQ log format - https://github.com/araddon/dateparse/pull/122 + {in: "8-Mar-2018::14:09:27", out: "2018-03-08 14:09:27 +0000 UTC"}, + {in: "08-03-2018::02:09:29 PM", out: "2018-03-08 14:09:29 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss,000 {in: "2014-05-11 08:20:13,787", out: "2014-05-11 08:20:13.787 +0000 UTC"}, // yyyy-mm-dd hh:mm:ss +0000 @@ -827,6 +830,10 @@ var testParseErrors = []dateTest{ {in: "2014-02-13 00:00:00 utc", err: true}, // lowercase timezones are not valid {in: "2014-02-13t00:00:00.0z", err: true}, // lowercase 't' separator is not supported {in: "2014-02-13T00:00:00.0z", err: true}, // lowercase 'z' zulu timezone indicator not a valid format + // Invalid variants of RabbitMQ log format (date and time not joined by exactly "::") + {in: "8-Mar-2018:14:09:27", err: true}, + {in: "8-Mar-2018: 14:09:27", err: true}, + {in: "8-Mar-2018:::14:09:27", err: true}, } func TestParseErrors(t *testing.T) { @@ -1093,5 +1100,5 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) { // Convenience function for 
debugging a particular broken test case func TestDebug(t *testing.T) { - MustParse("Tue Dec 12 23:07:11 2023 -0700") + MustParse("8-Mar-2018::14:09:27") }