Add support for dd-mm-yyyy (digit month) formats

Some European dates are formatted as dd-mm-yyyy, where
the day, month, and year are all digits.

Fixes #139

Signed-off-by: Daniel Ferstay <dferstay@splunk.com>
This commit is contained in:
Daniel Ferstay 2021-10-26 15:26:22 -07:00
parent 6b43995a97
commit 65e246add0
2 changed files with 77 additions and 2 deletions

View File

@ -93,6 +93,8 @@ const (
dateAlphaPeriodWsDigit dateAlphaPeriodWsDigit
dateWeekdayComma dateWeekdayComma
dateWeekdayAbbrevComma dateWeekdayAbbrevComma
dateDigitDashDigit
dateDigitDashDigitDash
) )
const ( const (
// Time state // Time state
@ -485,6 +487,9 @@ iterRunes:
if unicode.IsLetter(r) { if unicode.IsLetter(r) {
p.stateDate = dateDigitDashAlpha p.stateDate = dateDigitDashAlpha
p.moi = i p.moi = i
} else if unicode.IsDigit(r) {
p.stateDate = dateDigitDashDigit
p.moi = i
} else { } else {
return nil, unknownErr(datestr) return nil, unknownErr(datestr)
} }
@ -499,7 +504,15 @@ iterRunes:
p.yeari = i + 1 p.yeari = i + 1
p.stateDate = dateDigitDashAlphaDash p.stateDate = dateDigitDashAlphaDash
} }
case dateDigitDashDigit:
// 29-06-2026
switch r {
case '-':
p.molen = i - p.moi
p.set(p.moi, "01")
p.yeari = i + 1
p.stateDate = dateDigitDashDigitDash
}
case dateDigitDashAlphaDash: case dateDigitDashAlphaDash:
// 13-Feb-03 ambiguous // 13-Feb-03 ambiguous
// 28-Feb-03 ambiguous // 28-Feb-03 ambiguous
@ -532,6 +545,36 @@ iterRunes:
p.stateTime = timeStart p.stateTime = timeStart
break iterRunes break iterRunes
} }
case dateDigitDashDigitDash:
// 29-06-2026
switch r {
case ' ':
// we need to find if this was 4 digits, aka year
// or 2 digits which makes it ambiguous year/day
length := i - (p.moi + p.molen + 1)
if length == 4 {
p.yearlen = 4
p.set(p.yeari, "2006")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
p.setDay()
} else if length == 2 {
// We have no idea if this is
// yy-mm-dd OR dd-mm-yy
//
// We are going to ASSUME (bad, bad) that it is dd-mm-yy, which is a horrible assumption
p.ambiguousMD = true
p.yearlen = 2
p.set(p.yeari, "06")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
p.setDay()
}
p.stateTime = timeStart
break iterRunes
}
case dateDigitYearSlash: case dateDigitYearSlash:
// 2014/07/10 06:55:38.156283 // 2014/07/10 06:55:38.156283
@ -1844,6 +1887,33 @@ iterRunes:
p.setDay() p.setDay()
} }
return p, nil
case dateDigitDashDigitDash:
// 13-02-03 ambiguous
// 28-02-03 ambiguous
// 29-06-2016
length := len(datestr) - (p.moi + p.molen + 1)
if length == 4 {
p.yearlen = 4
p.set(p.yeari, "2006")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
p.setDay()
} else if length == 2 {
// We have no idea if this is
// yy-mm-dd OR dd-mm-yy
//
// We are going to ASSUME (bad, bad) that it is dd-mm-yy, which is a horrible assumption
p.ambiguousMD = true
p.yearlen = 2
p.set(p.yeari, "06")
// We now also know that part1 was the day
p.dayi = 0
p.daylen = p.part1Len
p.setDay()
}
return p, nil return p, nil
case dateDigitDot: case dateDigitDot:

View File

@ -239,10 +239,15 @@ var testInputs = []dateTest{
// yyyy-mm-dd-07:00 // yyyy-mm-dd-07:00
{in: "2020-07-20+08:00", out: "2020-07-19 16:00:00 +0000 UTC"}, {in: "2020-07-20+08:00", out: "2020-07-19 16:00:00 +0000 UTC"},
{in: "2020-07-20+0800", out: "2020-07-19 16:00:00 +0000 UTC"}, {in: "2020-07-20+0800", out: "2020-07-19 16:00:00 +0000 UTC"},
// dd-mmm-yy // dd-mmm-yy (alpha month)
{in: "28-Feb-02", out: "2002-02-28 00:00:00 +0000 UTC"}, {in: "28-Feb-02", out: "2002-02-28 00:00:00 +0000 UTC"},
{in: "15-Jan-18", out: "2018-01-15 00:00:00 +0000 UTC"}, {in: "15-Jan-18", out: "2018-01-15 00:00:00 +0000 UTC"},
{in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"}, {in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"},
// dd-mm-yy (digit month)
{in: "28-02-02", out: "2002-02-28 00:00:00 +0000 UTC"}, // https://github.com/araddon/dateparse/issues/139
{in: "15-01-18", out: "2018-01-15 00:00:00 +0000 UTC"},
{in: "15-01-2017", out: "2017-01-15 00:00:00 +0000 UTC"},
// yyyy-mm // yyyy-mm
{in: "2014-04", out: "2014-04-01 00:00:00 +0000 UTC"}, {in: "2014-04", out: "2014-04-01 00:00:00 +0000 UTC"},
// yyyy-mm-dd hh:mm:ss AM // yyyy-mm-dd hh:mm:ss AM