Incorporate support for dd-mm-yyyy and dd-mm-yy dates (digit month)

Incorporate PR https://github.com/araddon/dateparse/pull/140 from
https://github.com/dferstay to fix
https://github.com/araddon/dateparse/issues/139

This also fixes https://github.com/araddon/dateparse/issues/155
(duplicate of issue 139)

The PR is adapted to avoid duplicating code and to validate the date format.
This commit is contained in:
Klondike Dragon 2023-12-12 23:07:11 -07:00
parent 18938f16ae
commit fc278d32da
2 changed files with 88 additions and 39 deletions

View File

@ -59,26 +59,28 @@ const (
dateYearDash dateYearDash
dateYearDashAlphaDash dateYearDashAlphaDash
dateYearDashDash dateYearDashDash
dateYearDashDashWs // 5 dateYearDashDashWs // 6
dateYearDashDashT dateYearDashDashT
dateYearDashDashOffset dateYearDashDashOffset
dateDigitDash dateDigitDash
dateDigitDashAlpha dateDigitDashAlpha
dateDigitDashAlphaDash // 10 dateDigitDashAlphaDash // 11
dateDigitDashDigit
dateDigitDashDigitDash
dateDigitDot dateDigitDot
dateDigitDotDot dateDigitDotDot
dateDigitSlash dateDigitSlash
dateDigitYearSlash dateDigitYearSlash
dateDigitSlashAlpha // 15 dateDigitSlashAlpha // 18
dateDigitColon dateDigitColon
dateDigitChineseYear dateDigitChineseYear
dateDigitChineseYearWs dateDigitChineseYearWs
dateDigitWs dateDigitWs
dateDigitWsMoYear // 20 dateDigitWsMoYear // 23
dateAlpha dateAlpha
dateAlphaWs dateAlphaWs
dateAlphaWsDigit dateAlphaWsDigit
dateAlphaWsDigitMore // 24 dateAlphaWsDigitMore // 27
dateAlphaWsDigitMoreWs dateAlphaWsDigitMoreWs
dateAlphaWsDigitMoreWsYear dateAlphaWsDigitMoreWsYear
dateAlphaWsMonth dateAlphaWsMonth
@ -88,7 +90,7 @@ const (
dateAlphaWsMore dateAlphaWsMore
dateAlphaWsAtTime dateAlphaWsAtTime
dateAlphaWsAlpha dateAlphaWsAlpha
dateAlphaWsAlphaYearmaybe // 34 dateAlphaWsAlphaYearmaybe // 37
dateAlphaPeriodWsDigit dateAlphaPeriodWsDigit
dateAlphaSlash dateAlphaSlash
dateAlphaSlashDigit dateAlphaSlashDigit
@ -545,6 +547,9 @@ iterRunes:
if unicode.IsLetter(r) { if unicode.IsLetter(r) {
p.stateDate = dateDigitDashAlpha p.stateDate = dateDigitDashAlpha
p.moi = i p.moi = i
} else if unicode.IsDigit(r) {
p.stateDate = dateDigitDashDigit
p.moi = i
} else { } else {
return p, unknownErr(datestr) return p, unknownErr(datestr)
} }
@ -560,10 +565,29 @@ iterRunes:
p.stateDate = dateDigitDashAlphaDash p.stateDate = dateDigitDashAlphaDash
} }
case dateDigitDashAlphaDash: case dateDigitDashDigit:
// 29-06-2026
switch r {
case '-':
// X
// 29-06-2026
p.molen = i - p.moi
if p.molen == 2 {
p.set(p.moi, "01")
p.yeari = i + 1
p.stateDate = dateDigitDashDigitDash
} else {
return p, unknownErr(datestr)
}
}
case dateDigitDashAlphaDash, dateDigitDashDigitDash:
// dateDigitDashAlphaDash:
// 13-Feb-03 ambiguous // 13-Feb-03 ambiguous
// 28-Feb-03 ambiguous // 28-Feb-03 ambiguous
// 29-Jun-2016 dd-month(alpha)-yyyy // 29-Jun-2016 dd-month(alpha)-yyyy
// dateDigitDashDigitDash:
// 29-06-2026
switch r { switch r {
case ' ': case ' ':
// we need to find if this was 4 digits, aka year // we need to find if this was 4 digits, aka year
@ -581,8 +605,11 @@ iterRunes:
} else if length == 2 { } else if length == 2 {
// We have no idea if this is // We have no idea if this is
// yy-mon-dd OR dd-mon-yy // yy-mon-dd OR dd-mon-yy
// (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy)
// //
// We are going to ASSUME (bad, bad) that it is dd-mon-yy which is a horible assumption // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way.
p.ambiguousMD = true p.ambiguousMD = true
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
@ -592,6 +619,8 @@ iterRunes:
if !p.setDay() { if !p.setDay() {
return p, unknownErr(datestr) return p, unknownErr(datestr)
} }
} else {
return p, unknownErr(datestr)
} }
p.stateTime = timeStart p.stateTime = timeStart
break iterRunes break iterRunes
@ -2055,10 +2084,15 @@ iterRunes:
case dateYearDashDashT: case dateYearDashDashT:
return p, nil return p, nil
case dateDigitDashAlphaDash: case dateDigitDashAlphaDash, dateDigitDashDigitDash:
// This has already been done if we parsed the time already
if p.stateTime == timeIgnore {
// dateDigitDashAlphaDash:
// 13-Feb-03 ambiguous // 13-Feb-03 ambiguous
// 28-Feb-03 ambiguous // 28-Feb-03 ambiguous
// 29-Jun-2016 // 29-Jun-2016
// dateDigitDashDigitDash:
// 29-06-2026
length := len(p.datestr) - (p.moi + p.molen + 1) length := len(p.datestr) - (p.moi + p.molen + 1)
if length == 4 { if length == 4 {
p.yearlen = 4 p.yearlen = 4
@ -2072,8 +2106,11 @@ iterRunes:
} else if length == 2 { } else if length == 2 {
// We have no idea if this is // We have no idea if this is
// yy-mon-dd OR dd-mon-yy // yy-mon-dd OR dd-mon-yy
// (or for dateDigitDashDigitDash, yy-mm-dd OR dd-mm-yy)
// //
// We are going to ASSUME (bad, bad) that it is dd-mon-yy which is a horible assumption // We are going to ASSUME (bad, bad) that it is dd-mon-yy (dd-mm-yy),
// which is a horrible assumption, but seems to be the convention for
// dates that are formatted in this way.
p.ambiguousMD = true p.ambiguousMD = true
p.yearlen = 2 p.yearlen = 2
p.set(p.yeari, "06") p.set(p.yeari, "06")
@ -2083,6 +2120,9 @@ iterRunes:
if !p.setDay() { if !p.setDay() {
return p, unknownErr(datestr) return p, unknownErr(datestr)
} }
} else {
return p, unknownErr(datestr)
}
} }
return p, nil return p, nil

View File

@ -324,10 +324,20 @@ var testInputs = []dateTest{
// yyyy-mm-dd-07:00 // yyyy-mm-dd-07:00
{in: "2020-07-20+08:00", out: "2020-07-19 16:00:00 +0000 UTC"}, {in: "2020-07-20+08:00", out: "2020-07-19 16:00:00 +0000 UTC"},
{in: "2020-07-20+0800", out: "2020-07-19 16:00:00 +0000 UTC"}, {in: "2020-07-20+0800", out: "2020-07-19 16:00:00 +0000 UTC"},
// dd-mmm-yy // dd-mmm-yy (alpha month)
{in: "28-Feb-02", out: "2002-02-28 00:00:00 +0000 UTC"}, {in: "28-Feb-02", out: "2002-02-28 00:00:00 +0000 UTC"},
{in: "15-Jan-18", out: "2018-01-15 00:00:00 +0000 UTC"}, {in: "15-Jan-18", out: "2018-01-15 00:00:00 +0000 UTC"},
{in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"}, {in: "15-Jan-2017", out: "2017-01-15 00:00:00 +0000 UTC"},
{in: "28-Feb-02 15:16:17", out: "2002-02-28 15:16:17 +0000 UTC"},
{in: "15-Jan-18 15:16:17", out: "2018-01-15 15:16:17 +0000 UTC"},
{in: "15-Jan-2017 15:16:17", out: "2017-01-15 15:16:17 +0000 UTC"},
// dd-mm-yy (digit month - potentially ambiguous) - https://github.com/araddon/dateparse/issues/139
{in: "28-02-02", out: "2002-02-28 00:00:00 +0000 UTC"},
{in: "15-01-18", out: "2018-01-15 00:00:00 +0000 UTC"},
{in: "15-01-2017", out: "2017-01-15 00:00:00 +0000 UTC"},
{in: "28-02-02 15:16:17", out: "2002-02-28 15:16:17 +0000 UTC"},
{in: "15-01-18 15:16:17", out: "2018-01-15 15:16:17 +0000 UTC"},
{in: "15-01-2017 15:16:17", out: "2017-01-15 15:16:17 +0000 UTC"},
// yyyy-mm // yyyy-mm
{in: "2014-04", out: "2014-04-01 00:00:00 +0000 UTC"}, {in: "2014-04", out: "2014-04-01 00:00:00 +0000 UTC"},
// yyyy-mm-dd hh:mm:ss AM // yyyy-mm-dd hh:mm:ss AM
@ -733,7 +743,6 @@ var testParseErrors = []dateTest{
{in: "oct.-7-1970", err: true}, {in: "oct.-7-1970", err: true},
{in: "septe. 7, 1970", err: true}, {in: "septe. 7, 1970", err: true},
{in: "SeptemberRR 7th, 1970", err: true}, {in: "SeptemberRR 7th, 1970", err: true},
{in: "29-06-2016", err: true},
// this is just testing the empty space up front // this is just testing the empty space up front
{in: " 2018-01-02 17:08:09 -07:00", err: true}, {in: " 2018-01-02 17:08:09 -07:00", err: true},
// a semantic version number should not be interpreted as a date // a semantic version number should not be interpreted as a date