mirror of
https://github.com/araddon/dateparse.git
synced 2025-01-18 18:56:47 +08:00
Clean up handling of TZ name parsing
Fully support the format where a TZ name is in parentheses after the time (and possibly after an offset). This fixes the broken case where a 4-character TZ name was in parentheses after a time.
This commit is contained in:
parent
c4de5d4f6a
commit
d5b3c60e9b
160
parseany.go
160
parseany.go
@ -1895,9 +1895,8 @@ iterRunes:
|
||||
if !p.setYear() {
|
||||
return p, p.unknownErr(datestr)
|
||||
}
|
||||
} else {
|
||||
// allow multiple trailing whitespace
|
||||
}
|
||||
// else allow multiple trailing whitespace
|
||||
case '+', '-':
|
||||
// The year must be followed by a space before an offset!
|
||||
if p.yearlen > 0 {
|
||||
@ -1942,12 +1941,10 @@ iterRunes:
|
||||
} else {
|
||||
p.tzlen = i - p.tzi
|
||||
}
|
||||
if p.tzlen == 4 {
|
||||
p.set(p.tzi, " MST")
|
||||
} else if p.tzlen == 3 {
|
||||
p.set(p.tzi, "MST")
|
||||
} else if p.tzlen > 0 {
|
||||
return p, p.unknownErr(datestr)
|
||||
if p.tzlen > 0 {
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
}
|
||||
p.stateTime = timeWsAlphaZoneOffset
|
||||
p.offseti = i
|
||||
@ -1956,12 +1953,8 @@ iterRunes:
|
||||
// 17:57:51 MST
|
||||
// 06:20:00 (EST)
|
||||
p.tzlen = i - p.tzi
|
||||
if p.tzlen == 4 {
|
||||
p.set(p.tzi, " MST")
|
||||
} else if p.tzlen == 3 {
|
||||
p.set(p.tzi, "MST")
|
||||
} else if p.tzlen > 0 {
|
||||
return p, p.unknownErr(datestr)
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
if r == ' ' {
|
||||
p.stateTime = timeWsAlphaWs
|
||||
@ -2205,19 +2198,8 @@ iterRunes:
|
||||
case r == ' ':
|
||||
if p.tzi > 0 {
|
||||
p.tzlen = i - p.tzi
|
||||
switch p.tzlen {
|
||||
case 3:
|
||||
// 13:31:51.999 +01:00 CET
|
||||
p.set(p.tzi, "MST")
|
||||
case 4:
|
||||
// 13:31:51.999 +01:00 CEST
|
||||
p.set(p.tzi, "MST ")
|
||||
default:
|
||||
if p.simpleErrorMessages {
|
||||
return p, ErrUnknownTimeZone
|
||||
} else {
|
||||
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen])
|
||||
}
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
} else {
|
||||
return p, p.unknownErr(datestr)
|
||||
@ -2353,18 +2335,9 @@ iterRunes:
|
||||
|
||||
switch p.stateTime {
|
||||
case timeWsAlpha:
|
||||
switch len(p.datestr) - p.tzi {
|
||||
case 3:
|
||||
// 13:31:51.999 +01:00 CET
|
||||
p.set(p.tzi, "MST")
|
||||
case 4:
|
||||
p.set(p.tzi, "MST ")
|
||||
default:
|
||||
if p.simpleErrorMessages {
|
||||
return p, ErrUnknownTimeZone
|
||||
} else {
|
||||
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:])
|
||||
}
|
||||
p.tzlen = i - p.tzi
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
|
||||
case timeWsAlphaRParen:
|
||||
@ -2377,10 +2350,26 @@ iterRunes:
|
||||
}
|
||||
case timeWsOffsetWsTZDescInParen:
|
||||
// The last character must be a closing ')'
|
||||
if len(p.datestr) <= 0 || p.datestr[i-1] != ')' {
|
||||
if i <= 0 || p.datestr[i-1] != ')' {
|
||||
return p, p.unknownErr(datestr)
|
||||
}
|
||||
p.trimExtra(false)
|
||||
// As a special case, if we don't yet have a timezone name,
|
||||
// and the content in the paren is 3-4 characters, then treat
|
||||
// this as a time zone name instead
|
||||
if len(p.datestr) >= p.extra+1+3+1 {
|
||||
parenContentsLen := (i - 1) - (p.extra + 2)
|
||||
if p.tzi == 0 && (parenContentsLen >= 3 && parenContentsLen <= 4) {
|
||||
p.tzi = p.extra + 2
|
||||
p.tzlen = parenContentsLen
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
p.extra = 0
|
||||
}
|
||||
}
|
||||
if p.extra > 0 {
|
||||
p.trimExtra(false)
|
||||
}
|
||||
case timeWsAlphaZoneOffset:
|
||||
// 06:20:00 UTC-05
|
||||
if err := p.setTZOffset(i, datestr); err != nil {
|
||||
@ -2418,19 +2407,9 @@ iterRunes:
|
||||
case timeWsOffsetWsAlphaZone:
|
||||
// 00:12:00 +0000 UTC
|
||||
if p.tzi > 0 {
|
||||
switch len(p.datestr) - p.tzi {
|
||||
case 3:
|
||||
// 13:31:51.999 +01:00 CET
|
||||
p.set(p.tzi, "MST")
|
||||
case 4:
|
||||
// 13:31:51.999 +01:00 CEST
|
||||
p.set(p.tzi, "MST ")
|
||||
default:
|
||||
if p.simpleErrorMessages {
|
||||
return p, ErrUnknownTimeZone
|
||||
} else {
|
||||
return p, fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:])
|
||||
}
|
||||
p.tzlen = i - p.tzi
|
||||
if err := p.setTZName(datestr); err != nil {
|
||||
return p, err
|
||||
}
|
||||
} else {
|
||||
return p, p.unknownErr(datestr)
|
||||
@ -2940,6 +2919,44 @@ func (p *parser) setTZOffset(i int, datestr string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *parser) setTZName(datestr string) error {
|
||||
switch p.tzlen {
|
||||
case 3:
|
||||
p.set(p.tzi, "MST")
|
||||
case 4:
|
||||
p.set(p.tzi, "MST ")
|
||||
default:
|
||||
if p.simpleErrorMessages {
|
||||
return ErrUnknownTimeZone
|
||||
} else {
|
||||
return fmt.Errorf("%w %q near %q (must be 3 or 4 characters)", ErrUnknownTimeZone, datestr, p.datestr[p.tzi:p.tzi+p.tzlen])
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Removes the characters at the given range from the format string.
|
||||
// Fills the end of the format string with spaces rather than shortening it.
|
||||
func (p *parser) removeRangeFromFormat(i, numBytes int) {
|
||||
if i < 0 || i >= len(p.format) {
|
||||
return
|
||||
}
|
||||
var startErase int
|
||||
afterRemovedRange := i + numBytes
|
||||
bytesToCopy := len(p.format) - afterRemovedRange
|
||||
if bytesToCopy <= 0 {
|
||||
// nothing to copy, erase everything from the removal point
|
||||
startErase = i
|
||||
} else {
|
||||
copy(p.format[i:], p.format[afterRemovedRange:])
|
||||
startErase = i + bytesToCopy
|
||||
}
|
||||
// fill in spaces to erase the moved content in its old location
|
||||
for index := startErase; index < len(p.format); index++ {
|
||||
p.format[index] = ' '
|
||||
}
|
||||
}
|
||||
|
||||
// Find the proper end of the current component (scanning chars starting from start and going
|
||||
// up until the end, and either returning at end or returning the first character that is
|
||||
// not allowed, as determined by allowNumeric, allowAlpha, and allowOther)
|
||||
@ -3097,6 +3114,26 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
|
||||
if p.t != nil {
|
||||
return *p.t, nil
|
||||
}
|
||||
|
||||
// Make sure that the entire string matched to a known format that was detected
|
||||
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
||||
// We can always ignore punctuation at the end of a date/time, but do not allow
|
||||
// any numbers or letters in the format string.
|
||||
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true)
|
||||
if validFormatTo < len(p.format) {
|
||||
return time.Time{}, p.unexpectedTail(p.formatSetLen)
|
||||
}
|
||||
}
|
||||
|
||||
// Special case where the TZ name is 4 characters long and followed by punctuation, will cause parsing problems
|
||||
// with the format 'MST ' (will expect a whitespace that isn't there after 4 char timezone). Most robust
|
||||
// solution is to remove the extra whitespace. Even though it will cause offsets after this point to not match
|
||||
// between the datestr and format string, it's not an issue at this point.
|
||||
if p.tzlen == 4 && p.tzi+4 < len(p.format) && p.format[p.tzi+3] == ' ' && p.format[p.tzi+4] != ' ' {
|
||||
p.removeRangeFromFormat(p.tzi+3, 1)
|
||||
}
|
||||
|
||||
// If we have a full month name, update the format string to use it (can change length of format string)
|
||||
if len(p.fullMonth) > 0 {
|
||||
p.setFullMonth(p.fullMonth)
|
||||
}
|
||||
@ -3110,7 +3147,7 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
|
||||
// get out of this function to reduce scope it needs to be applied on
|
||||
if err != nil && strings.Contains(err.Error(), "month out of range") {
|
||||
// simple optimized case where mm and dd can be swapped directly
|
||||
if p.molen == 2 && p.daylen == 2 {
|
||||
if p.molen == 2 && p.daylen == 2 && len(p.fullMonth) <= 0 && (p.tzi == 0 || (p.moi < p.tzi && p.dayi < p.tzi)) {
|
||||
// skipped bytes have already been removed, so compensate for that
|
||||
moi := p.moi - p.skip
|
||||
p.moi = p.dayi - p.skip
|
||||
@ -3144,17 +3181,10 @@ func (p *parser) parse(originalLoc *time.Location, originalOpts ...ParserOption)
|
||||
}()
|
||||
}
|
||||
|
||||
// Make sure that the entire string matched to a known format that was detected
|
||||
if !p.allowPartialStringMatch && p.formatSetLen < len(p.format) {
|
||||
// We can always ignore punctuation at the end of a date/time, but do not allow
|
||||
// any numbers or letters in the format string.
|
||||
validFormatTo := findProperEnd(bytesToString(p.format), p.formatSetLen, len(p.format), false, false, true)
|
||||
if validFormatTo < len(p.format) {
|
||||
return time.Time{}, p.unexpectedTail(p.formatSetLen)
|
||||
}
|
||||
if p.skip > len(p.format) {
|
||||
p.skip = len(p.format)
|
||||
}
|
||||
|
||||
if p.skip > 0 && len(p.format) > p.skip {
|
||||
if p.skip > 0 {
|
||||
// copy and then re-slice to shorten to avoid losing the header of the pooled format string
|
||||
copy(p.format, p.format[p.skip:])
|
||||
p.format = p.format[:len(p.format)-p.skip]
|
||||
|
@ -225,9 +225,11 @@ var testInputs = []dateTest{
|
||||
{in: "Thu, 03 Jul 2017 8:08:04 +0100", out: "2017-07-03 07:08:04 +0000 UTC"},
|
||||
{in: "Thu, 03 Jul 2017 8:8:4 +0100", out: "2017-07-03 07:08:04 +0000 UTC"},
|
||||
//
|
||||
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC"},
|
||||
{in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC"},
|
||||
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", zname: "CEST"},
|
||||
{in: "Tue, 5 Jul 2017 04:08:03 -0700 (MST)", out: "2017-07-05 11:08:03 +0000 UTC", zname: "MST"},
|
||||
{in: "Tue, 11 Jul 2017 04:08:03 +0200 (CEST)", out: "2017-07-11 02:08:03 +0000 UTC", loc: "Europe/Berlin", zname: "CEST"},
|
||||
{in: "Tue, 11 Jul 2017 04:08:03 (CEST)", out: "2017-07-11 04:08:03 +0000 UTC", zname: "CEST"},
|
||||
{in: "Tue, 5 Jul 2017 04:08:03 (MST)", out: "2017-07-05 04:08:03 +0000 UTC", zname: "MST"},
|
||||
// day, dd-Mon-yy hh:mm:zz TZ
|
||||
{in: "Fri, 03-Jul-15 08:08:08 MST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "MST"},
|
||||
{in: "Fri, 03-Jul-15 08:08:08 CEST", out: "2015-07-03 08:08:08 +0000 UTC", zname: "CEST"},
|
||||
@ -330,14 +332,18 @@ var testInputs = []dateTest{
|
||||
{in: "04/02/2014 04:08:09 AM", out: "2014-04-02 04:08:09 +0000 UTC"},
|
||||
{in: "04/02/2014 04:08:09AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09 AM PST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09 AM (PST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08:09 AM CEST", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08:09 AM (CEST)", out: "2014-04-02 04:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08:09pm", out: "2014-04-02 16:08:09 +0000 UTC"},
|
||||
{in: "04/02/2014 04:08:09 PM", out: "2014-04-02 16:08:09 +0000 UTC"},
|
||||
{in: "04/02/2014 04:08:09PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09 PM PST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09 PM (PST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "PST"},
|
||||
{in: "04/02/2014 04:08:09pm CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08:09 PM CEST", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08:09 PM (CEST)", out: "2014-04-02 16:08:09 +0000 UTC", zname: "CEST"},
|
||||
{in: "04/02/2014 04:08am", out: "2014-04-02 04:08:00 +0000 UTC"},
|
||||
{in: "04/02/2014 04:08 AM", out: "2014-04-02 04:08:00 +0000 UTC"},
|
||||
{in: "04/02/2014 04:08pm", out: "2014-04-02 16:08:00 +0000 UTC"},
|
||||
@ -822,7 +828,7 @@ func TestParse(t *testing.T) {
|
||||
}
|
||||
fullInput := prefix + th.in
|
||||
|
||||
t.Run(fmt.Sprintf("simpleerr-%v-addweekday-%v-%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) {
|
||||
t.Run(fmt.Sprintf("simpleerr-%v/addweekday-%v/%s", simpleErrorMessage, addWeekday, fullInput), func(t *testing.T) {
|
||||
var ts time.Time
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
@ -1167,6 +1173,9 @@ func TestInLocation(t *testing.T) {
|
||||
ts = MustParse("Tue, 5 Jul 2017 16:28:13 -0700 (MST)")
|
||||
assert.Equal(t, "2017-07-05 23:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
|
||||
|
||||
ts = MustParse("Tue, 5 Jul 2017 16:28:13 +0300 (CEST)")
|
||||
assert.Equal(t, "2017-07-05 13:28:13 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
|
||||
|
||||
// Now we are going to use ParseIn() and see that it gives different answer
|
||||
// with different zone, offset
|
||||
time.Local = nil
|
||||
@ -1311,6 +1320,6 @@ func TestRetryAmbiguousDateWithSwap(t *testing.T) {
|
||||
|
||||
// Convenience function for debugging a particular broken test case
|
||||
func TestDebug(t *testing.T) {
|
||||
ts := MustParse("Monday 19/03/2012 00:00:00", RetryAmbiguousDateWithSwap(true))
|
||||
assert.Equal(t, "2012-03-19 00:00:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
|
||||
ts := MustParse("September 17, 2012 at 10:09am CEST+02", RetryAmbiguousDateWithSwap(true))
|
||||
assert.Equal(t, "2012-09-17 08:09:00 +0000 UTC", fmt.Sprintf("%v", ts.In(time.UTC)))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user