commit 1045eb74978b0936743a76a3b690ccdd6e85450e from: Oliver Lowe date: Sat Apr 19 06:47:32 2025 UTC rss: handle more timestamp variants I parsed every feed listed in opml/otl.opml and found a bunch of variants of RFC 822 and RFC 1123 timestamps used. commit - 5f9e6325936e158cd3c8afcbd5005b278dc9a855 commit + 1045eb74978b0936743a76a3b690ccdd6e85450e blob - 4764ad87ee5f6482c61abe96e62f5e5ccb9c5bd7 blob + a5c140780646d227a942d7a707253af3797847c6 --- rss/rss.go +++ rss/rss.go @@ -2,6 +2,7 @@ package rss import ( "encoding/xml" + "fmt" "io" "time" ) @@ -44,16 +45,16 @@ func (ch *Channel) UnmarshalXML(d *xml.Decoder, start } if aux.PubDate != "" { - t, err := time.Parse(time.RFC1123Z, aux.PubDate) + t, err := parseTime(aux.PubDate) if err != nil { - return err + return fmt.Errorf("parse published date %q: %w", aux.PubDate, err) } ch.PubDate = t } if aux.LastBuildDate != "" { - t, err := time.Parse(time.RFC1123Z, aux.LastBuildDate) + t, err := parseTime(aux.LastBuildDate) if err != nil { - return err + return fmt.Errorf("parse last build date %q: %w", aux.LastBuildDate, err) } ch.LastBuildDate = t } @@ -98,9 +99,9 @@ func (it *Item) UnmarshalXML(d *xml.Decoder, start xml return err } if aux.PubDate != "" { - t, err := time.Parse(time.RFC1123Z, aux.PubDate) + t, err := parseTime(aux.PubDate) if err != nil { - return err + return fmt.Errorf("parse published date %q: %w", aux.PubDate, err) } it.PubDate = t } @@ -118,3 +119,19 @@ func Decode(r io.Reader) (*RSS, error) { } return &rss, nil } + +func parseTime(s string) (time.Time, error) { + layouts := []string{ + time.RFC1123Z, time.RFC1123, + time.RFC822Z, time.RFC822, + "Mon, _2 Jan 2006 15:04:05 -0700", // rfc1123z with no leading zero + "Mon, _2 January 2006 15:04:05 -0700", // long month name + } + for _, l := range layouts { + t, err := time.Parse(l, s) + if err == nil { + return t, nil + } + } + return time.Time{}, fmt.Errorf("unsupported layout") +} blob - f2cf2b7c74ea4e2ea0fe2d68bcaa70f6e51dfa26 blob + 
b194fe55a25f5e30dfa9aa05d351b3d5c843e5cc --- rss/rss_test.go +++ rss/rss_test.go @@ -61,3 +61,20 @@ func TestEmpty(t *testing.T) { t.Fatal(err) } } + +func TestParseTime(t *testing.T) { + var tests = []struct { + name string + timestamp string + }{ + {"no leading zero", "Fri, 4 Feb 2022 09:30:00 +1300"}, // https://benhoyt.com/writings/rss.xml + {"long month", "Mon, 10 June 2024 12:20:00 +0000"}, // https://www.claws-mail.org/releases.rss + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if _, err := parseTime(tt.timestamp); err != nil { + t.Errorf("parse %q: %v", tt.timestamp, err) + } + }) + } +}