1
0
Fork 0
forked from 0x2E/fusion

Move RSS parsing code from pull to a dedicated package (#96)

* Create a client package

* Bring back TestDecideFeedUpdateAction, which was removed accidentally

* Fix import order
This commit is contained in:
Michael Lynch 2025-03-21 09:13:20 -04:00 committed by GitHub
parent d2cb870574
commit 68760f2ce6
Signed by: github
GPG key ID: B5690EEEBB952194
7 changed files with 677 additions and 334 deletions

View file

@ -10,6 +10,7 @@ import (
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/repo"
"github.com/0x2e/fusion/service/pull"
"github.com/0x2e/fusion/service/pull/client"
"github.com/0x2e/fusion/service/sniff"
)
@ -119,11 +120,11 @@ func (f Feed) Create(ctx context.Context, req *ReqFeedCreate) error {
}
func (f Feed) CheckValidity(ctx context.Context, req *ReqFeedCheckValidity) (*RespFeedCheckValidity, error) {
if parsed, err := pull.FetchFeed(ctx, &model.Feed{Link: &req.Link}); err == nil && parsed != nil {
if title, err := client.NewFeedClient().FetchTitle(ctx, req.Link, model.FeedRequestOptions{}); err == nil {
return &RespFeedCheckValidity{
FeedLinks: []ValidityItem{
{
Title: &parsed.Title,
Title: &title,
Link: &req.Link,
},
},

View file

@ -0,0 +1,79 @@
package client
import (
"context"
"fmt"
"io"
"net/http"
"time"
"github.com/mmcdole/gofeed"
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/pkg/httpx"
)
// HttpRequestFn is the signature of the function a FeedClient calls to
// retrieve a feed. It receives the feed URL and the request options for that
// feed and returns the HTTP response or an error.
type HttpRequestFn func(ctx context.Context, link string, options *model.FeedRequestOptions) (*http.Response, error)
// FeedClient retrieves a feed given a feed URL and parses the result.
type FeedClient struct {
	// httpRequestFn is the function invoked to perform the HTTP request for a
	// feed; it is set by the constructors.
	httpRequestFn HttpRequestFn
}
// NewFeedClient returns a FeedClient that retrieves feeds with
// httpx.FusionRequest, the default request function.
func NewFeedClient() FeedClient {
	var defaultRequestFn HttpRequestFn = httpx.FusionRequest
	return NewFeedClientWithRequestFn(defaultRequestFn)
}
// NewFeedClientWithRequestFn returns a FeedClient that retrieves remote feeds
// through the supplied HttpRequestFn instead of the default one. It exists
// mainly so callers (such as tests) can substitute the HTTP layer.
func NewFeedClientWithRequestFn(httpRequestFn HttpRequestFn) FeedClient {
	var c FeedClient
	c.httpRequestFn = httpRequestFn
	return c
}
// FetchTitle retrieves the feed at feedURL using the given request options and
// returns the title of the parsed feed. If the feed cannot be fetched or
// parsed, it returns an empty string together with the error.
func (c FeedClient) FetchTitle(ctx context.Context, feedURL string, options model.FeedRequestOptions) (string, error) {
	parsed, fetchErr := c.fetchFeed(ctx, feedURL, options)
	if fetchErr != nil {
		return "", fetchErr
	}

	title := parsed.Title
	return title, nil
}
// FetchItemsResult is the value returned by FeedClient.FetchItems.
type FetchItemsResult struct {
	// LastBuild is the parsed feed's UpdatedParsed value; it is nil when the
	// feed does not carry an update time.
	LastBuild *time.Time
	// Items are the feed's entries after conversion by ParseGoFeedItems.
	Items []*model.Item
}
// FetchItems retrieves the feed at feedURL using the given request options and
// returns its items, converted with ParseGoFeedItems, along with the feed's
// update time. If the feed cannot be fetched or parsed, it returns a zero
// FetchItemsResult together with the error.
func (c FeedClient) FetchItems(ctx context.Context, feedURL string, options model.FeedRequestOptions) (FetchItemsResult, error) {
	var result FetchItemsResult

	parsed, fetchErr := c.fetchFeed(ctx, feedURL, options)
	if fetchErr != nil {
		return result, fetchErr
	}

	result.LastBuild = parsed.UpdatedParsed
	result.Items = ParseGoFeedItems(parsed.Items)
	return result, nil
}
// fetchFeed requests feedURL through the client's request function and parses
// the response body with gofeed.
//
// It returns an error when the request itself fails, when the response status
// is anything other than 200 OK, when the body cannot be read, or when gofeed
// cannot parse the body. The response body is always closed before returning.
func (c FeedClient) fetchFeed(ctx context.Context, feedURL string, options model.FeedRequestOptions) (*gofeed.Feed, error) {
	response, err := c.httpRequestFn(ctx, feedURL, &options)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("got status code %d", code)
	}

	// The whole body is buffered so that a read failure is reported as-is
	// rather than being folded into a parser error.
	body, err := io.ReadAll(response.Body)
	if err != nil {
		return nil, err
	}

	parser := gofeed.NewParser()
	return parser.ParseString(string(body))
}

View file

@ -0,0 +1,566 @@
package client_test
import (
"context"
"errors"
"net/http"
"strings"
"testing"
"time"
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/pkg/ptr"
"github.com/0x2e/fusion/service/pull/client"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// mockReadCloser is a test double for io.ReadCloser. When errMsg is set,
// every Read fails with that message; otherwise Read serves the bytes of
// result.
type mockReadCloser struct {
	result string
	errMsg string
	// reader is created lazily on the first successful Read so the struct can
	// be built with a plain literal.
	reader *strings.Reader
}

// Read implements io.Reader. It fails with errMsg when one is configured and
// otherwise reads from result, keeping its position between calls.
func (m *mockReadCloser) Read(p []byte) (int, error) {
	switch {
	case m.errMsg != "":
		return 0, errors.New(m.errMsg)
	case m.reader == nil:
		m.reader = strings.NewReader(m.result)
	}
	return m.reader.Read(p)
}

// Close implements io.Closer and always succeeds.
func (m *mockReadCloser) Close() error {
	return nil
}
// mockHTTPClient is a test double for the HTTP layer. Its Get method has the
// shape of client.HttpRequestFn, returns the canned resp or err, and records
// the arguments of the most recent call.
type mockHTTPClient struct {
	resp        *http.Response
	err         error
	lastFeedURL string
	lastOptions *model.FeedRequestOptions
}

// Get records link and options for later assertions, then returns the
// configured error if there is one and the configured response otherwise.
func (m *mockHTTPClient) Get(ctx context.Context, link string, options *model.FeedRequestOptions) (*http.Response, error) {
	m.lastFeedURL, m.lastOptions = link, options

	if m.err == nil {
		return m.resp, nil
	}
	return nil, m.err
}
// TestFeedClientFetchTitle checks that FeedClient.FetchTitle returns the title
// of the fetched feed and that request errors, non-200 responses, body read
// errors and parse errors are surfaced to the caller. Each case also verifies
// that the feed URL and request options reach the HTTP request function
// unchanged.
//
// Fields left out of a case take their zero value (empty string / 0).
func TestFeedClientFetchTitle(t *testing.T) {
	for _, tt := range []struct {
		description        string
		feedURL            string
		options            model.FeedRequestOptions
		httpRespBody       string
		httpStatusCode     int
		httpErrMsg         string
		httpBodyReadErrMsg string
		expectedTitle      string
		expectedErrMsg     string
	}{
		{
			description: "fetch title succeeds when HTTP request and RSS parse succeed",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed Title</title>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedTitle:  "Test Feed Title",
		},
		{
			// Options are passed by value, so they can no longer be nil; this
			// case pins the behavior for the zero value instead.
			description: "fetch title succeeds with default behavior when options are the zero value",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed Title</title>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedTitle:  "Test Feed Title",
		},
		{
			description: "fetch title succeeds when using configured proxy server",
			feedURL:     "https://example.com/feed.xml",
			options: model.FeedRequestOptions{
				ReqProxy: ptr.To("http://proxy.example.com:8080"),
			},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed Title via Proxy</title>
<item>
<title>Test Item via Proxy</title>
<link>https://example.com/proxy-item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedTitle:  "Test Feed Title via Proxy",
		},
		{
			description:    "fetch title fails when HTTP request returns connection error",
			feedURL:        "https://example.com/feed.xml",
			options:        model.FeedRequestOptions{},
			httpStatusCode: 0, // No status code since request errors
			httpErrMsg:     "connection refused",
			expectedErrMsg: "connection refused",
		},
		{
			description:    "fetch title fails when HTTP response has non-200 status code",
			feedURL:        "https://example.com/feed.xml",
			options:        model.FeedRequestOptions{},
			httpStatusCode: http.StatusNotFound,
			expectedErrMsg: "got status code 404",
		},
		{
			description:        "fetch title fails when HTTP response body cannot be read",
			feedURL:            "https://example.com/feed.xml",
			options:            model.FeedRequestOptions{},
			httpStatusCode:     http.StatusOK,
			httpBodyReadErrMsg: "mock body read error",
			expectedErrMsg:     "mock body read error",
		},
		{
			description: "fetch title fails when RSS content cannot be parsed",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<invalid>
<malformed>
<content>This is not a valid RSS feed</content>
</malformed>
</invalid>`,
			httpStatusCode: http.StatusOK,
			expectedErrMsg: "Failed to detect feed type",
		},
		{
			description: "fetch title returns empty string when feed has no title",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedTitle:  "", // The channel has no <title> element.
		},
	} {
		t.Run(tt.description, func(t *testing.T) {
			var httpErr error
			if tt.httpErrMsg != "" {
				httpErr = errors.New(tt.httpErrMsg)
			}

			httpClient := &mockHTTPClient{
				resp: &http.Response{
					StatusCode: tt.httpStatusCode,
					Status:     http.StatusText(tt.httpStatusCode),
					Body: &mockReadCloser{
						result: tt.httpRespBody,
						errMsg: tt.httpBodyReadErrMsg,
					},
				},
				err: httpErr,
			}

			actualTitle, actualErr := client.NewFeedClientWithRequestFn(httpClient.Get).FetchTitle(context.Background(), tt.feedURL, tt.options)

			if tt.expectedErrMsg != "" {
				require.Error(t, actualErr)
				require.Contains(t, actualErr.Error(), tt.expectedErrMsg)
			} else {
				require.NoError(t, actualErr)
			}

			assert.Equal(t, tt.expectedTitle, actualTitle)
			assert.Equal(t, tt.feedURL, httpClient.lastFeedURL, "Incorrect feed URL used")

			// Guard the dereference so that a client which never calls the
			// request function fails the test instead of panicking.
			require.NotNil(t, httpClient.lastOptions, "HTTP request function did not receive options")
			assert.Equal(t, tt.options, *httpClient.lastOptions, "Incorrect HTTP request options")
		})
	}
}
// TestFeedClientFetchItems checks that FeedClient.FetchItems returns the
// feed's items and update time, and that request errors, non-200 responses,
// body read errors and parse errors are surfaced to the caller. Each case also
// verifies that the feed URL and request options reach the HTTP request
// function unchanged.
//
// Only the Title and Link of each item are compared, and only when the
// expected item sets them. Fields left out of a case take their zero value.
func TestFeedClientFetchItems(t *testing.T) {
	for _, tt := range []struct {
		description        string
		feedURL            string
		options            model.FeedRequestOptions
		httpRespBody       string
		httpStatusCode     int
		httpErrMsg         string
		httpBodyReadErrMsg string
		expectedResult     client.FetchItemsResult
		expectedErrMsg     string
	}{
		{
			description: "fetch succeeds with no LastBuild when feed has no updated time",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: nil, // UpdatedParsed is nil in this test case
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			description: "fetch succeeds and populates LastBuild from RSS lastBuildDate",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<lastBuildDate>2025-01-01T12:00:00Z</lastBuildDate>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-01-01T12:00:00Z"),
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			description: "fetch succeeds and populates LastBuild from Atom updated",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Feed</title>
<updated>2025-02-15T15:30:00Z</updated>
<entry>
<title>Test Item</title>
<link href="https://example.com/item"/>
</entry>
</feed>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-02-15T15:30:00Z"),
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			description: "fetch succeeds with different timezone in lastBuildDate",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<lastBuildDate>2025-01-01T07:00:00-05:00</lastBuildDate>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-01-01T12:00:00Z"), // Same time as UTC
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			description: "fetch succeeds with non-standard time format",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<lastBuildDate>Wed, 01 Jan 2025 12:00:00 GMT</lastBuildDate>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-01-01T12:00:00Z"), // Use UTC format since gofeed normalizes to UTC
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			// Options are passed by value, so they can no longer be nil; this
			// case pins the behavior for the zero value instead.
			description: "fetch succeeds with default behavior when options are the zero value",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<lastBuildDate>2025-01-01T12:00:00Z</lastBuildDate>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-01-01T12:00:00Z"),
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item"),
						Link:  ptr.To("https://example.com/item"),
					},
				},
			},
		},
		{
			description: "fetch succeeds when using configured proxy server",
			feedURL:     "https://example.com/feed.xml",
			options: model.FeedRequestOptions{
				ReqProxy: ptr.To("http://proxy.example.com:8080"),
			},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed via Proxy</title>
<lastBuildDate>2025-01-01T12:00:00Z</lastBuildDate>
<item>
<title>Test Item via Proxy</title>
<link>https://example.com/proxy-item</link>
</item>
</channel>
</rss>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{
				LastBuild: mustParseTime("2025-01-01T12:00:00Z"),
				Items: []*model.Item{
					{
						Title: ptr.To("Test Item via Proxy"),
						Link:  ptr.To("https://example.com/proxy-item"),
					},
				},
			},
		},
		{
			description:    "fetch fails when HTTP request returns connection error",
			feedURL:        "https://example.com/feed.xml",
			options:        model.FeedRequestOptions{},
			httpStatusCode: 0, // No status code since request errors
			httpErrMsg:     "connection refused",
			expectedResult: client.FetchItemsResult{},
			expectedErrMsg: "connection refused",
		},
		{
			description:    "fetch fails when HTTP response has non-200 status code",
			feedURL:        "https://example.com/feed.xml",
			options:        model.FeedRequestOptions{},
			httpStatusCode: http.StatusNotFound,
			expectedResult: client.FetchItemsResult{},
			expectedErrMsg: "got status code 404",
		},
		{
			description:        "fetch fails when HTTP response body cannot be read",
			feedURL:            "https://example.com/feed.xml",
			options:            model.FeedRequestOptions{},
			httpStatusCode:     http.StatusOK,
			httpBodyReadErrMsg: "mock body read error",
			expectedResult:     client.FetchItemsResult{},
			expectedErrMsg:     "mock body read error",
		},
		{
			description: "fetch fails when RSS content cannot be parsed",
			feedURL:     "https://example.com/feed.xml",
			options:     model.FeedRequestOptions{},
			httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<invalid>
<malformed>
<content>This is not a valid RSS feed</content>
</malformed>
</invalid>`,
			httpStatusCode: http.StatusOK,
			expectedResult: client.FetchItemsResult{},
			expectedErrMsg: "Failed to detect feed type",
		},
	} {
		t.Run(tt.description, func(t *testing.T) {
			var httpErr error
			if tt.httpErrMsg != "" {
				httpErr = errors.New(tt.httpErrMsg)
			}

			httpClient := &mockHTTPClient{
				resp: &http.Response{
					StatusCode: tt.httpStatusCode,
					Status:     http.StatusText(tt.httpStatusCode),
					Body: &mockReadCloser{
						result: tt.httpRespBody,
						errMsg: tt.httpBodyReadErrMsg,
					},
				},
				err: httpErr,
			}

			actualResult, actualErr := client.NewFeedClientWithRequestFn(httpClient.Get).FetchItems(context.Background(), tt.feedURL, tt.options)

			if tt.expectedErrMsg != "" {
				require.Error(t, actualErr)
				require.Contains(t, actualErr.Error(), tt.expectedErrMsg)
			} else {
				require.NoError(t, actualErr)
			}

			if tt.expectedResult.LastBuild != nil {
				require.NotNil(t, actualResult.LastBuild, "LastBuild should not be nil")
				assert.Equal(t, *tt.expectedResult.LastBuild, *actualResult.LastBuild, "LastBuild time doesn't match")
			} else {
				assert.Nil(t, actualResult.LastBuild, "LastBuild should be nil")
			}

			// Stop on a length mismatch so the per-item comparison below can
			// index actualResult.Items without silently skipping entries.
			require.Len(t, actualResult.Items, len(tt.expectedResult.Items))
			for i, expectedItem := range tt.expectedResult.Items {
				actualItem := actualResult.Items[i]
				require.NotNil(t, actualItem, "item %d should not be nil", i)

				if expectedItem.Title != nil {
					require.NotNil(t, actualItem.Title, "item %d should have a title", i)
					assert.Equal(t, *expectedItem.Title, *actualItem.Title)
				}
				if expectedItem.Link != nil {
					require.NotNil(t, actualItem.Link, "item %d should have a link", i)
					assert.Equal(t, *expectedItem.Link, *actualItem.Link)
				}
			}

			assert.Equal(t, tt.feedURL, httpClient.lastFeedURL, "Incorrect feed URL used")

			// Guard the dereference so that a client which never calls the
			// request function fails the test instead of panicking.
			require.NotNil(t, httpClient.lastOptions, "HTTP request function did not receive options")
			assert.Equal(t, tt.options, *httpClient.lastOptions, "Incorrect HTTP request options")
		})
	}
}
// mustParseTime parses an RFC 3339 timestamp such as "2025-01-01T12:00:00Z"
// and returns a pointer to the resulting time.Time. It panics when the input
// cannot be parsed, so it is only meant for hard-coded test fixtures.
func mustParseTime(iso8601 string) *time.Time {
	parsed := new(time.Time)

	var err error
	if *parsed, err = time.Parse(time.RFC3339, iso8601); err != nil {
		panic(err)
	}
	return parsed
}

View file

@ -1,4 +1,4 @@
package pull
package client
import (
"github.com/0x2e/fusion/model"

View file

@ -1,27 +1,17 @@
package pull_test
package client_test
import (
"testing"
"time"
"github.com/mmcdole/gofeed"
"github.com/stretchr/testify/assert"
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/pkg/ptr"
"github.com/0x2e/fusion/service/pull"
"github.com/0x2e/fusion/service/pull/client"
)
func TestParseGoFeedItems(t *testing.T) {
// Helper function to parse ISO8601 string to time.Time.
parseTime := func(iso8601 string) *time.Time {
t, err := time.Parse(time.RFC3339, iso8601)
if err != nil {
panic(err)
}
return &t
}
for _, tt := range []struct {
description string
gfItems []*gofeed.Item
@ -36,7 +26,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "https://example.com/link",
Content: "<p>This is the content</p>",
Description: "This is the description",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -45,7 +35,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("https://example.com/guid"),
Link: ptr.To("https://example.com/link"),
Content: ptr.To("<p>This is the content</p>"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
@ -59,7 +49,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "https://example.com/link",
Content: "", // Empty content
Description: "This is the description",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -68,7 +58,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("https://example.com/guid"),
Link: ptr.To("https://example.com/link"),
Content: ptr.To("This is the description"), // Should use description
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
@ -82,7 +72,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "https://example.com/link",
Content: "<p>This is the content</p>",
Description: "This is the description",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -91,7 +81,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("https://example.com/link"), // Should use link
Link: ptr.To("https://example.com/link"),
Content: ptr.To("<p>This is the content</p>"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
@ -105,7 +95,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "https://example.com/link",
Content: "", // Empty content
Description: "This is the description",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -114,7 +104,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("https://example.com/link"), // Should use link
Link: ptr.To("https://example.com/link"),
Content: ptr.To("This is the description"), // Should use description
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
@ -128,7 +118,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "link1",
Content: "content1",
Description: "description1",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
{
Title: "Item 2",
@ -136,7 +126,7 @@ func TestParseGoFeedItems(t *testing.T) {
Link: "link2",
Content: "content2",
Description: "description2",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -145,7 +135,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("guid1"),
Link: ptr.To("link1"),
Content: ptr.To("content1"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
{
@ -153,7 +143,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("guid2"),
Link: ptr.To("link2"),
Content: ptr.To("content2"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
@ -171,7 +161,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: "valid-guid",
Link: "https://example.com/valid",
Content: "valid content",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
nil, // Nil item that should be skipped
{
@ -179,7 +169,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: "another-guid",
Link: "https://example.com/another",
Content: "another content",
PublishedParsed: parseTime("2025-01-01T12:00:00Z"),
PublishedParsed: mustParseTime("2025-01-01T12:00:00Z"),
},
},
expected: []*model.Item{
@ -188,7 +178,7 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("valid-guid"),
Link: ptr.To("https://example.com/valid"),
Content: ptr.To("valid content"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
{
@ -196,14 +186,14 @@ func TestParseGoFeedItems(t *testing.T) {
GUID: ptr.To("another-guid"),
Link: ptr.To("https://example.com/another"),
Content: ptr.To("another content"),
PubDate: parseTime("2025-01-01T12:00:00Z"),
PubDate: mustParseTime("2025-01-01T12:00:00Z"),
Unread: ptr.To(true),
},
},
},
} {
t.Run(tt.description, func(t *testing.T) {
result := pull.ParseGoFeedItems(tt.gfItems)
result := client.ParseGoFeedItems(tt.gfItems)
assert.Equal(t, tt.expected, result)
})
}

View file

@ -2,16 +2,11 @@ package pull
import (
"context"
"fmt"
"io"
"net/http"
"time"
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/pkg/httpx"
"github.com/0x2e/fusion/pkg/ptr"
"github.com/mmcdole/gofeed"
"github.com/0x2e/fusion/service/pull/client"
)
func (p *Puller) do(ctx context.Context, f *model.Feed, force bool) error {
@ -36,31 +31,27 @@ func (p *Puller) do(ctx context.Context, f *model.Feed, force bool) error {
}
}
fetched, err := FetchFeed(ctx, f)
result, err := client.NewFeedClient().FetchItems(ctx, *f.Link, f.FeedRequestOptions)
if err != nil {
p.feedRepo.Update(f.ID, &model.Feed{Failure: ptr.To(err.Error())})
return err
}
if fetched == nil {
return nil
}
isLatestBuild := f.LastBuild != nil && fetched.UpdatedParsed != nil &&
fetched.UpdatedParsed.Equal(*f.LastBuild)
if len(fetched.Items) != 0 && !isLatestBuild {
data := ParseGoFeedItems(fetched.Items)
isLatestBuild := f.LastBuild != nil && result.LastBuild != nil &&
result.LastBuild.Equal(*f.LastBuild)
if len(result.Items) != 0 && !isLatestBuild {
// Set the correct feed ID for all items.
for _, item := range data {
for _, item := range result.Items {
item.FeedID = f.ID
}
if err := p.itemRepo.Insert(data); err != nil {
if err := p.itemRepo.Insert(result.Items); err != nil {
return err
}
}
logger.Infof("fetched %d items", len(fetched.Items))
logger.Infof("fetched %d items", len(result.Items))
return p.feedRepo.Update(f.ID, &model.Feed{
LastBuild: fetched.UpdatedParsed,
LastBuild: result.LastBuild,
Failure: ptr.To(""),
})
}
@ -99,39 +90,3 @@ func DecideFeedUpdateAction(f *model.Feed, now time.Time) (FeedUpdateAction, *Fe
}
return ActionFetchUpdate, nil
}
type feedHTTPRequest func(ctx context.Context, link string, options *model.FeedRequestOptions) (*http.Response, error)
// FeedClient retrieves a feed given a feed URL and parses the result.
type FeedClient struct {
httpRequestFn feedHTTPRequest
}
func NewFeedClient(httpRequestFn feedHTTPRequest) FeedClient {
return FeedClient{
httpRequestFn: httpRequestFn,
}
}
func (c FeedClient) Fetch(ctx context.Context, feedURL string, options *model.FeedRequestOptions) (*gofeed.Feed, error) {
resp, err := c.httpRequestFn(ctx, feedURL, options)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("got status code %d", resp.StatusCode)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
return gofeed.NewParser().ParseString(string(data))
}
func FetchFeed(ctx context.Context, f *model.Feed) (*gofeed.Feed, error) {
return NewFeedClient(httpx.FusionRequest).Fetch(ctx, *f.Link, &f.FeedRequestOptions)
}

View file

@ -1,29 +1,16 @@
package pull_test
import (
"context"
"errors"
"net/http"
"strings"
"testing"
"time"
"github.com/mmcdole/gofeed"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/0x2e/fusion/model"
"github.com/0x2e/fusion/pkg/ptr"
"github.com/0x2e/fusion/service/pull"
)
// mockReadCloser is a mock io.ReadCloser that can return either data or an error.
type mockReadCloser struct {
result string
errMsg string
reader *strings.Reader
}
func TestDecideFeedUpdateAction(t *testing.T) {
// Helper function to parse ISO8601 string to time.Time.
parseTime := func(iso8601 string) time.Time {
@ -114,238 +101,3 @@ func TestDecideFeedUpdateAction(t *testing.T) {
})
}
}
func (m *mockReadCloser) Read(p []byte) (n int, err error) {
if m.errMsg != "" {
return 0, errors.New(m.errMsg)
}
if m.reader == nil {
m.reader = strings.NewReader(m.result)
}
return m.reader.Read(p)
}
func (m *mockReadCloser) Close() error {
return nil
}
type mockHTTPClient struct {
resp *http.Response
err error
lastFeedURL string
lastOptions *model.FeedRequestOptions
}
func (m *mockHTTPClient) Get(ctx context.Context, link string, options *model.FeedRequestOptions) (*http.Response, error) {
// Store the last feed URL and options for assertions.
m.lastFeedURL = link
m.lastOptions = options
if m.err != nil {
return nil, m.err
}
return m.resp, nil
}
func TestFeedClientFetch(t *testing.T) {
for _, tt := range []struct {
description string
feedURL string
options *model.FeedRequestOptions
httpRespBody string
httpStatusCode int
httpErrMsg string
httpBodyReadErrMsg string
expectedFeed *gofeed.Feed
expectedErrMsg string
}{
{
description: "fetch succeeds when HTTP request and RSS parse succeed",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{},
httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
httpStatusCode: http.StatusOK,
httpErrMsg: "",
httpBodyReadErrMsg: "",
expectedFeed: &gofeed.Feed{
Title: "Test Feed",
FeedType: "rss",
FeedVersion: "2.0",
Items: []*gofeed.Item{
{
Title: "Test Item",
Link: "https://example.com/item",
Links: []string{"https://example.com/item"},
},
},
},
expectedErrMsg: "",
},
{
description: "fetch succeeds with default behavior when options are nil",
feedURL: "https://example.com/feed.xml",
options: nil,
httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<item>
<title>Test Item</title>
<link>https://example.com/item</link>
</item>
</channel>
</rss>`,
httpStatusCode: http.StatusOK,
httpErrMsg: "",
httpBodyReadErrMsg: "",
expectedFeed: &gofeed.Feed{
Title: "Test Feed",
FeedType: "rss",
FeedVersion: "2.0",
Items: []*gofeed.Item{
{
Title: "Test Item",
Link: "https://example.com/item",
Links: []string{"https://example.com/item"},
},
},
},
expectedErrMsg: "",
},
{
description: "fetch succeeds when using configured proxy server",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{
ReqProxy: func() *string { s := "http://proxy.example.com:8080"; return &s }(),
},
httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed via Proxy</title>
<item>
<title>Test Item via Proxy</title>
<link>https://example.com/proxy-item</link>
</item>
</channel>
</rss>`,
httpStatusCode: http.StatusOK,
httpErrMsg: "",
httpBodyReadErrMsg: "",
expectedFeed: &gofeed.Feed{
Title: "Test Feed via Proxy",
FeedType: "rss",
FeedVersion: "2.0",
Items: []*gofeed.Item{
{
Title: "Test Item via Proxy",
Link: "https://example.com/proxy-item",
Links: []string{"https://example.com/proxy-item"},
},
},
},
expectedErrMsg: "",
},
{
description: "fetch fails when HTTP request returns connection error",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{},
httpRespBody: "",
httpStatusCode: 0, // No status code since request errors
httpErrMsg: "connection refused",
httpBodyReadErrMsg: "",
expectedFeed: nil,
expectedErrMsg: "connection refused",
},
{
description: "fetch fails when HTTP response has non-200 status code",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{},
httpRespBody: "",
httpStatusCode: http.StatusNotFound,
httpErrMsg: "",
httpBodyReadErrMsg: "",
expectedFeed: nil,
expectedErrMsg: "got status code 404",
},
{
description: "fetch fails when HTTP response body cannot be read",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{},
httpRespBody: "",
httpStatusCode: http.StatusOK,
httpErrMsg: "",
httpBodyReadErrMsg: "mock body read error",
expectedFeed: nil,
expectedErrMsg: "mock body read error",
},
{
description: "fetch fails when RSS content cannot be parsed",
feedURL: "https://example.com/feed.xml",
options: &model.FeedRequestOptions{},
httpRespBody: `<?xml version="1.0" encoding="UTF-8"?>
<invalid>
<malformed>
<content>This is not a valid RSS feed</content>
</malformed>
</invalid>`,
httpStatusCode: http.StatusOK,
httpErrMsg: "",
httpBodyReadErrMsg: "",
expectedFeed: nil,
expectedErrMsg: "Failed to detect feed type",
},
} {
t.Run(tt.description, func(t *testing.T) {
body := &mockReadCloser{
result: tt.httpRespBody,
errMsg: tt.httpBodyReadErrMsg,
}
httpClient := &mockHTTPClient{
resp: &http.Response{
StatusCode: tt.httpStatusCode,
Status: http.StatusText(tt.httpStatusCode),
Body: body,
},
err: func() error {
if tt.httpErrMsg != "" {
return errors.New(tt.httpErrMsg)
}
return nil
}(),
}
actualFeed, actualErr := pull.NewFeedClient(httpClient.Get).Fetch(context.Background(), tt.feedURL, tt.options)
if tt.expectedErrMsg != "" {
require.Error(t, actualErr)
require.Contains(t, actualErr.Error(), tt.expectedErrMsg)
} else {
require.NoError(t, actualErr)
}
assert.Equal(t, tt.expectedFeed, actualFeed)
// Verify that the HTTP client received the correct URL.
assert.Equal(t, tt.feedURL, httpClient.lastFeedURL, "Incorrect feed URL used")
// Verify that the HTTP client received the correct options.
if tt.options == nil {
assert.Nil(t, httpClient.lastOptions, "Expected nil options")
} else {
assert.Equal(t, *tt.options, *httpClient.lastOptions, "Incorrect HTTP request options")
}
})
}
}