diff --git a/go.mod b/go.mod index cc107c628..571c09c0b 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/gogo/protobuf v1.3.1 github.com/golang/protobuf v1.3.2 // indirect github.com/ipfs/go-log v0.0.1 + github.com/otiai10/opengraph v1.1.0 github.com/stretchr/testify v1.3.0 github.com/textileio/go-textile v0.7.2-0.20190907000013-95a885123536 ) diff --git a/go.sum b/go.sum index 8a6050d9f..8aa0b3bcf 100644 --- a/go.sum +++ b/go.sum @@ -69,6 +69,7 @@ github.com/coreos/go-semver v0.2.1-0.20180108230905-e214231b295a/go.mod h1:nnelY github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= +github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cskr/pubsub v1.0.2 h1:vlOzMhl6PFn60gRlTQQsIfVwaPB/B/8MziK8FhEPt/0= github.com/cskr/pubsub v1.0.2/go.mod h1:/8MzYXk/NJAz782G8RPkFzXTZVu63VotefPnR9TIRis= github.com/davecgh/go-spew v0.0.0-20171005155431-ecdeabc65495/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -650,6 +651,14 @@ github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= +github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95 h1:+OLn68pqasWca0z5ryit9KGfp3sUsW4Lqg32iRMJyzs= +github.com/otiai10/curr v0.0.0-20150429015615-9b4961190c95/go.mod h1:9qAhocn7zKJG+0mI8eUu6xqkFDYS2kb2saOteoSB3cE= +github.com/otiai10/marmoset v0.4.0 h1:Hg59lQI7qQowBEdsAJ/+VDTEospTBzXX/A1Gsw4mlvA= +github.com/otiai10/marmoset v0.4.0/go.mod h1:t2q6dXWZ9YcFdRREDApX4bCmfQnL3isJ2dgl8ychlXg= +github.com/otiai10/mint v1.3.0 h1:Ady6MKVezQwHBkGzLFbrsywyp09Ah7rkmfjV3Bcr5uc= +github.com/otiai10/mint v1.3.0/go.mod h1:F5AjcsTsWUqX+Na9fpHb52P8pcRX2CI6A3ctIT91xUo= +github.com/otiai10/opengraph v1.1.0 h1:7CLPM41/VNVOh6V04K9Ccui2Fn6p+NEZ06krlRxV7HQ= +github.com/otiai10/opengraph v1.1.0/go.mod h1:ZMbPcfiSRSsg3+yrWZCXrgYL6kEK4KpH4GG1iyIvEXs= github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtPdI/k= github.com/pelletier/go-toml v1.1.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= @@ -679,6 +688,7 @@ github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/rs/cors v1.6.0 h1:G9tHG9lebljV9mfp9SNPDL36nCDxmo3zTlAf1YgvzmI= github.com/rs/cors v1.6.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd h1:CmH9+J6ZSsIjUK3dcGsnCnO41eRBOnY12zwkn5qVwgc= github.com/rwcarlsen/goexif v0.0.0-20190401172101-9e8deecbddbd/go.mod h1:hPqNNc0+uJM6H+SuU8sEs5K5IQeKccPqeSjfgcKGgPk= github.com/ryanuber/go-glob v0.0.0-20170128012129-256dc444b735/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= @@ -688,6 +698,7 @@ github.com/shirou/gopsutil v0.0.0-20180427012116-c95755e4bcd7/go.mod h1:5b4v6he4 github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc= github.com/shurcooL/go v0.0.0-20180423040247-9e1955d9fb6e/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk= github.com/shurcooL/go-goon v0.0.0-20170922171312-37c2f522c041/go.mod h1:N5mDOmsrJOB+vfqUK+7DmDyjhSLIIBnXo9lvZJj3MWQ= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.0.5/go.mod h1:pMByvHTf9Beacp5x1UXfOR9xyW/9antXMhjMPG0dEzc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= @@ -741,6 +752,7 @@ github.com/ugorji/go/codec v0.0.0-20181209151446-772ced7fd4c2/go.mod h1:VFNgLljT github.com/ugorji/go/codec v0.0.0-20190320090025-2dc34c0b8780 h1:vG/gY/PxA3v3l04qxe3tDjXyu3bozii8ulSlIPOYKhI= github.com/ugorji/go/codec v0.0.0-20190320090025-2dc34c0b8780/go.mod h1:iT03XoTwV7xq/+UGwKO3UbC1nNNlopQiY61beSdrtOA= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= +github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830 h1:8kxMKmKzXXL4Ru1nyhvdms/JjWt+3YLpvRb/bAjO/y0= github.com/warpfork/go-wish v0.0.0-20190328234359-8b3e70f8e830/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= @@ -838,6 +850,8 @@ golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs= +golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/util/linkpreview/linkpreview.go b/util/linkpreview/linkpreview.go new file mode 100644 index 000000000..934dfdbfb --- /dev/null +++ b/util/linkpreview/linkpreview.go @@ -0,0 +1,93 @@ +package linkpreview + +import ( + "context" + "net/http" + "path/filepath" + "strings" + + "github.com/otiai10/opengraph" +) + +func New() LinkPreview { + return &linkPreview{} +} + +type LinkType string + +const ( + LinkTypeHtml LinkType = "html" + LinkTypeImage LinkType = "image" + LinkTypeVideo LinkType = "video" + LinkTypeText LinkType = "text" + LinkTypeUnexpected LinkType = "unexpected" + + // read no more than 400 kb + maxBytesToRead = 400000 +) + +type LinkPreview interface { + Fetch(ctx context.Context, url string) (Info, error) +} + +type Info struct { + Title string + Description string + ImageUrl string + Type LinkType +} + +type linkPreview struct{} + +func (l *linkPreview) Fetch(ctx context.Context, url string) (Info, error) { + rt := &proxyRoundTripper{RoundTripper: http.DefaultTransport} + client := &http.Client{Transport: rt} + og, err := opengraph.FetchWithContext(ctx, url, client) + if err != nil { + if resp := rt.lastResponse; resp != nil && resp.StatusCode == http.StatusOK { + return l.makeNonHtml(url, resp) + } + return Info{}, err + } + return l.convertOGToInfo(og), nil +} + +func (l *linkPreview) convertOGToInfo(og *opengraph.OpenGraph) (i Info) { + i = Info{ + Title: og.Title, + Description: og.Description, + Type: LinkTypeHtml, + } + if len(og.Image) != 0 { + i.ImageUrl = og.Image[0].URL + } + return +} + +func (l *linkPreview) makeNonHtml(url string, resp *http.Response) (i Info, err error) { + ct := resp.Header.Get("Content-Type") + i.Title = filepath.Base(url) + if strings.HasPrefix(ct, "image/") { + i.Type = LinkTypeImage + i.ImageUrl = url + } else if strings.HasPrefix(ct, "text/") { + i.Type = LinkTypeText + } else { + i.Type = LinkTypeUnexpected + } + return +} + +type proxyRoundTripper struct { + http.RoundTripper + lastResponse *http.Response +} + +func (p *proxyRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + resp, err := p.RoundTripper.RoundTrip(req) + if err == nil { + p.lastResponse = resp + resp.Body = http.MaxBytesReader(nil, resp.Body, maxBytesToRead) + } + return resp, err +} diff --git a/util/linkpreview/linkpreview_test.go b/util/linkpreview/linkpreview_test.go new file mode 100644 index 000000000..51f6df83c --- /dev/null +++ b/util/linkpreview/linkpreview_test.go @@ -0,0 +1,83 @@ +package linkpreview + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ctx = context.Background() + +func TestLinkPreview_Fetch(t *testing.T) { + t.Run("html page", func(t *testing.T) { + ts := newTestServer("text/html", strings.NewReader(tetsHtml)) + defer ts.Close() + lp := New() + + info, err := lp.Fetch(ctx, ts.URL) + require.NoError(t, err) + assert.Equal(t, Info{ + Title: "Title", + Description: "Description", + ImageUrl: "http://site.com/images/example.jpg", + Type: LinkTypeHtml, + }, info) + }) + + t.Run("binary image", func(t *testing.T) { + tr := testReader(0) + ts := newTestServer("image/jpg", &tr) + defer ts.Close() + url := ts.URL + "/filename.jpg" + lp := New() + info, err := lp.Fetch(ctx, url) + require.NoError(t, err) + assert.Equal(t, Info{ + Title: "filename.jpg", + ImageUrl: url, + Type: LinkTypeImage, + }, info) + assert.True(t, int(tr) <= maxBytesToRead) + }) + + t.Run("binary", func(t *testing.T) { + tr := testReader(0) + ts := newTestServer("binary/octed-stream", &tr) + defer ts.Close() + url := ts.URL + "/filename.jpg" + lp := New() + info, err := lp.Fetch(ctx, url) + require.NoError(t, err) + assert.Equal(t, Info{ + Title: "filename.jpg", + Type: LinkTypeUnexpected, + }, info) + assert.True(t, int(tr) <= maxBytesToRead) + }) +} + +func newTestServer(contentType string, data io.Reader) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", contentType) + io.Copy(w, data) + })) +} + +const tetsHtml = ` +Title + + +` + +type testReader int + +func (t *testReader) Read(p []byte) (n int, err error) { + *t += testReader(len(p)) + return len(p), nil +}