mirror of
https://github.com/anyproto/anytype-heart.git
synced 2025-06-11 02:13:41 +09:00
GO-4753 rework file names detection when downloading files
This commit is contained in:
parent
9afac115e4
commit
1f79f955aa
5 changed files with 243 additions and 56 deletions
|
@ -2,8 +2,10 @@ package uri
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"mime"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
@ -84,3 +86,68 @@ func NormalizeAndParseURI(uri string) (*url.URL, error) {
|
|||
|
||||
return url.Parse(normalizeURI(uri))
|
||||
}
|
||||
|
||||
// preferredExtensions pins the extension used for media types that have
// several registered extensions, so that generated names do not depend on the
// order returned by mime.ExtensionsByType. Keys must be lowercase media types
// without parameters.
var preferredExtensions = map[string]string{
	"image/jpeg": ".jpeg",
	"audio/mpeg": ".mp3",
	// Add more preferred mappings if needed
}

// GetFileNameFromURLAndContentType derives a file name for downloaded content.
//
// If the last path segment of u looks like a file name (it contains a dot and
// does not consist of dots only, so "." and ".." are rejected), that segment
// is returned as is and contentType is ignored.
//
// Otherwise the name is assembled as "<host>_<base><ext>":
//   - host is u's hostname without a leading "www." and with dots replaced by
//     underscores ("file" if the hostname is empty); when u is nil the
//     "<host>_" prefix is omitted entirely;
//   - base is "image", "audio" or "video" for the matching media type family
//     and "file" for everything else;
//   - ext is taken from preferredExtensions, then from the first result of
//     mime.ExtensionsByType, and finally falls back to ".bin".
//
// contentType may be a raw Content-Type header value: parameters and letter
// case are ignored when matching.
func GetFileNameFromURLAndContentType(u *url.URL, contentType string) string {
	var host string
	if u != nil {
		// A plausible file name found directly in the URL wins over everything else.
		if name := urlPathFileName(u.Path); name != "" {
			return name
		}

		// No file name, fall back to a host-based prefix.
		host = strings.TrimPrefix(u.Hostname(), "www.")
		host = strings.ReplaceAll(host, ".", "_")
		if host == "" {
			host = "file"
		}
	}

	mediaType := bareMediaType(contentType)

	ext, ok := preferredExtensions[mediaType]
	if !ok {
		// Fallback if the type is unknown or has no registered extension.
		ext = ".bin"
		if exts, err := mime.ExtensionsByType(mediaType); err == nil && len(exts) > 0 {
			ext = exts[0]
		}
	}

	base := "file"
	switch {
	case strings.HasPrefix(mediaType, "image/"):
		base = "image"
	case strings.HasPrefix(mediaType, "audio/"):
		base = "audio"
	case strings.HasPrefix(mediaType, "video/"):
		base = "video"
	}

	if host == "" {
		return base + ext
	}
	return host + "_" + base + ext
}

// urlPathFileName returns the last segment of urlPath if it looks like a file
// name, or "" otherwise. A segment qualifies when it contains a dot (which
// also covers hidden files such as ".htaccess") and is not made of dots only:
// "." and ".." are directory references and must never be used as file names.
func urlPathFileName(urlPath string) string {
	name := filepath.Base(urlPath)
	if !strings.Contains(name, ".") || strings.Trim(name, ".") == "" {
		return ""
	}
	return name
}

// bareMediaType reduces a Content-Type value to its lowercase media type
// without parameters, e.g. "Image/JPEG; charset=binary" becomes "image/jpeg".
// If the value cannot be parsed at all, it is returned trimmed and lowercased
// so that callers can still attempt a best-effort match.
func bareMediaType(contentType string) string {
	// ParseMediaType may return a usable media type together with an error
	// when only the parameters are malformed, so check the value, not the error.
	if mediaType, _, _ := mime.ParseMediaType(contentType); mediaType != "" {
		return mediaType
	}
	return strings.ToLower(strings.TrimSpace(contentType))
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package uri
|
||||
|
||||
import (
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
@ -118,3 +119,114 @@ func TestURI_ValidateURI(t *testing.T) {
|
|||
assert.NoError(t, err)
|
||||
})
|
||||
}
|
||||
|
||||
func TestGetFileNameFromURLWithContentTypeAndMime(t *testing.T) {
|
||||
mustParseURL := func(s string) *url.URL {
|
||||
u, err := url.Parse(s)
|
||||
if err != nil {
|
||||
t.Fatalf("url.Parse(%q) failed: %v", s, err)
|
||||
}
|
||||
return u
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
url *url.URL
|
||||
contentType string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "URL with explicit filename and extension",
|
||||
url: mustParseURL("https://example.com/image.jpg"),
|
||||
contentType: "image/jpeg",
|
||||
expected: "image.jpg",
|
||||
},
|
||||
{
|
||||
name: "URL with explicit filename and extension, but wrong content type",
|
||||
url: mustParseURL("https://example.com/image.jpg"),
|
||||
contentType: "image/png",
|
||||
expected: "image.jpg",
|
||||
},
|
||||
{
|
||||
name: "URL with explicit filename and extension, and empty content type",
|
||||
url: mustParseURL("https://example.com/image.jpg"),
|
||||
contentType: "",
|
||||
expected: "image.jpg",
|
||||
},
|
||||
{
|
||||
name: "URL with query and fragment, explicit filename",
|
||||
url: mustParseURL("https://example.com/file.jpeg?query=1#111"),
|
||||
contentType: "image/jpeg",
|
||||
expected: "file.jpeg",
|
||||
},
|
||||
{
|
||||
name: "No filename in URL, fallback to host and image/jpeg",
|
||||
url: mustParseURL("https://www.example.com/path/to/"),
|
||||
contentType: "image/jpeg",
|
||||
// host -> example_com
|
||||
// image/jpeg typically corresponds to .jpeg or .jpg (mime usually returns .jpeg)
|
||||
expected: "example_com_image.jpeg",
|
||||
},
|
||||
{
|
||||
name: "Host-only URL, fallback with image/png",
|
||||
url: mustParseURL("https://www.example.com"),
|
||||
contentType: "image/png",
|
||||
expected: "example_com_image.png",
|
||||
},
|
||||
{
|
||||
name: "Filename present with video/mp4",
|
||||
url: mustParseURL("https://www.sub.example.co.uk/folder/video.mp4"),
|
||||
contentType: "video/mp4",
|
||||
expected: "video.mp4",
|
||||
},
|
||||
{
|
||||
name: "No extension but filename present",
|
||||
url: mustParseURL("https://example.com/filename"),
|
||||
contentType: "image/gif",
|
||||
expected: "example_com_image.gif",
|
||||
},
|
||||
{
|
||||
name: "Invalid URL returns empty",
|
||||
url: nil,
|
||||
contentType: "image/jpeg",
|
||||
expected: "image.jpeg",
|
||||
},
|
||||
{
|
||||
name: "No filename, video/unknown fallback to .bin",
|
||||
url: mustParseURL("https://www.subdomain.example.com/folder/"),
|
||||
contentType: "video/unknown",
|
||||
// no known extension for "video/unknown", fallback .bin
|
||||
expected: "subdomain_example_com_video.bin",
|
||||
},
|
||||
{
|
||||
name: "Hidden file as filename",
|
||||
url: mustParseURL("https://example.com/.htaccess"),
|
||||
contentType: "text/plain",
|
||||
expected: ".htaccess",
|
||||
},
|
||||
{
|
||||
name: "URL with query but no filename extension, fallback audio/mpeg",
|
||||
url: mustParseURL("https://example.com/path?version=2"),
|
||||
contentType: "audio/mpeg",
|
||||
// audio/mpeg known extension: .mp3
|
||||
expected: "example_com_audio.mp3",
|
||||
},
|
||||
{
|
||||
name: "Unknown type entirely",
|
||||
url: mustParseURL("https://example.net/"),
|
||||
contentType: "application/x-something-strange",
|
||||
// no filename, fallback host: example_net
|
||||
// unknown type -> .bin
|
||||
expected: "example_net_file.bin",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := GetFileNameFromURLAndContentType(tt.url, tt.contentType)
|
||||
if got != tt.expected {
|
||||
t.Errorf("GetFileNameFromURL(%q, %q) = %q; want %q", tt.url, tt.contentType, got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue