mirror of
https://github.com/anyproto/anytype-heart.git
synced 2025-06-09 17:44:59 +09:00
Merge pull request #1917 from anyproto/go-4573-importing-markdown-files-named-in-japanese-or-chinese
GO-4573: Importing markdown files named in Japanese or Chinese, references to these files won't resolve properly
This commit is contained in:
commit
ec00669550
12 changed files with 215 additions and 38 deletions
|
@ -12,13 +12,18 @@ import (
|
|||
"github.com/anyproto/anytype-heart/util/anyerror"
|
||||
)
|
||||
|
||||
// OriginalFileNameGetter is an optional capability of an import source.
// Callers type-assert a source to this interface and, when the assertion
// succeeds, pass file names through it before using them as lookup keys.
type OriginalFileNameGetter interface {
	// GetFileOriginalName returns the name the source associates with
	// filename. The Zip implementation returns filename unchanged when it
	// holds no mapping for it.
	GetFileOriginalName(filename string) string
}
|
||||
|
||||
// Zip holds the state needed to read files out of a zip archive.
type Zip struct {
	archiveReader *zip.ReadCloser
	// fileReaders is keyed by the normalized entry name.
	fileReaders map[string]*zip.File

	// originalToNormalizedNames maps an entry name exactly as stored in the
	// archive to its normalized name. Only entries whose two names differ
	// are recorded.
	originalToNormalizedNames map[string]string
}

// NewZip returns a Zip whose maps are both allocated, so that entries can be
// written to originalToNormalizedNames without a nil-map panic.
func NewZip() *Zip {
	return &Zip{
		fileReaders:               make(map[string]*zip.File),
		originalToNormalizedNames: make(map[string]string),
	}
}
|
||||
|
||||
func (z *Zip) Initialize(importPath string) error {
|
||||
|
@ -32,7 +37,11 @@ func (z *Zip) Initialize(importPath string) error {
|
|||
if strings.HasPrefix(f.Name, "__MACOSX/") {
|
||||
continue
|
||||
}
|
||||
fileReaders[normalizeName(f, i)] = f
|
||||
normalizedName := normalizeName(f, i)
|
||||
fileReaders[normalizedName] = f
|
||||
if normalizedName != f.Name {
|
||||
z.originalToNormalizedNames[f.Name] = normalizedName
|
||||
}
|
||||
}
|
||||
z.fileReaders = fileReaders
|
||||
return nil
|
||||
|
@ -94,3 +103,10 @@ func (z *Zip) Close() {
|
|||
func (z *Zip) IsRootFile(fileName string) bool {
|
||||
return filepath.Dir(fileName) == "."
|
||||
}
|
||||
|
||||
func (z *Zip) GetFileOriginalName(fileName string) string {
|
||||
if originalName, ok := z.originalToNormalizedNames[fileName]; ok {
|
||||
return originalName
|
||||
}
|
||||
return fileName
|
||||
}
|
||||
|
|
|
@ -2,18 +2,27 @@ package test
|
|||
|
||||
import (
|
||||
"archive/zip"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func CreateEmptyZip(t *testing.T, zipFileName string) error {
|
||||
func CreateEmptyZip(t *testing.T, zipFileName string) {
|
||||
zipFile, err := os.Create(zipFileName)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to create zip file: %w\n", err)
|
||||
}
|
||||
assert.NoError(t, err)
|
||||
defer zipFile.Close()
|
||||
|
||||
zipWriter := zip.NewWriter(zipFile)
|
||||
err = zipWriter.Close()
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
func CreateZipWithFiles(t *testing.T, zipFileName, testDataDir string, files []*zip.FileHeader) {
|
||||
zipFile, err := os.Create(zipFileName)
|
||||
assert.NoError(t, err)
|
||||
defer zipFile.Close()
|
||||
|
||||
zipWriter := zip.NewWriter(zipFile)
|
||||
|
@ -21,5 +30,13 @@ func CreateEmptyZip(t *testing.T, zipFileName string) error {
|
|||
err = zipWriter.Close()
|
||||
assert.NoError(t, err)
|
||||
}()
|
||||
return nil
|
||||
|
||||
for _, file := range files {
|
||||
writer, err := zipWriter.CreateHeader(file)
|
||||
assert.NoError(t, err)
|
||||
fileReader, err := os.Open(filepath.Join(testDataDir, file.Name))
|
||||
assert.NoError(t, err)
|
||||
_, err = io.Copy(writer, fileReader)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -819,8 +819,7 @@ func TestCSV_GetSnapshots(t *testing.T) {
|
|||
// given
|
||||
dir := t.TempDir()
|
||||
zipPath := filepath.Join(dir, "empty.zip")
|
||||
err := test.CreateEmptyZip(t, zipPath)
|
||||
assert.Nil(t, err)
|
||||
test.CreateEmptyZip(t, zipPath)
|
||||
csv := CSV{}
|
||||
p := process.NewProgress(&pb.ModelProcessMessageOfImport{Import: &pb.ModelProcessImport{}})
|
||||
|
||||
|
|
|
@ -61,8 +61,7 @@ func TestHTML_GetSnapshots(t *testing.T) {
|
|||
// given
|
||||
dir := t.TempDir()
|
||||
zipPath := filepath.Join(dir, "empty.zip")
|
||||
err := test.CreateEmptyZip(t, zipPath)
|
||||
assert.Nil(t, err)
|
||||
test.CreateEmptyZip(t, zipPath)
|
||||
html := HTML{}
|
||||
p := process.NewProgress(&pb.ModelProcessMessageOfImport{Import: &pb.ModelProcessImport{}})
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ func (m *mdConverter) processFiles(importPath string, allErrors *common.ConvertE
|
|||
}
|
||||
fileInfo := m.getFileInfo(importSource, allErrors)
|
||||
for name, file := range fileInfo {
|
||||
m.processBlocks(name, file, fileInfo)
|
||||
m.processBlocks(name, file, fileInfo, importSource)
|
||||
for _, b := range file.ParsedBlocks {
|
||||
m.processFileBlock(b, importSource, importPath)
|
||||
}
|
||||
|
@ -89,36 +89,38 @@ func (m *mdConverter) fillFilesInfo(importSource source.Source, fileInfo map[str
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *mdConverter) processBlocks(shortPath string, file *FileInfo, files map[string]*FileInfo) {
|
||||
func (m *mdConverter) processBlocks(shortPath string, file *FileInfo, files map[string]*FileInfo, importSource source.Source) {
|
||||
for _, block := range file.ParsedBlocks {
|
||||
m.processTextBlock(block, files)
|
||||
m.processTextBlock(block, files, importSource)
|
||||
}
|
||||
m.processLinkBlock(shortPath, file, files)
|
||||
}
|
||||
|
||||
func (m *mdConverter) processTextBlock(block *model.Block, files map[string]*FileInfo) {
|
||||
func (m *mdConverter) processTextBlock(block *model.Block, files map[string]*FileInfo, importSource source.Source) {
|
||||
txt := block.GetText()
|
||||
if txt != nil && txt.Marks != nil {
|
||||
if len(txt.Marks.Marks) == 1 && txt.Marks.Marks[0].Type == model.BlockContentTextMark_Link {
|
||||
m.handleSingleMark(block, files)
|
||||
m.handleSingleMark(block, files, importSource)
|
||||
} else {
|
||||
m.handleMultipleMarks(block, files)
|
||||
m.handleMultipleMarks(block, files, importSource)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mdConverter) handleSingleMark(block *model.Block, files map[string]*FileInfo) {
|
||||
func (m *mdConverter) handleSingleMark(block *model.Block, files map[string]*FileInfo, importSource source.Source) {
|
||||
txt := block.GetText()
|
||||
link := txt.Marks.Marks[0].Param
|
||||
wholeLineLink := m.isWholeLineLink(txt.Text, txt.Marks.Marks[0])
|
||||
ext := filepath.Ext(link)
|
||||
ext := filepath.Ext(txt.Marks.Marks[0].Param)
|
||||
link := m.getOriginalName(txt.Marks.Marks[0].Param, importSource)
|
||||
if file := files[link]; file != nil {
|
||||
if strings.EqualFold(ext, ".csv") {
|
||||
txt.Marks.Marks[0].Param = link
|
||||
m.processCSVFileLink(block, files, link, wholeLineLink)
|
||||
return
|
||||
}
|
||||
if strings.EqualFold(ext, ".md") {
|
||||
// only convert if this is the only link in the row
|
||||
txt.Marks.Marks[0].Param = link
|
||||
m.convertToAnytypeLinkBlock(block, wholeLineLink)
|
||||
} else {
|
||||
block.Content = anymark.ConvertTextToFile(txt.Marks.Marks[0].Param)
|
||||
|
@ -129,31 +131,33 @@ func (m *mdConverter) handleSingleMark(block *model.Block, files map[string]*Fil
|
|||
}
|
||||
}
|
||||
|
||||
func (m *mdConverter) handleMultipleMarks(block *model.Block, files map[string]*FileInfo) {
|
||||
func (m *mdConverter) handleMultipleMarks(block *model.Block, files map[string]*FileInfo, importSource source.Source) {
|
||||
txt := block.GetText()
|
||||
for _, mark := range txt.Marks.Marks {
|
||||
if mark.Type == model.BlockContentTextMark_Link {
|
||||
if stop := m.handleSingleLinkMark(block, files, mark, txt); stop {
|
||||
if stop := m.handleSingleLinkMark(block, files, mark, txt, importSource); stop {
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mdConverter) handleSingleLinkMark(block *model.Block, files map[string]*FileInfo, mark *model.BlockContentTextMark, txt *model.BlockContentText) bool {
|
||||
link := mark.Param
|
||||
func (m *mdConverter) handleSingleLinkMark(block *model.Block, files map[string]*FileInfo, mark *model.BlockContentTextMark, txt *model.BlockContentText, importSource source.Source) bool {
|
||||
isWholeLink := m.isWholeLineLink(txt.Text, mark)
|
||||
link := m.getOriginalName(mark.Param, importSource)
|
||||
ext := filepath.Ext(link)
|
||||
if file := files[link]; file != nil {
|
||||
file.HasInboundLinks = true
|
||||
if strings.EqualFold(ext, ".md") || strings.EqualFold(ext, ".csv") {
|
||||
mark.Type = model.BlockContentTextMark_Mention
|
||||
mark.Param = link
|
||||
return false
|
||||
}
|
||||
if m.isWholeLineLink(txt.Text, mark) {
|
||||
if isWholeLink {
|
||||
block.Content = anymark.ConvertTextToFile(mark.Param)
|
||||
return true
|
||||
}
|
||||
} else if m.isWholeLineLink(txt.Text, mark) {
|
||||
} else if isWholeLink {
|
||||
m.convertTextToBookmark(mark.Param, block)
|
||||
return true
|
||||
}
|
||||
|
@ -275,3 +279,10 @@ func (m *mdConverter) createBlocksFromFile(importSource source.Source, filePath
|
|||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *mdConverter) getOriginalName(link string, importSource source.Source) string {
|
||||
if originalFileNameGetter, ok := importSource.(source.OriginalFileNameGetter); ok {
|
||||
return originalFileNameGetter.GetFileOriginalName(link)
|
||||
}
|
||||
return link
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ func Test_processFiles(t *testing.T) {
|
|||
files := converter.processFiles(absolutePath, common.NewError(pb.RpcObjectImportRequest_IGNORE_ERRORS), source)
|
||||
|
||||
// then
|
||||
assert.Len(t, files, 6)
|
||||
assert.Len(t, files, 9)
|
||||
|
||||
pdfFilePath := filepath.Join(absolutePath, "test.pdf")
|
||||
assert.Contains(t, files, pdfFilePath)
|
||||
|
@ -74,7 +74,7 @@ func Test_processFiles(t *testing.T) {
|
|||
files := converter.processFiles(absolutePath, common.NewError(pb.RpcObjectImportRequest_IGNORE_ERRORS), source)
|
||||
|
||||
// then
|
||||
assert.Len(t, files, 4)
|
||||
assert.Len(t, files, 7)
|
||||
|
||||
pdfFilePath := filepath.Join(absolutePath, "test.pdf")
|
||||
assert.NotContains(t, files, pdfFilePath)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package markdown
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"context"
|
||||
"errors"
|
||||
"os"
|
||||
|
@ -97,7 +98,7 @@ func TestMarkdown_GetSnapshots(t *testing.T) {
|
|||
// then
|
||||
assert.Nil(t, err)
|
||||
assert.NotNil(t, sn)
|
||||
assert.Len(t, sn.Snapshots, 4)
|
||||
assert.Len(t, sn.Snapshots, 7)
|
||||
|
||||
fileNameToObjectId := make(map[string]string, len(sn.Snapshots))
|
||||
for _, snapshot := range sn.Snapshots {
|
||||
|
@ -119,8 +120,7 @@ func TestMarkdown_GetSnapshots(t *testing.T) {
|
|||
// given
|
||||
testDirectory := t.TempDir()
|
||||
zipPath := filepath.Join(testDirectory, "empty.zip")
|
||||
err := test.CreateEmptyZip(t, zipPath)
|
||||
assert.Nil(t, err)
|
||||
test.CreateEmptyZip(t, zipPath)
|
||||
|
||||
h := &Markdown{}
|
||||
p := process.NewProgress(&pb.ModelProcessMessageOfImport{Import: &pb.ModelProcessImport{}})
|
||||
|
@ -139,6 +139,132 @@ func TestMarkdown_GetSnapshots(t *testing.T) {
|
|||
assert.Nil(t, sn)
|
||||
assert.True(t, errors.Is(ce.GetResultError(model.Import_Markdown), common.ErrFileImportNoObjectsInZipArchive))
|
||||
})
|
||||
t.Run("import non utf files", func(t *testing.T) {
|
||||
// given
|
||||
testDirectory := t.TempDir()
|
||||
zipPath := filepath.Join(testDirectory, "nonutf.zip")
|
||||
fileMdName := "こんにちは.md"
|
||||
fileCsvName := "你好.csv"
|
||||
fileWithLinksName := "nonutflinks.md"
|
||||
|
||||
test.CreateZipWithFiles(t, zipPath, "testdata", []*zip.FileHeader{
|
||||
{
|
||||
Name: fileWithLinksName,
|
||||
Method: zip.Deflate,
|
||||
},
|
||||
{
|
||||
Name: fileMdName,
|
||||
Method: zip.Deflate,
|
||||
NonUTF8: true,
|
||||
},
|
||||
{
|
||||
Name: fileCsvName,
|
||||
Method: zip.Deflate,
|
||||
NonUTF8: true,
|
||||
},
|
||||
})
|
||||
|
||||
h := &Markdown{}
|
||||
p := process.NewProgress(&pb.ModelProcessMessageOfImport{Import: &pb.ModelProcessImport{}})
|
||||
|
||||
// when
|
||||
sn, ce := h.GetSnapshots(context.Background(), &pb.RpcObjectImportRequest{
|
||||
Params: &pb.RpcObjectImportRequestParamsOfMarkdownParams{
|
||||
MarkdownParams: &pb.RpcObjectImportRequestMarkdownParams{Path: []string{zipPath}},
|
||||
},
|
||||
Type: model.Import_Markdown,
|
||||
Mode: pb.RpcObjectImportRequest_IGNORE_ERRORS,
|
||||
}, p)
|
||||
|
||||
// then
|
||||
assert.Nil(t, ce)
|
||||
assert.NotNil(t, sn)
|
||||
assert.Len(t, sn.Snapshots, 4)
|
||||
fileNameToObjectId := make(map[string]string, len(sn.Snapshots))
|
||||
for _, snapshot := range sn.Snapshots {
|
||||
fileNameToObjectId[snapshot.FileName] = snapshot.Id
|
||||
}
|
||||
var found bool
|
||||
rootId := fileNameToObjectId[fileWithLinksName]
|
||||
want := buildTreeWithNonUtfLinks(fileNameToObjectId, rootId)
|
||||
for _, snapshot := range sn.Snapshots {
|
||||
if snapshot.FileName == fileWithLinksName {
|
||||
found = true
|
||||
blockbuilder.AssertTreesEqual(t, want.Build(), snapshot.Snapshot.Data.Blocks)
|
||||
}
|
||||
}
|
||||
assert.True(t, found)
|
||||
})
|
||||
}
|
||||
|
||||
func buildTreeWithNonUtfLinks(fileNameToObjectId map[string]string, rootId string) *blockbuilder.Block {
|
||||
testMdPath := fileNameToObjectId["import file 2.md"]
|
||||
testCsvPath := fileNameToObjectId["import file 3.csv"]
|
||||
|
||||
want := blockbuilder.Root(
|
||||
blockbuilder.ID(rootId),
|
||||
blockbuilder.Children(
|
||||
blockbuilder.Text("NonUtf 1 test6", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testMdPath,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Text("NonUtf 2 test7", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testCsvPath,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Text("NonUtf 1 test6", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testMdPath,
|
||||
},
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Bold,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Text("NonUtf 2 test7", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testCsvPath,
|
||||
},
|
||||
{
|
||||
Range: &model.Range{From: 9, To: 14},
|
||||
Type: model.BlockContentTextMark_Bold,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Text("test6", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 0, To: 5},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testMdPath,
|
||||
},
|
||||
{
|
||||
Range: &model.Range{From: 0, To: 5},
|
||||
Type: model.BlockContentTextMark_Bold,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Text("test7", blockbuilder.TextMarks(model.BlockContentTextMarks{Marks: []*model.BlockContentTextMark{
|
||||
{
|
||||
Range: &model.Range{From: 0, To: 5},
|
||||
Type: model.BlockContentTextMark_Mention,
|
||||
Param: testCsvPath,
|
||||
},
|
||||
{
|
||||
Range: &model.Range{From: 0, To: 5},
|
||||
Type: model.BlockContentTextMark_Bold,
|
||||
},
|
||||
}})),
|
||||
blockbuilder.Link(rootId),
|
||||
))
|
||||
return want
|
||||
}
|
||||
|
||||
func buildExpectedTree(fileNameToObjectId map[string]string, provider *MockTempDir, rootId string) *blockbuilder.Block {
|
||||
|
|
11
core/block/import/markdown/testdata/nonutflinks.md
vendored
Normal file
11
core/block/import/markdown/testdata/nonutflinks.md
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
NonUtf 1 [test6](こんにちは.md)
|
||||
|
||||
NonUtf 2 [test7](你好.csv)
|
||||
|
||||
NonUtf 1 **[test6](こんにちは.md)**
|
||||
|
||||
NonUtf 2 **[test7](你好.csv)**
|
||||
|
||||
**[test6](こんにちは.md)**
|
||||
|
||||
**[test7](你好.csv)**
|
0
core/block/import/markdown/testdata/こんにちは.md
vendored
Normal file
0
core/block/import/markdown/testdata/こんにちは.md
vendored
Normal file
0
core/block/import/markdown/testdata/你好.csv
vendored
Normal file
0
core/block/import/markdown/testdata/你好.csv
vendored
Normal file
|
|
@ -243,8 +243,7 @@ func TestPb_GetSnapshots(t *testing.T) {
|
|||
dir := t.TempDir()
|
||||
p := &Pb{}
|
||||
zipPath := filepath.Join(dir, "empty.zip")
|
||||
err := test.CreateEmptyZip(t, zipPath)
|
||||
assert.Nil(t, err)
|
||||
test.CreateEmptyZip(t, zipPath)
|
||||
|
||||
// when
|
||||
_, ce := p.GetSnapshots(context.Background(), &pb.RpcObjectImportRequest{
|
||||
|
|
|
@ -78,8 +78,7 @@ func TestTXT_GetSnapshots(t *testing.T) {
|
|||
// given
|
||||
dir := t.TempDir()
|
||||
zipPath := filepath.Join(dir, "empty.zip")
|
||||
err := test.CreateEmptyZip(t, zipPath)
|
||||
assert.Nil(t, err)
|
||||
test.CreateEmptyZip(t, zipPath)
|
||||
|
||||
h := &TXT{}
|
||||
p := process.NewProgress(&pb.ModelProcessMessageOfImport{Import: &pb.ModelProcessImport{}})
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue