1
0
Fork 0
mirror of https://github.com/anyproto/anytype-heart.git synced 2025-06-10 18:10:49 +09:00
anytype-heart/anymark/markdown.go
2020-06-17 17:35:19 +03:00

252 lines
7.7 KiB
Go
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package anymark implements functions to convert markdown text to a desired format.
package anymark
import (
"bufio"
"bytes"
"io"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/anytypeio/go-anytype-library/pb/model"
"github.com/anytypeio/go-anytype-middleware/anymark/blocksUtil"
"github.com/anytypeio/go-anytype-middleware/anymark/renderer"
"github.com/anytypeio/go-anytype-middleware/anymark/renderer/html"
"github.com/anytypeio/go-anytype-middleware/anymark/spaceReplace"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
htmlConverter "github.com/JohannesKaufmann/html-to-markdown"
)
// DefaultParser builds a parser.Parser configured with the default block
// parsers, inline parsers and paragraph transformers of the parser package.
func DefaultParser() parser.Parser {
	blockParsers := parser.WithBlockParsers(parser.DefaultBlockParsers()...)
	inlineParsers := parser.WithInlineParsers(parser.DefaultInlineParsers()...)
	transformers := parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...)
	return parser.NewParser(blockParsers, inlineParsers, transformers)
}
// DefaultRenderer builds a renderer.Renderer whose only node renderer is the
// one returned by html.NewRenderer, registered with priority 1000.
func DefaultRenderer() renderer.Renderer {
	nodeRenderer := util.Prioritized(html.NewRenderer(), 1000)
	return renderer.NewRenderer(renderer.WithNodeRenderers(nodeRenderer))
}
// Package-level default converter and the regular expressions used while
// pre- and post-processing text. The patterns are compiled once, when the
// package is initialised.
var (
// defaultMarkdown is the instance used by the package-level Convert.
defaultMarkdown = New()
// linkRegexp matches a Markdown link `[text](target)`, capturing the text
// (which may span lines) and the target. Not referenced in the visible part
// of this file.
linkRegexp = regexp.MustCompile(`\[([\s\S]*?)\]\((.*?)\)`)
// markRightEdge matches a run of `*`, `~` or `_` that sits between a
// non-marker, non-space character and a non-space character. Not referenced
// in the visible part of this file.
markRightEdge = regexp.MustCompile(`([^\*\~\_\s])([\*\~\_]+)(\S)`)
// linkLeftEdge matches a non-space character immediately followed by `[`.
// Not referenced in the visible part of this file.
linkLeftEdge = regexp.MustCompile(`(\S)\[`)
// reEmptyLinkText matches a Markdown link whose bracketed text is empty or
// whitespace only, capturing the target; HTMLToBlocks uses it to reuse the
// target as the link text.
reEmptyLinkText = regexp.MustCompile(`\[[\s]*?\]\(([\s\S]*?)\)`)
// reWikiCode matches `<span ...>content</span>` (up to the first closing
// tag), capturing the content; HTMLToBlocks uses it to unwrap spans.
reWikiCode = regexp.MustCompile(`<span[\s\S]*?>([\s\S]*?)</span>`)
// reWikiWbr matches a `<wbr ...>` tag; HTMLToBlocks removes these.
reWikiWbr = regexp.MustCompile(`<wbr[^>]*>`)
)
// Convert renders the Markdown held in source (UTF-8 bytes) into w using the
// package-wide default Markdown instance. Any parse options are forwarded
// unchanged.
func Convert(source []byte, w io.Writer, opts ...parser.ParseOption) error {
	return defaultMarkdown.Convert(source, w, opts...)
}
// A Markdown interface offers functions to convert Markdown (and HTML) text
// to a desired format.
type Markdown interface {
// Convert interprets a UTF-8 bytes source in Markdown and writes the
// rendered contents to writer.
Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error
// ConvertBlocks interprets a UTF-8 bytes source in Markdown and renders it
// into bWriter.
ConvertBlocks(source []byte, bWriter blocksUtil.RWriter, opts ...parser.ParseOption) error
// HTMLToBlocks converts an HTML source into blocks and the IDs of the root
// blocks. Note that, unlike the other methods, the error is the FIRST
// result.
HTMLToBlocks(source []byte) (err error, blocks []*model.Block, rootBlockIDs []string)
// MarkdownToBlocks converts a Markdown source into blocks and the IDs of
// the root blocks. baseFilepath and allFileShortPaths are handed to the
// block writer; presumably they are used to resolve file references —
// verify against blocksUtil.
MarkdownToBlocks(markdownSource []byte, baseFilepath string, allFileShortPaths []string) (blocks []*model.Block, rootBlockIDs []string, err error)
// Parser returns a Parser that will be used for conversion.
Parser() parser.Parser
// SetParser sets a Parser to this object.
SetParser(parser.Parser)
// Renderer returns a Renderer that will be used for conversion.
Renderer() renderer.Renderer
// SetRenderer sets a Renderer to this object.
SetRenderer(renderer.Renderer)
}
// Option is a functional option type for Markdown objects. New applies each
// Option, in the order given, to the markdown value it is constructing.
type Option func(*markdown)
// WithExtensions returns an Option that appends ext to the extensions
// already registered on the Markdown being built.
func WithExtensions(ext ...Extender) Option {
	register := func(md *markdown) {
		md.extensions = append(md.extensions, ext...)
	}
	return register
}
// WithParser returns an Option that replaces the parser of the Markdown being
// built with p.
func WithParser(p parser.Parser) Option {
	return func(md *markdown) { md.parser = p }
}
// WithParserOptions returns an Option that adds opts to whichever parser is
// set on the Markdown at the moment the Option is applied.
func WithParserOptions(opts ...parser.Option) Option {
	configure := func(md *markdown) {
		md.parser.AddOptions(opts...)
	}
	return configure
}
// WithRenderer returns an Option that replaces the renderer of the Markdown
// being built with r.
func WithRenderer(r renderer.Renderer) Option {
	return func(md *markdown) { md.renderer = r }
}
// WithRendererOptions returns an Option that adds opts to whichever renderer
// is set on the Markdown at the moment the Option is applied.
func WithRendererOptions(opts ...renderer.Option) Option {
	configure := func(md *markdown) {
		md.renderer.AddOptions(opts...)
	}
	return configure
}
// markdown is the Markdown implementation returned by New. It pairs a parser
// with a renderer and keeps the extensions that New applies to it.
type markdown struct {
// parser turns the source bytes into the document handed to renderer.
parser parser.Parser
// renderer writes the parsed document to the output writer.
renderer renderer.Renderer
// extensions are invoked by New, in order, after all options were applied.
extensions []Extender
}
// New creates a Markdown that starts from DefaultParser and DefaultRenderer,
// applies the given options in order, and finally lets every registered
// extension extend the result.
func New(options ...Option) Markdown {
	md := new(markdown)
	md.parser = DefaultParser()
	md.renderer = DefaultRenderer()
	md.extensions = []Extender{}

	for i := range options {
		options[i](md)
	}

	// Iterate over the slice as it stands once all options have run.
	registered := md.extensions
	for i := range registered {
		registered[i].Extend(md)
	}
	return md
}
// Convert parses source as Markdown using the configured parser and renders
// the resulting document with the configured renderer.
//
// Rendering goes through a bufio.Writer wrapped around w (itself wrapped in a
// blocksUtil.RWriter with an empty base path and no file paths). The buffer is
// flushed here after a successful render so that buffered output actually
// reaches w; flushing an already-flushed writer is a no-op, so this is safe
// even if the renderer flushes on its own.
//
// It returns the render error, or the error from flushing to w.
func (m *markdown) Convert(source []byte, w io.Writer, opts ...parser.ParseOption) error {
	reader := text.NewReader(source)
	doc := m.parser.Parse(reader, opts...)

	writer := bufio.NewWriter(w)
	bWriter := blocksUtil.NewRWriter(writer, "", []string{})
	if err := m.renderer.Render(bWriter, source, doc); err != nil {
		return err
	}
	return writer.Flush()
}
// ConvertBlocks parses source as Markdown with the configured parser and
// renders the resulting document into bWriter with the configured renderer.
// It returns whatever error the renderer reports.
func (m *markdown) ConvertBlocks(source []byte, bWriter blocksUtil.RWriter, opts ...parser.ParseOption) error {
	document := m.parser.Parse(text.NewReader(source), opts...)
	return m.renderer.Render(bWriter, source, document)
}
// MarkdownToBlocks renders markdownSource through ConvertBlocks and returns
// the blocks and root block IDs collected by the block writer.
//
// baseFilepath and allFileShortPaths are passed straight to
// blocksUtil.NewRWriter. The writer's text output goes to an in-memory buffer
// that is never read. On failure both slices are nil and err is set.
func (m *markdown) MarkdownToBlocks(markdownSource []byte, baseFilepath string, allFileShortPaths []string) (blocks []*model.Block, rootBlockIDs []string, err error) {
	var sink bytes.Buffer
	blockWriter := blocksUtil.NewRWriter(bufio.NewWriter(&sink), baseFilepath, allFileShortPaths)

	if err = m.ConvertBlocks(markdownSource, blockWriter); err != nil {
		return nil, nil, err
	}

	blocks = blockWriter.GetBlocks()
	rootBlockIDs = blockWriter.GetRootBlockIDs()
	return blocks, rootBlockIDs, nil
}
// HTMLToBlocks converts an HTML source into blocks by translating it to
// Markdown first and then rendering that Markdown through ConvertBlocks.
//
// The steps are:
//  1. clean up the markup (space-only spans, <wbr> tags, span wrappers);
//  2. convert the HTML to Markdown with two extra rules (strikethrough spans
//     and <br>);
//  3. normalise whitespace and give links with empty text their target as text;
//  4. render the Markdown into blocks.
//
// Note the unusual result order: the error is the FIRST result. When err is
// non-nil, blocks and rootBlockIDs are nil. A failure of the HTML-to-Markdown
// conversion is reported to the caller instead of being silently dropped.
func (m *markdown) HTMLToBlocks(source []byte) (err error, blocks []*model.Block, rootBlockIDs []string) {
	preprocessedSource := string(source)

	// special wiki spaces
	// NOTE(review): the literals on the next line may contain non-ASCII
	// whitespace; keep their bytes intact when editing.
	preprocessedSource = strings.ReplaceAll(preprocessedSource, "<span> </span>", " ")
	preprocessedSource = reWikiWbr.ReplaceAllString(preprocessedSource, ``)

	// Pattern: <pre> <span>\n console \n</span> <span>\n . \n</span> <span>\n log \n</span>
	// NOTE(review): this unwraps every matched span, including ones carrying a
	// class such as bb_strike, before the strikethrough rule below can see
	// them — confirm this is intended.
	preprocessedSource = reWikiCode.ReplaceAllString(preprocessedSource, `$1`)

	strikethrough := htmlConverter.Rule{
		Filter: []string{"span"},
		Replacement: func(content string, selec *goquery.Selection, opt *htmlConverter.Options) *string {
			// If the span element has not the classname `bb_strike` return nil.
			// That way the next rules will apply. In this case the commonmark rules.
			// -> return nil -> next rule applies
			if !selec.HasClass("bb_strike") {
				return nil
			}

			// Trim spaces so that the following does NOT happen: `~ and cake~`.
			// Because of the space it is not recognized as strikethrough.
			// -> trim spaces at begin&end of string when inside strong/italic/...
			content = strings.TrimSpace(content)
			return htmlConverter.String("~" + content + "~")
		},
	}

	// Every <br> becomes a newline followed by its trimmed content.
	br := htmlConverter.Rule{
		Filter: []string{"br"},
		Replacement: func(content string, selec *goquery.Selection, opt *htmlConverter.Options) *string {
			content = strings.TrimSpace(content)
			return htmlConverter.String("\n" + content)
		},
	}

	converter := htmlConverter.NewConverter("", true, &htmlConverter.Options{
		DisableEscaping:  true,
		AllowHeaderBreak: true,
		EmDelimiter:      "*",
	})
	converter.AddRules(strikethrough, br)

	md, convErr := converter.ConvertString(preprocessedSource)
	if convErr != nil {
		return convErr, nil, nil
	}

	//md := html2md.Convert(preprocessedSource)
	md = spaceReplace.WhitespaceNormalizeString(md)
	//md = strings.ReplaceAll(md, "* *", "* *")

	// A link without visible text gets its own target as text.
	md = reEmptyLinkText.ReplaceAllString(md, `[$1]($1)`)

	// The writer's text output goes to a buffer that is never read; only the
	// collected blocks are returned.
	var b bytes.Buffer
	writer := bufio.NewWriter(&b)
	bWriter := blocksUtil.NewRWriter(writer, "", []string{})

	err = m.ConvertBlocks([]byte(md), bWriter)
	if err != nil {
		return err, nil, nil
	}
	return nil, bWriter.GetBlocks(), bWriter.GetRootBlockIDs()
}
// Parser returns the parser currently used by m for conversion.
func (m *markdown) Parser() parser.Parser {
	return m.parser
}
// SetParser makes v the parser used by m for subsequent conversions.
func (m *markdown) SetParser(v parser.Parser) {
	m.parser = v
}
// Renderer returns the renderer currently used by m for conversion.
func (m *markdown) Renderer() renderer.Renderer {
	return m.renderer
}
// SetRenderer makes v the renderer used by m for subsequent conversions.
func (m *markdown) SetRenderer(v renderer.Renderer) {
	m.renderer = v
}
// An Extender interface is used for extending Markdown. Extenders are
// registered with WithExtensions and invoked by New once all options have
// been applied.
type Extender interface {
// Extend extends the given Markdown; it receives the instance being
// constructed by New.
Extend(Markdown)
}