Fix markup heading parsing, fix emphasis parsing (#36284)

Fixes #36106, fixes #17958

---------

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Heath Dutton🕴️
2026-01-23 15:24:58 -05:00
committed by GitHub
parent cfd7218395
commit 0f78b99998
15 changed files with 260 additions and 196 deletions
+10 -14
View File
@@ -41,11 +41,10 @@ func (g *ASTTransformer) applyElementDir(n ast.Node) {
// Transform transforms the given AST tree.
func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
firstChild := node.FirstChild()
tocMode := ""
ctx := pc.Get(renderContextKey).(*markup.RenderContext)
rc := pc.Get(renderConfigKey).(*RenderConfig)
tocList := make([]Header, 0, 20)
tocMode := ""
if rc.yamlNode != nil {
metaNode := rc.toMetaNode(g)
if metaNode != nil {
@@ -60,8 +59,6 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
}
switch v := n.(type) {
case *ast.Heading:
g.transformHeading(ctx, v, reader, &tocList)
case *ast.Paragraph:
g.applyElementDir(v)
case *ast.List:
@@ -79,19 +76,18 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
return ast.WalkContinue, nil
})
showTocInMain := tocMode == "true" /* old behavior, in main view */ || tocMode == "main"
showTocInSidebar := !showTocInMain && tocMode != "false" // not hidden, not main, then show it in sidebar
if len(tocList) > 0 && (showTocInMain || showTocInSidebar) {
if showTocInMain {
tocNode := createTOCNode(tocList, rc.Lang, nil)
node.InsertBefore(node, firstChild, tocNode)
} else {
tocNode := createTOCNode(tocList, rc.Lang, map[string]string{"open": "open"})
ctx.SidebarTocNode = tocNode
if ctx.RenderOptions.EnableHeadingIDGeneration {
showTocInMain := tocMode == "true" /* old behavior, in main view */ || tocMode == "main"
showTocInSidebar := !showTocInMain && tocMode != "false" // not hidden, not main, then show it in sidebar
switch {
case showTocInMain:
ctx.TocShowInSection = markup.TocShowInMain
case showTocInSidebar:
ctx.TocShowInSection = markup.TocShowInSidebar
}
}
if len(rc.Lang) > 0 {
if rc.Lang != "" {
node.SetAttributeString("lang", []byte(rc.Lang))
}
}
+40 -2
View File
@@ -5,6 +5,7 @@
package markdown
import (
"bytes"
"errors"
"html/template"
"io"
@@ -21,10 +22,12 @@ import (
"github.com/yuin/goldmark"
highlighting "github.com/yuin/goldmark-highlighting/v2"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
@@ -57,7 +60,7 @@ func (l *limitWriter) Write(data []byte) (int, error) {
// newParserContext creates a parser.Context with the render context set
func newParserContext(ctx *markup.RenderContext) parser.Context {
pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
pc := parser.NewContext()
pc.Set(renderContextKey, ctx)
return pc
}
@@ -101,12 +104,48 @@ func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.C
}
}
// goldmarkEmphasisParser wraps another parser.InlineParser by embedding it, so
// that individual methods (see Parse) can be overridden while every other
// method is delegated to the wrapped parser.
type goldmarkEmphasisParser struct {
parser.InlineParser
}
// goldmarkNewEmphasisParser builds an inline parser that wraps the emphasis
// parser returned by parser.NewEmphasisParser in a goldmarkEmphasisParser.
func goldmarkNewEmphasisParser() parser.InlineParser {
	wrapped := &goldmarkEmphasisParser{InlineParser: parser.NewEmphasisParser()}
	return wrapped
}
// Parse wraps the embedded emphasis parser and refuses to start emphasis on an
// underscore that belongs to a Python-style file name such as
// "module/__init__.py".
//
// It returns nil, without consulting the wrapped parser, when the line
// returned by block.PeekLine starts with "_", is longer than one byte, and the
// first "_.py" on that line begins either
//   - at the next underscore after the leading one (e.g. "__.py", "_init_.py"), or
//   - at the leading underscore itself while no other underscore follows (e.g. "_.py").
//
// In every other case the call is forwarded unchanged to the wrapped parser.
func (s *goldmarkEmphasisParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	line, _ := block.PeekLine()
	if len(line) > 1 && line[0] == '_' {
		// Both indexes below are offsets into line, so they can be compared directly.
		// nextUnderscore is the position of the first underscore after line[0];
		// IndexByte reports -1 when there is none, which the +1 maps to 0,
		// i.e. the leading underscore itself.
		nextUnderscore := bytes.IndexByte(line[1:], '_') + 1
		pySuffix := bytes.Index(line, []byte("_.py"))
		if pySuffix != -1 && nextUnderscore == pySuffix {
			return nil
		}
	}
	return s.InlineParser.Parse(parent, block, pc)
}
// goldmarkDefaultParser assembles a goldmark parser from goldmark's default
// block parsers and paragraph transformers plus an explicit inline parser list
// in which the emphasis slot is filled by goldmarkNewEmphasisParser.
func goldmarkDefaultParser() parser.Parser {
	inlineParsers := []util.PrioritizedValue{
		util.Prioritized(parser.NewCodeSpanParser(), 100),
		util.Prioritized(parser.NewLinkParser(), 200),
		util.Prioritized(parser.NewAutoLinkParser(), 300),
		util.Prioritized(parser.NewRawHTMLParser(), 400),
		util.Prioritized(goldmarkNewEmphasisParser(), 500),
	}
	blockOpt := parser.WithBlockParsers(parser.DefaultBlockParsers()...)
	inlineOpt := parser.WithInlineParsers(inlineParsers...)
	paragraphOpt := parser.WithParagraphTransformers(parser.DefaultParagraphTransformers()...)
	return parser.NewParser(blockOpt, inlineOpt, paragraphOpt)
}
// SpecializedMarkdown sets up the Gitea specific markdown extensions
func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
// TODO: it could use a pool to cache the renderers to reuse them with different contexts
// at the moment it is fast enough (see the benchmarks)
r := &GlodmarkRender{ctx: ctx}
r.goldmarkMarkdown = goldmark.New(
goldmark.WithParser(goldmarkDefaultParser()),
goldmark.WithExtensions(
extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
extension.Strikethrough,
@@ -131,7 +170,6 @@ func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
),
goldmark.WithParserOptions(
parser.WithAttribute(),
parser.WithAutoHeadingID(),
parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
),
goldmark.WithRendererOptions(html.WithUnsafe()),
+8 -1
View File
@@ -88,6 +88,7 @@ func TestRender_Images(t *testing.T) {
}
func TestTotal_RenderString(t *testing.T) {
setting.AppURL = AppURL
defer test.MockVariableValue(&markup.RenderBehaviorForTesting.DisableAdditionalAttributes, true)()
// Test cases without ambiguous links (It is not right to copy a whole file here, instead it should clearly test what is being tested)
@@ -258,7 +259,7 @@ This PR has been generated by [Renovate Bot](https://github.com/renovatebot/reno
},
})
for i := range sameCases {
line, err := markdown.RenderString(markup.NewTestRenderContext(localMetas), sameCases[i])
line, err := markdown.RenderString(markup.NewTestRenderContext(localMetas).WithEnableHeadingIDGeneration(true), sameCases[i])
assert.NoError(t, err)
assert.Equal(t, testAnswers[i], string(line))
}
@@ -545,5 +546,11 @@ func TestMarkdownLink(t *testing.T) {
assert.Equal(t, `<p><a href="/base/foo" rel="nofollow">link1</a>
<a href="/base/foo" rel="nofollow">link2</a>
<a href="#user-content-foo" rel="nofollow">link3</a></p>
`, string(result))
input = "https://example.com/__init__.py"
result, err = markdown.RenderString(markup.NewTestRenderContext("/base", localMetas), input)
assert.NoError(t, err)
assert.Equal(t, `<p><a href="https://example.com/__init__.py" rel="nofollow">https://example.com/__init__.py</a></p>
`, string(result))
}
-59
View File
@@ -1,59 +0,0 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"bytes"
"fmt"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/markup/common"
"code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
)
// prefixedIDs hands out element IDs and records every ID it has generated or
// been told about (see Put) so that later IDs can be made unique.
type prefixedIDs struct {
// values is the set of IDs already in use.
values container.Set[string]
}
// Generate returns a new element ID derived from value. When value yields
// nothing usable, "heading" is used as the base for heading nodes and "id" for
// every other node kind.
func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte {
	fallback := "id"
	if kind == ast.KindHeading {
		fallback = "heading"
	}
	return p.GenerateWithDefault(value, []byte(fallback))
}
// GenerateWithDefault returns a new element ID derived from value.
//
// The value is passed through common.CleanValue; if that leaves nothing, dft
// is used instead. The "user-content-" prefix is added unless already present.
// If the resulting ID was already recorded, "-1", "-2", ... is appended until
// an unused ID is found. The returned ID is recorded as used.
func (p *prefixedIDs) GenerateWithDefault(value, dft []byte) []byte {
	id := common.CleanValue(value)
	if len(id) == 0 {
		id = dft
	}
	if prefix := []byte("user-content-"); !bytes.HasPrefix(id, prefix) {
		id = append(prefix, id...)
	}

	// Add reporting true is treated as "this ID was not in use yet".
	if p.values.Add(util.UnsafeBytesToString(id)) {
		return id
	}

	// Collision: probe numbered variants until one is free.
	suffix := 1
	for {
		candidate := fmt.Sprintf("%s-%d", id, suffix)
		if p.values.Add(candidate) {
			return []byte(candidate)
		}
		suffix++
	}
}
// Put records value as an ID that is already in use, so that subsequently
// generated IDs will not collide with it.
func (p *prefixedIDs) Put(value []byte) {
	used := util.UnsafeBytesToString(value)
	p.values.Add(used)
}
// newPrefixedIDs returns a prefixedIDs whose set of used IDs is empty.
func newPrefixedIDs() *prefixedIDs {
	ids := new(prefixedIDs)
	ids.values = make(container.Set[string])
	return ids
}
-59
View File
@@ -1,59 +0,0 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"net/url"
"code.gitea.io/gitea/modules/translation"
"github.com/yuin/goldmark/ast"
)
// Header holds the data about a header: one entry of the table of contents.
type Header struct {
// Level is the heading level; createTOCNode uses it to decide list nesting.
Level int
// Text is the heading's text, used as the link text of the TOC entry.
Text string
// ID is the heading's "id" attribute (empty if it has none); the TOC entry
// links to "#" followed by the escaped ID.
ID string
}
// createTOCNode builds the table-of-contents node for the given headers.
//
// The result is a details node containing a summary (the "toc" string
// translated for lang) followed by a '-' list. Headers at the smallest level
// present in toc go in that top list; deeper headers go into lists nested one
// per extra level. Each entry is a list item holding a link to "#<escaped ID>"
// whose text is the header text. Every key/value in detailsAttrs is set as an
// attribute on the details node.
func createTOCNode(toc []Header, lang string, detailsAttrs map[string]string) ast.Node {
	details := NewDetails()
	summary := NewSummary()

	for name, value := range detailsAttrs {
		details.SetAttributeString(name, []byte(value))
	}

	title := translation.NewLocale(lang).TrString("toc")
	summary.AppendChild(summary, ast.NewString([]byte(title)))
	details.AppendChild(details, summary)

	list := ast.NewList('-')
	details.AppendChild(details, list)

	// Start at the shallowest heading level that occurs (capped at 6), so the
	// top-level list corresponds to that level.
	level := 6
	for i := range toc {
		if toc[i].Level < level {
			level = toc[i].Level
		}
	}

	for _, h := range toc {
		// Climb back out of nested lists for a shallower header.
		for ; level > h.Level; level-- {
			list = list.Parent().(*ast.List)
		}
		// Descend into freshly created nested lists for a deeper header.
		for ; level < h.Level; level++ {
			nested := ast.NewList('-')
			list.AppendChild(list, nested)
			list = nested
		}

		link := ast.NewLink()
		link.Destination = []byte("#" + url.QueryEscape(h.ID))
		link.AppendChild(link, ast.NewString([]byte(h.Text)))

		item := ast.NewListItem(level * 2)
		item.AppendChild(item, link)
		list.AppendChild(list, item)
	}

	return details
}
@@ -1,32 +0,0 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package markdown
import (
"fmt"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/util"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
)
// transformHeading post-processes a heading node and records it for the TOC.
//
// Every attribute whose value is not already a []byte is replaced by its "%v"
// formatting as bytes. A Header with the heading's text, level and (if
// present) "id" attribute is appended to *tocList, and applyElementDir is then
// applied to the heading.
func (g *ASTTransformer) transformHeading(_ *markup.RenderContext, v *ast.Heading, reader text.Reader, tocList *[]Header) {
	for _, attr := range v.Attributes() {
		if _, isBytes := attr.Value.([]byte); isBytes {
			continue
		}
		v.SetAttribute(attr.Name, fmt.Appendf(nil, "%v", attr.Value))
	}

	headingText := v.Text(reader.Source()) //nolint:staticcheck // Text is deprecated
	entry := Header{
		Level: v.Level,
		Text:  util.UnsafeBytesToString(headingText),
	}
	// All attribute values were normalized to []byte above, so the assertion holds.
	if idValue, hasID := v.AttributeString("id"); hasID {
		entry.ID = util.UnsafeBytesToString(idValue.([]byte))
	}
	*tocList = append(*tocList, entry)

	g.applyElementDir(v)
}