Avoid emoji mismatch and allow to only enable chosen emojis (#35692)

Fix #23635
This commit is contained in:
wxiaoguang
2025-10-20 04:06:45 +08:00
committed by GitHub
parent c30d74d0f9
commit 66ee8f3553
6 changed files with 115 additions and 92 deletions
+67 -65
View File
@@ -8,7 +8,9 @@ import (
"io"
"sort"
"strings"
"sync"
"sync/atomic"
"code.gitea.io/gitea/modules/setting"
)
// Gemoji is a set of emoji data.
@@ -23,74 +25,78 @@ type Emoji struct {
SkinTones bool
}
var (
// codeMap provides a map of the emoji unicode code to its emoji data.
codeMap map[string]int
type globalVarsStruct struct {
codeMap map[string]int // emoji unicode code to its emoji data.
aliasMap map[string]int // the alias to its emoji data.
emptyReplacer *strings.Replacer // string replacer for emoji codes, used for finding emoji positions.
codeReplacer *strings.Replacer // string replacer for emoji codes.
aliasReplacer *strings.Replacer // string replacer for emoji aliases.
}
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int
var globalVarsStore atomic.Pointer[globalVarsStruct]
// emptyReplacer is the string replacer for emoji codes.
emptyReplacer *strings.Replacer
func globalVars() *globalVarsStruct {
vars := globalVarsStore.Load()
if vars != nil {
return vars
}
// although there can be concurrent calls, the result should be the same, and there is no performance problem
vars = &globalVarsStruct{}
vars.codeMap = make(map[string]int, len(GemojiData))
vars.aliasMap = make(map[string]int, len(GemojiData))
// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer
// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)
// aliasReplacer is the string replacer for emoji aliases.
aliasReplacer *strings.Replacer
// sort from largest to small so we match combined emoji first
sort.Slice(GemojiData, func(i, j int) bool {
return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
})
once sync.Once
)
func loadMap() {
once.Do(func() {
// initialize
codeMap = make(map[string]int, len(GemojiData))
aliasMap = make(map[string]int, len(GemojiData))
// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)
// sort from largest to small so we match combined emoji first
sort.Slice(GemojiData, func(i, j int) bool {
return len(GemojiData[i].Emoji) > len(GemojiData[j].Emoji)
})
for i, e := range GemojiData {
if e.Emoji == "" || len(e.Aliases) == 0 {
continue
}
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
// setup aliases
for _, a := range e.Aliases {
if a == "" {
continue
}
aliasMap[a] = i
aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji)
}
for idx, emoji := range GemojiData {
if emoji.Emoji == "" || len(emoji.Aliases) == 0 {
continue
}
// create replacers
emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
// process aliases
firstAlias := ""
for _, alias := range emoji.Aliases {
if alias == "" {
continue
}
enabled := len(setting.UI.EnabledEmojisSet) == 0 || setting.UI.EnabledEmojisSet.Contains(alias)
if !enabled {
continue
}
if firstAlias == "" {
firstAlias = alias
}
vars.aliasMap[alias] = idx
aliasPairs = append(aliasPairs, ":"+alias+":", emoji.Emoji)
}
// process emoji code
if firstAlias != "" {
vars.codeMap[emoji.Emoji] = idx
codePairs = append(codePairs, emoji.Emoji, ":"+emoji.Aliases[0]+":")
emptyPairs = append(emptyPairs, emoji.Emoji, emoji.Emoji)
}
}
// create replacers
vars.emptyReplacer = strings.NewReplacer(emptyPairs...)
vars.codeReplacer = strings.NewReplacer(codePairs...)
vars.aliasReplacer = strings.NewReplacer(aliasPairs...)
globalVarsStore.Store(vars)
return vars
}
// FromCode retrieves the emoji data based on the provided unicode code (ie,
// "\u2618" will return the Gemoji data for "shamrock").
func FromCode(code string) *Emoji {
loadMap()
i, ok := codeMap[code]
i, ok := globalVars().codeMap[code]
if !ok {
return nil
}
@@ -102,12 +108,11 @@ func FromCode(code string) *Emoji {
// "alias" or ":alias:" (ie, "shamrock" or ":shamrock:" will return the Gemoji
// data for "shamrock").
func FromAlias(alias string) *Emoji {
loadMap()
if strings.HasPrefix(alias, ":") && strings.HasSuffix(alias, ":") {
alias = alias[1 : len(alias)-1]
}
i, ok := aliasMap[alias]
i, ok := globalVars().aliasMap[alias]
if !ok {
return nil
}
@@ -119,15 +124,13 @@ func FromAlias(alias string) *Emoji {
// alias (in the form of ":alias:") (ie, "\u2618" will be converted to
// ":shamrock:").
func ReplaceCodes(s string) string {
loadMap()
return codeReplacer.Replace(s)
return globalVars().codeReplacer.Replace(s)
}
// ReplaceAliases replaces all aliases of the form ":alias:" with its
// corresponding unicode value.
func ReplaceAliases(s string) string {
loadMap()
return aliasReplacer.Replace(s)
return globalVars().aliasReplacer.Replace(s)
}
type rememberSecondWriteWriter struct {
@@ -163,7 +166,6 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
secondWriteWriter := rememberSecondWriteWriter{}
// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
@@ -175,7 +177,7 @@ func FindEmojiSubmatchIndex(s string) []int {
// Therefore we can simply take the index of the second write as our first emoji
//
// FIXME: just copy the trie implementation from strings.NewReplacer
_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
_, _ = globalVars().emptyReplacer.WriteString(&secondWriteWriter, s)
// if we wrote less than twice then we never "replaced"
if secondWriteWriter.writecount < 2 {
+18 -7
View File
@@ -7,14 +7,13 @@ package emoji
import (
"testing"
"code.gitea.io/gitea/modules/container"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
)
func TestDumpInfo(t *testing.T) {
t.Logf("codes: %d", len(codeMap))
t.Logf("aliases: %d", len(aliasMap))
}
func TestLookup(t *testing.T) {
a := FromCode("\U0001f37a")
b := FromCode("🍺")
@@ -24,7 +23,6 @@ func TestLookup(t *testing.T) {
assert.Equal(t, a, b)
assert.Equal(t, b, c)
assert.Equal(t, c, d)
assert.Equal(t, a, d)
m := FromCode("\U0001f44d")
n := FromAlias(":thumbsup:")
@@ -32,7 +30,20 @@ func TestLookup(t *testing.T) {
assert.Equal(t, m, n)
assert.Equal(t, m, o)
assert.Equal(t, n, o)
defer test.MockVariableValue(&setting.UI.EnabledEmojisSet, container.SetOf("thumbsup"))()
defer globalVarsStore.Store(nil)
globalVarsStore.Store(nil)
a = FromCode("\U0001f37a")
c = FromAlias(":beer:")
m = FromCode("\U0001f44d")
n = FromAlias(":thumbsup:")
o = FromAlias("+1")
assert.Nil(t, a)
assert.Nil(t, c)
assert.NotNil(t, m)
assert.NotNil(t, n)
assert.Nil(t, o)
}
func TestReplacers(t *testing.T) {