Use merge tree to detect conflicts when possible (#36400)

In Git 2.38, the `merge-tree` command introduced the `--write-tree`
option, which works directly on bare repositories. In Git 2.40, a new parameter `--merge-base` was introduced, so we require Git 2.40 to use the merge tree feature.

This option produces the merged tree object ID, allowing us to perform
diffs between commits without creating a temporary repository. By
avoiding the overhead of setting up and tearing down temporary repos,
this approach delivers a notable performance improvement.

It also fixes a case where the list of conflicted files can be empty
even though the merge is in a conflicted state, see
https://git-scm.com/docs/git-merge-tree#_mistakes_to_avoid

Replace #35542

---------

Signed-off-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
Lunny Xiao
2026-01-27 11:57:20 -08:00
committed by GitHub
parent 125257eacf
commit 1463426a27
29 changed files with 607 additions and 126 deletions
+2
View File
@@ -32,6 +32,7 @@ type Features struct {
SupportedObjectFormats []ObjectFormat // sha1, sha256
SupportCheckAttrOnBare bool // >= 2.40
SupportCatFileBatchCommand bool // >= 2.36, support `git cat-file --batch-command`
SupportGitMergeTree bool // >= 2.40 // we also need "--merge-base"
}
var defaultFeatures *Features
@@ -77,6 +78,7 @@ func loadGitVersionFeatures() (*Features, error) {
}
features.SupportCheckAttrOnBare = features.CheckVersionAtLeast("2.40")
features.SupportCatFileBatchCommand = features.CheckVersionAtLeast("2.36")
features.SupportGitMergeTree = features.CheckVersionAtLeast("2.40") // we also need "--merge-base"
return features, nil
}
+37 -7
View File
@@ -45,6 +45,7 @@ type Command struct {
cmdStartTime time.Time
parentPipeFiles []*os.File
parentPipeReaders []*os.File
childrenPipeFiles []*os.File
// only os.Pipe and in-memory buffers can work with Stdin safely, see https://github.com/golang/go/issues/77227 if the command would exit unexpectedly
@@ -283,6 +284,7 @@ func (c *Command) makeStdoutStderr(w *io.Writer) (PipeReader, func()) {
}
c.childrenPipeFiles = append(c.childrenPipeFiles, pw)
c.parentPipeFiles = append(c.parentPipeFiles, pr)
c.parentPipeReaders = append(c.parentPipeReaders, pr)
*w /* stdout, stderr */ = pw
return &pipeReader{f: pr}, func() { pr.Close() }
}
@@ -348,7 +350,13 @@ func (c *Command) WithStdoutCopy(w io.Writer) *Command {
return c
}
func (c *Command) WithPipelineFunc(f func(Context) error) *Command {
// WithPipelineFunc sets the pipeline function for the command and returns the same command for chaining.
// The pipeline function is called in the Run / Wait function after the command is started successfully.
// The function can read/write from/to the command's stdio pipes (if any).
// The pipeline function can cancel (kill) the command by calling ctx.CancelPipeline before the command finishes.
// The error returned by Run / Wait can be a join of the pipeline function's error, the context cause, and the command exit error.
// Callers can get the pipeline function's error (if any) by UnwrapPipelineError.
func (c *Command) WithPipelineFunc(f func(ctx Context) error) *Command {
c.opts.PipelineFunc = f
return c
}
@@ -444,6 +452,12 @@ func (c *Command) closePipeFiles(files []*os.File) {
}
}
// discardPipeReaders drains every given pipe file into io.Discard.
// The copy results are intentionally ignored: the goal is only to consume the unread data.
func (c *Command) discardPipeReaders(files []*os.File) {
	for idx := range files {
		_, _ = io.Copy(io.Discard, files[idx])
	}
}
func (c *Command) Wait() error {
defer func() {
// The reader in another goroutine might be still reading the stdout, so we shouldn't close the pipes here
@@ -454,15 +468,31 @@ func (c *Command) Wait() error {
if c.opts.PipelineFunc != nil {
errPipeline := c.opts.PipelineFunc(&cmdContext{Context: c.cmdCtx, cmd: c})
// after the pipeline function returns, we can safely cancel the command context and close the pipes, the data in pipes should have been consumed
c.cmdCancel(errPipeline)
if context.Cause(c.cmdCtx) == nil {
// if the context is not canceled explicitly, we need to discard the unread data,
// and wait for the command to exit normally, and then get its exit code
c.discardPipeReaders(c.parentPipeReaders)
} // else: canceled command will be killed, and the exit code is caused by kill
// after the pipeline function returns, we can safely close the pipes, then wait for the command to exit
c.closePipeFiles(c.parentPipeFiles)
errWait := c.cmd.Wait()
errCause := context.Cause(c.cmdCtx)
// the pipeline function should be able to know whether it succeeds or fails
if errPipeline == nil && (errCause == nil || errors.Is(errCause, context.Canceled)) {
return nil
errCause := context.Cause(c.cmdCtx) // in case the cause is set during Wait(), get the final cancel cause
if unwrapped, ok := UnwrapPipelineError(errCause); ok {
if unwrapped != errPipeline {
panic("unwrapped context pipeline error should be the same one returned by pipeline function")
}
if unwrapped == nil {
// the pipeline function declares that there is no error, and it cancels (kills) the command ahead,
// so we should ignore the errors from "wait" and "cause"
errWait, errCause = nil, nil
}
}
// some legacy code still need to access the error returned by pipeline function by "==" but not "errors.Is"
// so we need to make sure the original error is able to be unwrapped by UnwrapPipelineError
return errors.Join(wrapPipelineError(errPipeline), errCause, errWait)
}
+9 -5
View File
@@ -10,9 +10,9 @@ import (
type Context interface {
context.Context
// CancelWithCause is a helper function to cancel the context with a specific error cause
// And it returns the same error for convenience, to break the PipelineFunc easily
CancelWithCause(err error) error
// CancelPipeline is a helper function to cancel the command context (kill the command) with a specific error cause,
// it returns the same error for convenience to break the PipelineFunc easily
CancelPipeline(err error) error
// In the future, this interface will be extended to support stdio pipe readers/writers
}
@@ -22,7 +22,11 @@ type cmdContext struct {
cmd *Command
}
func (c *cmdContext) CancelWithCause(err error) error {
c.cmd.cmdCancel(err)
// CancelPipeline cancels the command's context with err wrapped in a pipelineError as the cause,
// and returns err unchanged so that a pipeline function can write "return ctx.CancelPipeline(err)".
func (c *cmdContext) CancelPipeline(err error) error {
// pipelineError is used to distinguish between:
// * context canceled by pipeline caller with/without error (normal cancellation)
// * context canceled by parent context (still context.Canceled error)
// * other causes
c.cmd.cmdCancel(pipelineError{err})
return err
}
+5 -5
View File
@@ -92,10 +92,10 @@ func wrapPipelineError(err error) error {
return pipelineError{err}
}
func ErrorAsPipeline(err error) error {
var pipelineErr pipelineError
if errors.As(err, &pipelineErr) {
return pipelineErr.error
func UnwrapPipelineError(err error) (error, bool) { //nolint:revive // this is for error unwrapping
var pe pipelineError
if errors.As(err, &pe) {
return pe.error, true
}
return nil
return nil, false
}
+1 -1
View File
@@ -102,7 +102,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
}
if line == "" {
if len(results) >= opts.MaxResultLimit {
return ctx.CancelWithCause(nil)
return ctx.CancelPipeline(nil)
}
isInBlock = false
continue
+2 -2
View File
@@ -101,7 +101,7 @@ func WalkShowRef(ctx context.Context, repoPath string, extraArgs gitcmd.TrustedC
stdoutReader, stdoutReaderClose := cmd.MakeStdoutPipe()
defer stdoutReaderClose()
cmd.WithDir(repoPath).
WithPipelineFunc(func(c gitcmd.Context) error {
WithPipelineFunc(func(gitcmd.Context) error {
bufReader := bufio.NewReader(stdoutReader)
for i < skip {
_, isPrefix, err := bufReader.ReadLine()
@@ -165,7 +165,7 @@ func WalkShowRef(ctx context.Context, repoPath string, extraArgs gitcmd.TrustedC
return nil
})
err = cmd.RunWithStderr(ctx)
if errPipeline := gitcmd.ErrorAsPipeline(err); errPipeline != nil {
if errPipeline, ok := gitcmd.UnwrapPipelineError(err); ok {
return i, errPipeline // keep the old behavior: return pipeline error directly
}
return i, err
+6 -14
View File
@@ -4,29 +4,21 @@
package gitrepo
import (
"os"
"path/filepath"
"testing"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/tempdir"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/test"
)
func TestMain(m *testing.M) {
gitHomePath, cleanup, err := tempdir.OsTempDir("gitea-test").MkdirTempRandom("git-home")
if err != nil {
log.Fatal("Unable to create temp dir: %v", err)
}
defer cleanup()
// resolve repository path relative to the test directory
testRootDir := test.SetupGiteaRoot()
repoPath = func(repo Repository) string {
return filepath.Join(testRootDir, "/modules/git/tests/repos", repo.RelativePath())
if filepath.IsAbs(repo.RelativePath()) {
return repo.RelativePath() // for testing purpose only
}
return filepath.Join(testRootDir, "modules/git/tests/repos", repo.RelativePath())
}
setting.Git.HomePath = gitHomePath
os.Exit(m.Run())
git.RunGitTests(m)
}
+59
View File
@@ -0,0 +1,59 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitrepo
import (
"bufio"
"context"
"fmt"
"code.gitea.io/gitea/modules/git/gitcmd"
"code.gitea.io/gitea/modules/util"
)
// MaxConflictedDetectFiles is the maximum number of conflicted file names that MergeTree collects from the merge-tree output.
const MaxConflictedDetectFiles = 10
// MergeTree performs a merge between two commits (baseRef and headRef) by "git merge-tree --write-tree".
// The merge base is optional: if mergeBase is not empty, it is passed to git by "--merge-base",
// otherwise the option is omitted.
//
// It returns:
//   - treeID: the object ID of the resulting top-level tree
//   - isErrHasConflicts: true if git exits with status 1, which means the merge has conflicts
//   - conflictFiles: at most MaxConflictedDetectFiles conflicted file names, nil if there is none.
//     The list can be empty even if the merge has conflicts, so callers must check isErrHasConflicts
//     to know whether the merge is conflicted.
//   - an error if the command fails for any other reason, or if no tree ID is found in the output
func MergeTree(ctx context.Context, repo Repository, baseRef, headRef, mergeBase string) (treeID string, isErrHasConflicts bool, conflictFiles []string, _ error) {
	cmd := gitcmd.NewCommand("merge-tree", "--write-tree", "-z", "--name-only", "--no-messages")
	if mergeBase != "" {
		// only pass the option when a merge base is provided, don't send an empty "--merge-base=" to git
		cmd = cmd.AddOptionFormat("--merge-base=%s", mergeBase)
	}
	cmd = cmd.AddDynamicArguments(baseRef, headRef)

	stdout, stdoutClose := cmd.MakeStdoutPipe()
	defer stdoutClose()
	cmd.WithPipelineFunc(func(gitcmd.Context) error {
		// https://git-scm.com/docs/git-merge-tree/2.38.0#OUTPUT
		// For a conflicted merge, the output is:
		// <OID of toplevel tree>NUL
		// <Conflicted file name 1>NUL
		// <Conflicted file name 2>NUL
		// ...
		scanner := bufio.NewScanner(stdout)
		scanner.Split(util.BufioScannerSplit(0))
		for scanner.Scan() {
			line := scanner.Text()
			if treeID == "" { // first line is tree ID
				treeID = line
				continue
			}
			conflictFiles = append(conflictFiles, line)
			if len(conflictFiles) >= MaxConflictedDetectFiles {
				// enough file names are collected, stop reading
				break
			}
		}
		return scanner.Err()
	})

	err := RunCmdWithStderr(ctx, repo, cmd)
	// For a successful, non-conflicted merge, the exit status is 0. When the merge has conflicts, the exit status is 1.
	// A merge can have conflicts without having individual files conflict
	// https://git-scm.com/docs/git-merge-tree/2.38.0#_mistakes_to_avoid
	isErrHasConflicts = gitcmd.IsErrorExitCode(err, 1)
	if err != nil && !isErrHasConflicts {
		return "", false, nil, fmt.Errorf("run merge-tree failed: %w", err)
	}
	if treeID == "" {
		// the output should always start with the tree ID, never report success with an empty tree ID
		return "", false, nil, fmt.Errorf("run merge-tree failed: no tree ID in output for %q and %q", baseRef, headRef)
	}
	return treeID, isErrHasConflicts, conflictFiles, nil
}
+82
View File
@@ -0,0 +1,82 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitrepo
import (
"path/filepath"
"testing"
"code.gitea.io/gitea/modules/git/gitcmd"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// prepareRepoDirRenameConflict creates a bare repository in a temporary directory of the test
// and fills it by "git fast-import" with three branches:
//   - master: commit "O" with files z/b and z/c
//   - split: commit "A" on top of master, it adds w/c and y/b and deletes z/b and z/c
//   - add: commit "B" on top of master, it adds z/d
//
// It returns the path of the created repository.
func prepareRepoDirRenameConflict(t *testing.T) string {
repoDir := filepath.Join(t.TempDir(), "repo-dir-rename-conflict.git")
require.NoError(t, gitcmd.NewCommand("init", "--bare").AddDynamicArguments(repoDir).Run(t.Context()))
// the fast-import stream below is byte-sensitive: every "data <n>" header must match the length of the following payload
stdin := `blob
mark :1
data 2
b
blob
mark :2
data 2
c
reset refs/heads/master
commit refs/heads/master
mark :3
author test <test@example.com> 1769202331 -0800
committer test <test@example.com> 1769202331 -0800
data 2
O
M 100644 :1 z/b
M 100644 :2 z/c
commit refs/heads/split
mark :4
author test <test@example.com> 1769202336 -0800
committer test <test@example.com> 1769202336 -0800
data 2
A
from :3
M 100644 :2 w/c
M 100644 :1 y/b
D z/b
D z/c
blob
mark :5
data 2
d
commit refs/heads/add
mark :6
author test <test@example.com> 1769202342 -0800
committer test <test@example.com> 1769202342 -0800
data 2
B
from :3
M 100644 :5 z/d
`
require.NoError(t, gitcmd.NewCommand("fast-import").WithDir(repoDir).WithStdinBytes([]byte(stdin)).Run(t.Context()))
return repoDir
}
// TestMergeTreeDirectoryRenameConflictWithoutFiles merges the "add" and "split" branches of the
// prepared repository and expects MergeTree to report a conflict with an empty conflicted file list.
func TestMergeTreeDirectoryRenameConflictWithoutFiles(t *testing.T) {
	ctx := t.Context()

	bareRepoDir := prepareRepoDirRenameConflict(t)
	require.DirExists(t, bareRepoDir)
	testRepo := &mockRepository{path: bareRepoDir}

	base, err := MergeBase(ctx, testRepo, "add", "split")
	require.NoError(t, err)

	mergedTree, hasConflicts, files, err := MergeTree(ctx, testRepo, "add", "split", base)
	require.NoError(t, err)

	// the merge is conflicted although no individual file is reported as conflicted
	assert.True(t, hasConflicts)
	assert.Empty(t, files)
	assert.Equal(t, "5e3dd4cfc5b11e278a35b2daa83b7274175e3ab1", mergedTree)
}
+22
View File
@@ -0,0 +1,22 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package util
import "bytes"
// BufioScannerSplit returns a split function for bufio.Scanner that cuts the input into tokens
// separated by the delimiter byte b. The delimiter is consumed but not included in the token.
// At EOF, the remaining data without a trailing delimiter is returned as the last token.
func BufioScannerSplit(b byte) func(data []byte, atEOF bool) (advance int, token []byte, err error) {
	// modeled after bufio.ScanLines, with a caller-chosen delimiter
	return func(data []byte, atEOF bool) (int, []byte, error) {
		pos := bytes.IndexByte(data, b)
		switch {
		case pos >= 0:
			// a full delimiter-terminated token is available
			return pos + 1, data[:pos], nil
		case atEOF && len(data) > 0:
			// final token without a delimiter
			return len(data), data, nil
		default:
			// nothing left at EOF, or more data is needed
			return 0, nil, nil
		}
	}
}