Refactor cat-file batch operations and support --batch-command approach (#35775)

Replaces #34651 and addresses more problems, including fixing framework bugs and switching to QueryInfo and QueryContent calls.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
wxiaoguang
2026-01-09 05:37:36 +08:00
committed by GitHub
parent ee9d8893a7
commit e226720cff
34 changed files with 832 additions and 812 deletions
+13 -31
View File
@@ -8,6 +8,7 @@ package pipeline
import (
"bufio"
"bytes"
"encoding/hex"
"io"
"sort"
"strings"
@@ -53,12 +54,9 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
}
defer cancel()
batchStdinWriter := batch.Writer()
batchReader := batch.Reader()
// We'll use a scanner for the revList because it's simpler than a bufio.Reader
scan := bufio.NewScanner(revListReader)
trees := [][]byte{}
trees := []string{}
paths := []string{}
fnameBuf := make([]byte, 4096)
@@ -67,14 +65,10 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
for scan.Scan() {
// Get the next commit ID
commitID := scan.Bytes()
commitID := scan.Text()
// push the commit to the cat-file --batch process
_, err := batchStdinWriter.Write(commitID)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte{'\n'})
info, batchReader, err := batch.QueryContent(commitID)
if err != nil {
return nil, err
}
@@ -84,26 +78,20 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
commitReadingLoop:
for {
_, typ, size, err := git.ReadBatchLine(batchReader)
if err != nil {
return nil, err
}
switch typ {
switch info.Type {
case "tag":
// This shouldn't happen, but if it does we'll just get the commit and try again
id, err := git.ReadTagObjectID(batchReader, size)
id, err := git.ReadTagObjectID(batchReader, info.Size)
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte(id + "\n"))
if err != nil {
if info, batchReader, err = batch.QueryContent(id); err != nil {
return nil, err
}
continue
case "commit":
// Read in the commit to get its tree and in case this is one of the last used commits
curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(string(commitID)), io.LimitReader(batchReader, size))
curCommit, err = git.CommitFromReader(repo, git.MustIDFromString(commitID), io.LimitReader(batchReader, info.Size))
if err != nil {
return nil, err
}
@@ -111,13 +99,13 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
return nil, err
}
if _, err := batchStdinWriter.Write([]byte(curCommit.Tree.ID.String() + "\n")); err != nil {
if info, _, err = batch.QueryContent(curCommit.Tree.ID.String()); err != nil {
return nil, err
}
curPath = ""
case "tree":
var n int64
for n < size {
for n < info.Size {
mode, fname, binObjectID, count, err := git.ParseCatFileTreeLine(objectID.Type(), batchReader, modeBuf, fnameBuf, workingShaBuf)
if err != nil {
return nil, err
@@ -133,9 +121,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
}
resultsMap[curCommit.ID.String()+":"+curPath+string(fname)] = &result
} else if string(mode) == git.EntryModeTree.String() {
hexObjectID := make([]byte, objectID.Type().FullLength())
git.BinToHex(objectID.Type(), binObjectID, hexObjectID)
trees = append(trees, hexObjectID)
trees = append(trees, hex.EncodeToString(binObjectID))
paths = append(paths, curPath+string(fname)+"/")
}
}
@@ -143,11 +129,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
return nil, err
}
if len(trees) > 0 {
_, err := batchStdinWriter.Write(trees[len(trees)-1])
if err != nil {
return nil, err
}
_, err = batchStdinWriter.Write([]byte("\n"))
info, _, err = batch.QueryContent(trees[len(trees)-1])
if err != nil {
return nil, err
}
@@ -158,7 +140,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
break commitReadingLoop
}
default:
if err := git.DiscardFull(batchReader, size+1); err != nil {
if err := git.DiscardFull(batchReader, info.Size+1); err != nil {
return nil, err
}
}
+38
View File
@@ -0,0 +1,38 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package pipeline
import (
"testing"
"time"
"code.gitea.io/gitea/modules/git"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestFindLFSFile runs FindLFSFile against the user2/lfs.git test fixture and
// checks that exactly one result is reported for the queried object ID, and
// that the result carries the expected name, commit SHA, summary, time,
// parents and branch information.
func TestFindLFSFile(t *testing.T) {
	repoPath := "../../../tests/gitea-repositories-meta/user2/lfs.git"
	gitRepo, err := git.OpenRepository(t.Context(), repoPath)
	require.NoError(t, err)
	defer gitRepo.Close()

	objectID := git.MustIDFromString("2b6c6c4eaefa24b22f2092c3d54b263ff26feb58")

	stats, err := FindLFSFile(gitRepo, objectID)
	require.NoError(t, err)

	tm, err := time.Parse(time.RFC3339, "2022-12-21T17:56:42-05:00")
	require.NoError(t, err)

	// Use require rather than assert: every check below indexes stats[0], so
	// the test must stop here instead of panicking when the length is wrong.
	require.Len(t, stats, 1)

	assert.Equal(t, "CONTRIBUTING.md", stats[0].Name)
	assert.Equal(t, "73cf03db6ece34e12bf91e8853dc58f678f2f82d", stats[0].SHA)
	assert.Equal(t, "Initial commit", stats[0].Summary)
	// Compare instants with Equal so the check does not depend on how each
	// side represents its time zone internally.
	assert.True(t, tm.Equal(stats[0].When), "expected %s, got %s", tm, stats[0].When)
	assert.Empty(t, stats[0].ParentHashes)
	assert.Equal(t, "master", stats[0].BranchName)
	assert.Equal(t, "master", stats[0].FullCommitName)
}
+14
View File
@@ -0,0 +1,14 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package pipeline
import (
"testing"
"code.gitea.io/gitea/modules/git"
)
// TestMain is the test entry point for this package; it hands the testing.M
// to git.RunGitTests instead of calling m.Run directly.
// NOTE(review): RunGitTests presumably prepares the git test environment and
// runs the tests — confirm in modules/git.
func TestMain(m *testing.M) {
git.RunGitTests(m)
}