Refactor git command stdio pipe (#36422)

Most potential deadlock problems should have been fixed, and new code is
unlikely to cause new problems with the new design.

Also raise the minimum Git version required to 2.6.0 (released in 2015).
This commit is contained in:
wxiaoguang
2026-01-22 14:04:26 +08:00
committed by GitHub
parent 2a56c4ec3b
commit 3a09d7aa8d
63 changed files with 767 additions and 1016 deletions
+13 -47
View File
@@ -6,67 +6,33 @@ package pipeline
import (
"bufio"
"context"
"fmt"
"io"
"strconv"
"strings"
"sync"
"code.gitea.io/gitea/modules/git/gitcmd"
)
// CatFileBatchCheck runs cat-file with --batch-check
func CatFileBatchCheck(ctx context.Context, shasToCheckReader *io.PipeReader, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
defer wg.Done()
defer shasToCheckReader.Close()
defer catFileCheckWriter.Close()
cmd := gitcmd.NewCommand("cat-file", "--batch-check")
if err := cmd.WithDir(tmpBasePath).
WithStdin(shasToCheckReader).
WithStdout(catFileCheckWriter).
RunWithStderr(ctx); err != nil {
_ = catFileCheckWriter.CloseWithError(fmt.Errorf("git cat-file --batch-check [%s]: %w", tmpBasePath, err))
}
// CatFileBatchCheck appends "cat-file --batch-check" to the supplied command,
// points it at tmpBasePath and runs it under ctx.
//
// The error from RunWithStderr is returned unchanged.
// NOTE(review): stdin/stdout are not configured here; presumably the caller
// wires them onto cmd before calling — confirm against the call sites.
func CatFileBatchCheck(ctx context.Context, cmd *gitcmd.Command, tmpBasePath string) error {
	cmd.AddArguments("cat-file", "--batch-check")
	inDir := cmd.WithDir(tmpBasePath)
	return inDir.RunWithStderr(ctx)
}
// CatFileBatchCheckAllObjects runs cat-file with --batch-check --batch-all-objects
func CatFileBatchCheckAllObjects(ctx context.Context, catFileCheckWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string, errChan chan<- error) {
defer wg.Done()
defer catFileCheckWriter.Close()
cmd := gitcmd.NewCommand("cat-file", "--batch-check", "--batch-all-objects")
if err := cmd.WithDir(tmpBasePath).
WithStdout(catFileCheckWriter).
RunWithStderr(ctx); err != nil {
_ = catFileCheckWriter.CloseWithError(fmt.Errorf("git cat-file --batch-check --batch-all-object [%s]: %w", tmpBasePath, err))
errChan <- err
}
// CatFileBatchCheckAllObjects appends "cat-file --batch-check --batch-all-objects"
// to the supplied command, points it at tmpBasePath and runs it under ctx.
//
// The error from RunWithStderr is returned unchanged.
// NOTE(review): stdout is not configured here; presumably the caller wires it
// onto cmd before calling — confirm against the call sites.
func CatFileBatchCheckAllObjects(ctx context.Context, cmd *gitcmd.Command, tmpBasePath string) error {
	withArgs := cmd.AddArguments("cat-file", "--batch-check", "--batch-all-objects")
	inDir := withArgs.WithDir(tmpBasePath)
	return inDir.RunWithStderr(ctx)
}
// CatFileBatch runs cat-file --batch
func CatFileBatch(ctx context.Context, shasToBatchReader *io.PipeReader, catFileBatchWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
defer wg.Done()
defer shasToBatchReader.Close()
defer catFileBatchWriter.Close()
if err := gitcmd.NewCommand("cat-file", "--batch").
WithDir(tmpBasePath).
WithStdin(shasToBatchReader).
WithStdout(catFileBatchWriter).
RunWithStderr(ctx); err != nil {
_ = shasToBatchReader.CloseWithError(fmt.Errorf("git rev-list [%s]: %w", tmpBasePath, err))
}
// CatFileBatch appends "cat-file --batch" to the supplied command, points it
// at tmpBasePath and runs it under ctx.
//
// The error from RunWithStderr is returned unchanged.
// NOTE(review): stdin/stdout are not configured here; presumably the caller
// wires them onto cmd before calling — confirm against the call sites.
func CatFileBatch(ctx context.Context, cmd *gitcmd.Command, tmpBasePath string) error {
	withArgs := cmd.AddArguments("cat-file", "--batch")
	inDir := withArgs.WithDir(tmpBasePath)
	return inDir.RunWithStderr(ctx)
}
// BlobsLessThan1024FromCatFileBatchCheck reads a pipeline from cat-file --batch-check and returns the blobs <1024 in size
func BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, shasToBatchWriter *io.PipeWriter, wg *sync.WaitGroup) {
defer wg.Done()
defer catFileCheckReader.Close()
scanner := bufio.NewScanner(catFileCheckReader)
defer func() {
_ = shasToBatchWriter.CloseWithError(scanner.Err())
}()
func BlobsLessThan1024FromCatFileBatchCheck(in io.ReadCloser, out io.WriteCloser) error {
defer out.Close()
scanner := bufio.NewScanner(in)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
@@ -82,12 +48,12 @@ func BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader *io.PipeReader, s
}
toWrite := []byte(fields[0] + "\n")
for len(toWrite) > 0 {
n, err := shasToBatchWriter.Write(toWrite)
n, err := out.Write(toWrite)
if err != nil {
_ = catFileCheckReader.CloseWithError(err)
break
return err
}
toWrite = toWrite[n:]
}
}
return scanner.Err()
}
-5
View File
@@ -4,7 +4,6 @@
package pipeline
import (
"fmt"
"time"
"code.gitea.io/gitea/modules/git"
@@ -26,7 +25,3 @@ type lfsResultSlice []*LFSResult
// Len reports how many results the slice holds.
func (a lfsResultSlice) Len() int {
	return len(a)
}

// Swap exchanges the results stored at indexes i and j.
func (a lfsResultSlice) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the result at i sorts before the result at j, i.e.
// whether a[j].When is after a[i].When (ascending order by When).
func (a lfsResultSlice) Less(i, j int) bool {
	later, earlier := a[j].When, a[i].When
	return later.After(earlier)
}
// lfsError wraps err with a fixed "LFS error occurred" prefix and the
// caller-supplied msg. The original error stays reachable through
// errors.Is / errors.As because it is wrapped with %w.
func lfsError(msg string, err error) error {
	const format = "LFS error occurred, %s: err: %w"
	return fmt.Errorf(format, msg, err)
}
+5 -66
View File
@@ -6,11 +6,10 @@
package pipeline
import (
"bufio"
"fmt"
"io"
"sort"
"strings"
"sync"
"code.gitea.io/gitea/modules/git"
@@ -24,7 +23,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
resultsMap := map[string]*LFSResult{}
results := make([]*LFSResult, 0)
basePath := repo.Path
gogitRepo := repo.GoGitRepo()
commitsIter, err := gogitRepo.Log(&gogit.LogOptions{
@@ -32,7 +30,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
All: true,
})
if err != nil {
return nil, lfsError("failed to get GoGit CommitsIter", err)
return nil, fmt.Errorf("LFS error occurred, failed to get GoGit CommitsIter: err: %w", err)
}
err = commitsIter.ForEach(func(gitCommit *object.Commit) error {
@@ -66,7 +64,7 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
return nil
})
if err != nil && err != io.EOF {
return nil, lfsError("failure in CommitIter.ForEach", err)
return nil, fmt.Errorf("LFS error occurred, failure in CommitIter.ForEach: %w", err)
}
for _, result := range resultsMap {
@@ -82,65 +80,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
}
sort.Sort(lfsResultSlice(results))
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
shasToNameReader, shasToNameWriter := io.Pipe()
nameRevStdinReader, nameRevStdinWriter := io.Pipe()
errChan := make(chan error, 1)
wg := sync.WaitGroup{}
wg.Add(3)
go func() {
defer wg.Done()
scanner := bufio.NewScanner(nameRevStdinReader)
i := 0
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
result := results[i]
result.FullCommitName = line
result.BranchName = strings.Split(line, "~")[0]
i++
}
}()
go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
go func() {
defer wg.Done()
defer shasToNameWriter.Close()
for _, result := range results {
i := 0
if i < len(result.SHA) {
n, err := shasToNameWriter.Write([]byte(result.SHA)[i:])
if err != nil {
errChan <- err
break
}
i += n
}
n := 0
for n < 1 {
n, err = shasToNameWriter.Write([]byte{'\n'})
if err != nil {
errChan <- err
break
}
}
}
}()
wg.Wait()
select {
case err, has := <-errChan:
if has {
return nil, lfsError("unable to obtain name for LFS files", err)
}
default:
}
return results, nil
err = fillResultNameRev(repo.Ctx, repo.Path, results)
return results, err
}
+15 -72
View File
@@ -12,34 +12,27 @@ import (
"io"
"sort"
"strings"
"sync"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/git/gitcmd"
)
// FindLFSFile finds commits that contain a provided pointer file hash
func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, error) {
// FindLFSFile finds commits that contain a provided pointer file hash.
//
// It runs "git rev-list --all" in repo.Path and hands the command's stdout
// pipe to findLFSFileFunc through the command's pipeline function. The value
// returned by findLFSFileFunc is stored in the named result, and the error
// from RunWithStderr is returned alongside it.
func FindLFSFile(repo *git.Repository, objectID git.ObjectID) (results []*LFSResult, _ error) {
	revList := gitcmd.NewCommand("rev-list", "--all")
	stdout, closeStdout := revList.MakeStdoutPipe()
	defer closeStdout()

	// The callback fills the named result so it is still available after the
	// command has finished running.
	scan := func(_ gitcmd.Context) error {
		var scanErr error
		results, scanErr = findLFSFileFunc(repo, objectID, stdout)
		return scanErr
	}

	runErr := revList.WithDir(repo.Path).WithPipelineFunc(scan).RunWithStderr(repo.Ctx)
	return results, runErr
}
func findLFSFileFunc(repo *git.Repository, objectID git.ObjectID, revListReader io.Reader) ([]*LFSResult, error) {
resultsMap := map[string]*LFSResult{}
results := make([]*LFSResult, 0)
basePath := repo.Path
// Use rev-list to provide us with all commits in order
revListReader, revListWriter := io.Pipe()
defer func() {
_ = revListWriter.Close()
_ = revListReader.Close()
}()
go func() {
err := gitcmd.NewCommand("rev-list", "--all").
WithDir(repo.Path).
WithStdout(revListWriter).
RunWithStderr(repo.Ctx)
_ = revListWriter.CloseWithError(err)
}()
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batch, cancel, err := repo.CatFileBatch(repo.Ctx)
@@ -158,56 +151,6 @@ func FindLFSFile(repo *git.Repository, objectID git.ObjectID) ([]*LFSResult, err
}
sort.Sort(lfsResultSlice(results))
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
shasToNameReader, shasToNameWriter := io.Pipe()
nameRevStdinReader, nameRevStdinWriter := io.Pipe()
errChan := make(chan error, 1)
wg := sync.WaitGroup{}
wg.Add(3)
go func() {
defer wg.Done()
scanner := bufio.NewScanner(nameRevStdinReader)
i := 0
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
result := results[i]
result.FullCommitName = line
result.BranchName = strings.Split(line, "~")[0]
i++
}
}()
go NameRevStdin(repo.Ctx, shasToNameReader, nameRevStdinWriter, &wg, basePath)
go func() {
defer wg.Done()
defer shasToNameWriter.Close()
for _, result := range results {
_, err := shasToNameWriter.Write([]byte(result.SHA))
if err != nil {
errChan <- err
break
}
_, err = shasToNameWriter.Write([]byte{'\n'})
if err != nil {
errChan <- err
break
}
}
}()
wg.Wait()
select {
case err, has := <-errChan:
if has {
return nil, lfsError("unable to obtain name for LFS files", err)
}
default:
}
return results, nil
err = fillResultNameRev(repo.Ctx, repo.Path, results)
return results, err
}
+44 -15
View File
@@ -4,25 +4,54 @@
package pipeline
import (
"bufio"
"context"
"fmt"
"io"
"sync"
"errors"
"strings"
"code.gitea.io/gitea/modules/git/gitcmd"
"golang.org/x/sync/errgroup"
)
// NameRevStdin runs name-rev --stdin
func NameRevStdin(ctx context.Context, shasToNameReader *io.PipeReader, nameRevStdinWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath string) {
defer wg.Done()
defer shasToNameReader.Close()
defer nameRevStdinWriter.Close()
func fillResultNameRev(ctx context.Context, basePath string, results []*LFSResult) error {
// Should really use a go-git function here but name-rev is not completed and recapitulating it is not simple
wg := errgroup.Group{}
cmd := gitcmd.NewCommand("name-rev", "--stdin", "--name-only", "--always").WithDir(basePath)
stdin, stdinClose := cmd.MakeStdinPipe()
stdout, stdoutClose := cmd.MakeStdoutPipe()
defer stdinClose()
defer stdoutClose()
if err := gitcmd.NewCommand("name-rev", "--stdin", "--name-only", "--always").
WithDir(tmpBasePath).
WithStdin(shasToNameReader).
WithStdout(nameRevStdinWriter).
RunWithStderr(ctx); err != nil {
_ = shasToNameReader.CloseWithError(fmt.Errorf("git name-rev [%s]: %w", tmpBasePath, err))
}
wg.Go(func() error {
scanner := bufio.NewScanner(stdout)
i := 0
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
continue
}
result := results[i]
result.FullCommitName = line
result.BranchName = strings.Split(line, "~")[0]
i++
}
return scanner.Err()
})
wg.Go(func() error {
defer stdinClose()
for _, result := range results {
_, err := stdin.Write([]byte(result.SHA))
if err != nil {
return err
}
_, err = stdin.Write([]byte{'\n'})
if err != nil {
return err
}
}
return nil
})
err := cmd.RunWithStderr(ctx)
return errors.Join(err, wg.Wait())
}
+9 -36
View File
@@ -6,52 +6,25 @@ package pipeline
import (
"bufio"
"context"
"fmt"
"io"
"strings"
"sync"
"code.gitea.io/gitea/modules/git/gitcmd"
)
// RevListAllObjects runs "git rev-list --objects --all" in basePath and
// streams its stdout into revListWriter.
//
// wg.Done is called and revListWriter is closed when the function returns.
// If the command fails, the writer is closed with a wrapped error naming
// basePath, and the unwrapped command error is sent on errChan.
func RevListAllObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, basePath string, errChan chan<- error) {
	defer wg.Done()
	defer revListWriter.Close()

	runErr := gitcmd.NewCommand("rev-list", "--objects", "--all").
		WithDir(basePath).
		WithStdout(revListWriter).
		RunWithStderr(ctx)
	if runErr == nil {
		return
	}

	// Surface the failure to the pipe reader first, then to the error channel.
	_ = revListWriter.CloseWithError(fmt.Errorf("git rev-list --objects --all [%s]: %w", basePath, runErr))
	errChan <- runErr
}
// RevListObjects runs rev-list --objects for headSHA, excluding (via --not) objects reachable from baseSHA when baseSHA is non-empty
func RevListObjects(ctx context.Context, revListWriter *io.PipeWriter, wg *sync.WaitGroup, tmpBasePath, headSHA, baseSHA string, errChan chan<- error) {
defer wg.Done()
defer revListWriter.Close()
cmd := gitcmd.NewCommand("rev-list", "--objects").AddDynamicArguments(headSHA)
func RevListObjects(ctx context.Context, cmd *gitcmd.Command, tmpBasePath, headSHA, baseSHA string) error {
cmd.AddArguments("rev-list", "--objects").AddDynamicArguments(headSHA)
if baseSHA != "" {
cmd = cmd.AddArguments("--not").AddDynamicArguments(baseSHA)
}
if err := cmd.WithDir(tmpBasePath).
WithStdout(revListWriter).
RunWithStderr(ctx); err != nil {
errChan <- fmt.Errorf("git rev-list [%s]: %w", tmpBasePath, err)
}
return cmd.WithDir(tmpBasePath).RunWithStderr(ctx)
}
// BlobsFromRevListObjects reads a RevListAllObjects and only selects blobs
func BlobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io.PipeWriter, wg *sync.WaitGroup) {
defer wg.Done()
defer revListReader.Close()
scanner := bufio.NewScanner(revListReader)
defer func() {
_ = shasToCheckWriter.CloseWithError(scanner.Err())
}()
func BlobsFromRevListObjects(in io.ReadCloser, out io.WriteCloser) error {
defer out.Close()
scanner := bufio.NewScanner(in)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 {
@@ -63,12 +36,12 @@ func BlobsFromRevListObjects(revListReader *io.PipeReader, shasToCheckWriter *io
}
toWrite := []byte(fields[0] + "\n")
for len(toWrite) > 0 {
n, err := shasToCheckWriter.Write(toWrite)
n, err := out.Write(toWrite)
if err != nil {
_ = revListReader.CloseWithError(err)
break
return err
}
toWrite = toWrite[n:]
}
}
return scanner.Err()
}