Skip to content
This repository has been archived by the owner on Sep 11, 2020. It is now read-only.

feat: improve clone performance and reduce memory usage. #1170

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 53 additions & 46 deletions plumbing/format/packfile/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"io"
"io/ioutil"

"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
Expand Down Expand Up @@ -263,11 +264,14 @@ func (p *Parser) indexObjects() error {
}

func (p *Parser) resolveDeltas() error {
buf := &bytes.Buffer{}
for _, obj := range p.oi {
content, err := p.get(obj)
buf.Reset()
err := p.get(obj, buf)
if err != nil {
return err
}
content := buf.Bytes()

if err := p.onInflatedObjectHeader(obj.Type, obj.Length, obj.Offset); err != nil {
return err
Expand All @@ -279,7 +283,7 @@ func (p *Parser) resolveDeltas() error {

if !obj.IsDelta() && len(obj.Children) > 0 {
for _, child := range obj.Children {
if _, err := p.resolveObject(child, content); err != nil {
if err := p.resolveObject(ioutil.Discard, child, content); err != nil {
return err
}
}
Expand All @@ -294,120 +298,123 @@ func (p *Parser) resolveDeltas() error {
return nil
}

func (p *Parser) get(o *objectInfo) (b []byte, err error) {
var ok bool
func (p *Parser) get(o *objectInfo, buf *bytes.Buffer) error {
if !o.ExternalRef { // skip cache check for placeholder parents
b, ok = p.cache.Get(o.Offset)
b, ok := p.cache.Get(o.Offset)
if ok {
_, err := buf.Write(b)
return err
}
}

// If it's not on the cache and is not a delta we can try to find it in the
// storage, if there's one. External refs must enter here.
if !ok && p.storage != nil && !o.Type.IsDelta() {
if p.storage != nil && !o.Type.IsDelta() {
e, err := p.storage.EncodedObject(plumbing.AnyObject, o.SHA1)
if err != nil {
return nil, err
return err
}
o.Type = e.Type()

r, err := e.Reader()
if err != nil {
return nil, err
}

b = make([]byte, e.Size())
if _, err = r.Read(b); err != nil {
return nil, err
return err
}
}

if b != nil {
return b, nil
_, err = buf.ReadFrom(io.LimitReader(r, e.Size()))
return err
}

if o.ExternalRef {
// we were not able to resolve a ref in a thin pack
return nil, ErrReferenceDeltaNotFound
return ErrReferenceDeltaNotFound
}

var data []byte
if o.DiskType.IsDelta() {
base, err := p.get(o.Parent)
b := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(b)
b.Reset()
err := p.get(o.Parent, b)
if err != nil {
return nil, err
return err
}
base := b.Bytes()

data, err = p.resolveObject(o, base)
err = p.resolveObject(buf, o, base)
if err != nil {
return nil, err
return err
}
} else {
data, err = p.readData(o)
err := p.readData(buf, o)
if err != nil {
return nil, err
return err
}
}

if len(o.Children) > 0 {
data := make([]byte, buf.Len())
copy(data, buf.Bytes())
p.cache.Put(o.Offset, data)
}

return data, nil
return nil
}

func (p *Parser) resolveObject(
w io.Writer,
o *objectInfo,
base []byte,
) ([]byte, error) {
) error {
if !o.DiskType.IsDelta() {
return nil, nil
return nil
}

data, err := p.readData(o)
buf := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(buf)
buf.Reset()
err := p.readData(buf, o)
if err != nil {
return nil, err
return err
}
data := buf.Bytes()

data, err = applyPatchBase(o, data, base)
if err != nil {
return nil, err
return err
}

if p.storage != nil {
obj := new(plumbing.MemoryObject)
obj.SetSize(o.Size())
obj.SetType(o.Type)
if _, err := obj.Write(data); err != nil {
return nil, err
return err
}

if _, err := p.storage.SetEncodedObject(obj); err != nil {
return nil, err
return err
}
}

return data, nil
_, err = w.Write(data)
return err
}

func (p *Parser) readData(o *objectInfo) ([]byte, error) {
func (p *Parser) readData(w io.Writer, o *objectInfo) error {
if !p.scanner.IsSeekable && o.DiskType.IsDelta() {
data, ok := p.deltas[o.Offset]
if !ok {
return nil, ErrDeltaNotCached
return ErrDeltaNotCached
}

return data, nil
_, err := w.Write(data)
return err
}

if _, err := p.scanner.SeekObjectHeader(o.Offset); err != nil {
return nil, err
return err
}

buf := new(bytes.Buffer)
if _, _, err := p.scanner.NextObject(buf); err != nil {
return nil, err
if _, _, err := p.scanner.NextObject(w); err != nil {
return err
}

return buf.Bytes(), nil
return nil
}

func applyPatchBase(ota *objectInfo, data, base []byte) ([]byte, error) {
Expand Down
53 changes: 36 additions & 17 deletions plumbing/format/packfile/patch_delta.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
package packfile

import (
"bytes"
"errors"
"io/ioutil"
"io"

"gopkg.in/src-d/go-git.v4/plumbing"
)
Expand All @@ -26,19 +27,29 @@ func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) error {
return err
}

src, err := ioutil.ReadAll(r)
buf := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(buf)
buf.Reset()
_, err = buf.ReadFrom(r)
if err != nil {
return err
}
src := buf.Bytes()

dst, err := PatchDelta(src, delta)
dst := bufPool.Get().(*bytes.Buffer)
defer bufPool.Put(dst)
dst.Reset()
err = patchDelta(dst, src, delta)
if err != nil {
return err
}

target.SetSize(int64(len(dst)))

_, err = w.Write(dst)
target.SetSize(int64(dst.Len()))

b := byteSlicePool.Get().([]byte)
_, err = io.CopyBuffer(w, dst, b)
byteSlicePool.Put(b)
return err
}

Expand All @@ -51,23 +62,31 @@ var (
// An error will be returned if delta is corrupted (ErrDeltaLen) or an action command
// is not copy from source or copy from delta (ErrDeltaCmd).
func PatchDelta(src, delta []byte) ([]byte, error) {
b := &bytes.Buffer{}
if err := patchDelta(b, src, delta); err != nil {
return nil, err
}
return b.Bytes(), nil
}

func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
if len(delta) < deltaSizeMin {
return nil, ErrInvalidDelta
return ErrInvalidDelta
}

srcSz, delta := decodeLEB128(delta)
if srcSz != uint(len(src)) {
return nil, ErrInvalidDelta
return ErrInvalidDelta
}

targetSz, delta := decodeLEB128(delta)
remainingTargetSz := targetSz

var cmd byte
dest := make([]byte, 0, targetSz)
dst.Grow(int(targetSz))
for {
if len(delta) == 0 {
return nil, ErrInvalidDelta
return ErrInvalidDelta
}

cmd = delta[0]
Expand All @@ -77,43 +96,43 @@ func PatchDelta(src, delta []byte) ([]byte, error) {
var err error
offset, delta, err = decodeOffset(cmd, delta)
if err != nil {
return nil, err
return err
}

sz, delta, err = decodeSize(cmd, delta)
if err != nil {
return nil, err
return err
}

if invalidSize(sz, targetSz) ||
invalidOffsetSize(offset, sz, srcSz) {
break
}
dest = append(dest, src[offset:offset+sz]...)
dst.Write(src[offset:offset+sz])
remainingTargetSz -= sz
} else if isCopyFromDelta(cmd) {
sz := uint(cmd) // cmd is the size itself
if invalidSize(sz, targetSz) {
return nil, ErrInvalidDelta
return ErrInvalidDelta
}

if uint(len(delta)) < sz {
return nil, ErrInvalidDelta
return ErrInvalidDelta
}

dest = append(dest, delta[0:sz]...)
dst.Write(delta[0:sz])
remainingTargetSz -= sz
delta = delta[sz:]
} else {
return nil, ErrDeltaCmd
return ErrDeltaCmd
}

if remainingTargetSz <= 0 {
break
}
}

return dest, nil
return nil
}

// Decodes a number encoded as an unsigned LEB128 at the start of some
Expand Down
20 changes: 20 additions & 0 deletions repository_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"golang.org/x/crypto/openpgp"
"golang.org/x/crypto/openpgp/armor"
openpgperr "golang.org/x/crypto/openpgp/errors"

"gopkg.in/src-d/go-git.v4/config"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/cache"
Expand Down Expand Up @@ -2671,3 +2672,22 @@ func BenchmarkObjects(b *testing.B) {
})
}
}

func BenchmarkPlainClone(b *testing.B) {
for i := 0; i < b.N; i++ {
t, err := ioutil.TempDir("", "")
if err != nil {
b.Fatal(err)
}
_, err = PlainClone(t, false, &CloneOptions{
URL: "https://github.com/knqyf263/vuln-list",
Depth: 1,
})
if err != nil {
b.Error(err)
}
b.StopTimer()
os.RemoveAll(t)
b.StartTimer()
}
}
8 changes: 7 additions & 1 deletion storage/filesystem/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ func (s *IndexStorage) SetIndex(idx *index.Index) (err error) {
}

defer ioutil.CheckClose(f, &err)
bw := bufio.NewWriter(f)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you found substantial difference using a buffered writer to save the index?

Copy link
Contributor Author

@orisano orisano Jul 1, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

local benchmark result. reverted bufio.Writer.

name          old time/op    new time/op    delta
PlainClone-4      173s ± 5%      251s ± 9%  +44.84%  (p=0.008 n=5+5)

name          old alloc/op   new alloc/op   delta
PlainClone-4    29.8GB ± 0%    29.4GB ± 0%   -1.12%  (p=0.008 n=5+5)

name          old allocs/op  new allocs/op  delta
PlainClone-4      114M ± 0%      114M ± 0%   -0.04%  (p=0.008 n=5+5)

defer func() {
if e := bw.Flush(); err == nil && e != nil {
err = e
}
}()

e := index.NewEncoder(f)
e := index.NewEncoder(bw)
err = e.Encode(idx)
return err
}
Expand Down
Loading