From ebd9fb225390f534bb01953bb66feeedeec7b28c Mon Sep 17 00:00:00 2001 From: Unknwon Date: Sun, 13 Dec 2015 22:57:47 -0500 Subject: [PATCH] speed up for getting a lot commits --- command.go | 2 +- commit.go | 36 +++++++++++--------- git.go | 2 +- repo.go | 10 ++++-- repo_commit.go | 12 +++---- repo_tag.go | 15 ++++---- tree_entry.go | 92 ++++++++++++++++++++++++++++++++++++++++++++++---- utlis.go | 28 +++++++++++++++ 8 files changed, 155 insertions(+), 42 deletions(-) diff --git a/command.go b/command.go index 0b2fdcb..f7fce67 100644 --- a/command.go +++ b/command.go @@ -95,7 +95,7 @@ func (c *Command) RunInDirTimeout(timeout time.Duration, dir string) ([]byte, er } if stdout.Len() > 0 { - log("stdout:\n%s", stdout) + log("stdout:\n%s", stdout.Bytes()[:1024]) } return stdout.Bytes(), nil } diff --git a/commit.go b/commit.go index f1d48b9..9b89c62 100644 --- a/commit.go +++ b/commit.go @@ -23,8 +23,8 @@ type Commit struct { Committer *Signature CommitMessage string - parents []sha1 // SHA1 strings - submodules map[string]*SubModule + parents []sha1 // SHA1 strings + submoduleCache *objectCache } // Message returns the commit message. Same as retrieving CommitMessage directly. 
@@ -180,17 +180,9 @@ func (c *Commit) SearchCommits(keyword string) (*list.List, error) { return c.repo.searchCommits(c.ID, keyword) } -func (c *Commit) GetSubModule(entryname string) (*SubModule, error) { - modules, err := c.GetSubModules() - if err != nil { - return nil, err - } - return modules[entryname], nil -} - -func (c *Commit) GetSubModules() (map[string]*SubModule, error) { - if c.submodules != nil { - return c.submodules, nil +func (c *Commit) GetSubModules() (*objectCache, error) { + if c.submoduleCache != nil { + return c.submoduleCache, nil } entry, err := c.GetTreeEntryByPath(".gitmodules") @@ -203,7 +195,6 @@ func (c *Commit) GetSubModules() (map[string]*SubModule, error) { } scanner := bufio.NewScanner(rd) - c.submodules = make(map[string]*SubModule) var ismodule bool var path string for scanner.Scan() { @@ -217,11 +208,24 @@ func (c *Commit) GetSubModules() (map[string]*SubModule, error) { if k == "path" { path = strings.TrimSpace(fields[1]) } else if k == "url" { - c.submodules[path] = &SubModule{path, strings.TrimSpace(fields[1])} + c.submoduleCache.Set(path, &SubModule{path, strings.TrimSpace(fields[1])}) ismodule = false } } } - return c.submodules, nil + return c.submoduleCache, nil +} + +func (c *Commit) GetSubModule(entryname string) (*SubModule, error) { + modules, err := c.GetSubModules() + if err != nil { + return nil, err + } + + module, has := modules.Get(entryname) + if has { + return module.(*SubModule), nil + } + return nil, nil } diff --git a/git.go b/git.go index a175f29..3fba0eb 100644 --- a/git.go +++ b/git.go @@ -10,7 +10,7 @@ import ( "time" ) -const _VERSION = "0.1.1" +const _VERSION = "0.2.0" func Version() string { return _VERSION diff --git a/repo.go b/repo.go index 60e8b89..12aca7e 100644 --- a/repo.go +++ b/repo.go @@ -18,8 +18,8 @@ import ( type Repository struct { Path string - commitCache map[sha1]*Commit - tagCache map[sha1]*Tag + commitCache *objectCache + tagCache *objectCache } const _PRETTY_LOG_FORMAT = 
`--pretty=format:%H` @@ -64,7 +64,11 @@ func OpenRepository(repoPath string) (*Repository, error) { return nil, errors.New("no such file or directory") } - return &Repository{Path: repoPath}, nil + return &Repository{ + Path: repoPath, + commitCache: newObjectCache(), + tagCache: newObjectCache(), + }, nil } type CloneRepoOptions struct { diff --git a/repo_commit.go b/repo_commit.go index d409b6a..c321296 100644 --- a/repo_commit.go +++ b/repo_commit.go @@ -36,6 +36,7 @@ func (repo *Repository) GetTagCommitID(name string) (string, error) { // \n\n separate headers from message func parseCommitData(data []byte) (*Commit, error) { commit := new(Commit) + commit.submoduleCache = newObjectCache() commit.parents = make([]sha1, 0, 1) // we now have the contents of the commit object. Let's investigate... nextline := 0 @@ -86,13 +87,10 @@ l: } func (repo *Repository) getCommit(id sha1) (*Commit, error) { - if repo.commitCache != nil { + c, ok := repo.commitCache.Get(id.String()) + if ok { log("Hit cache: %s", id) - if c, ok := repo.commitCache[id]; ok { - return c, nil - } - } else { - repo.commitCache = make(map[sha1]*Commit, 10) + return c.(*Commit), nil } data, err := NewCommand("cat-file", "-p", id.String()).RunInDirBytes(repo.Path) @@ -107,7 +105,7 @@ func (repo *Repository) getCommit(id sha1) (*Commit, error) { commit.repo = repo commit.ID = id - repo.commitCache[id] = commit + repo.commitCache.Set(id.String(), commit) return commit, nil } diff --git a/repo_tag.go b/repo_tag.go index f0fdc44..b4dc69b 100644 --- a/repo_tag.go +++ b/repo_tag.go @@ -27,12 +27,10 @@ func (repo *Repository) CreateTag(name, revision string) error { } func (repo *Repository) getTag(id sha1) (*Tag, error) { - if repo.tagCache != nil { - if t, ok := repo.tagCache[id]; ok { - return t, nil - } - } else { - repo.tagCache = make(map[sha1]*Tag, 10) + t, ok := repo.tagCache.Get(id.String()) + if ok { + log("Hit cache: %s", id) + return t.(*Tag), nil } // Get tag type @@ -50,7 +48,8 @@ func (repo 
*Repository) getTag(id sha1) (*Tag, error) { Type: string(OBJECT_COMMIT), repo: repo, } - repo.tagCache[id] = tag + + repo.tagCache.Set(id.String(), tag) return tag, nil } @@ -68,7 +67,7 @@ func (repo *Repository) getTag(id sha1) (*Tag, error) { tag.ID = id tag.repo = repo - repo.tagCache[id] = tag + repo.tagCache.Set(id.String(), tag) return tag, nil } diff --git a/tree_entry.go b/tree_entry.go index 5825846..0fe11b9 100644 --- a/tree_entry.go +++ b/tree_entry.go @@ -5,6 +5,9 @@ package git import ( + "fmt" + "path" + "path/filepath" "sort" "strconv" "strings" @@ -84,10 +87,10 @@ var sorter = []func(t1, t2 *TreeEntry) bool{ }, } -func (bs Entries) Len() int { return len(bs) } -func (bs Entries) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] } -func (bs Entries) Less(i, j int) bool { - t1, t2 := bs[i], bs[j] +func (tes Entries) Len() int { return len(tes) } +func (tes Entries) Swap(i, j int) { tes[i], tes[j] = tes[j], tes[i] } +func (tes Entries) Less(i, j int) bool { + t1, t2 := tes[i], tes[j] var k int for k = 0; k < len(sorter)-1; k++ { sort := sorter[k] @@ -101,6 +104,83 @@ func (bs Entries) Less(i, j int) bool { return sorter[k](t1, t2) } -func (bs Entries) Sort() { - sort.Sort(bs) +func (tes Entries) Sort() { + sort.Sort(tes) +} + +type commitInfo struct { + id string + infos []interface{} + err error +} + +// GetCommitsInfo takes advantage of concurrency to speed up getting information +// of all commits that correspond to these entries. 
+// TODO: limit max goroutines at the same time +func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) { + if len(tes) == 0 { + return nil, nil + } + + revChan := make(chan commitInfo, 10) + + infoMap := make(map[string][]interface{}, len(tes)) + for i := range tes { + if tes[i].Type != OBJECT_COMMIT { + go func(i int) { + cinfo := commitInfo{id: tes[i].ID.String()} + c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) + if err != nil { + cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) + } else { + cinfo.infos = []interface{}{tes[i], c} + } + revChan <- cinfo + }(i) + continue + } + + // Handle submodule + go func(i int) { + cinfo := commitInfo{id: tes[i].ID.String()} + sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name())) + if err != nil { + cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err) + revChan <- cinfo + return + } + + smUrl := "" + if sm != nil { + smUrl = sm.Url + } + + c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) + if err != nil { + cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) + } else { + cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smUrl, tes[i].ID.String())} + } + revChan <- cinfo + }(i) + } + + i := 0 + for info := range revChan { + if info.err != nil { + return nil, info.err + } + + infoMap[info.id] = info.infos + i++ + if i == len(tes) { + break + } + } + + commitsInfo := make([][]interface{}, len(tes)) + for i := 0; i < len(tes); i++ { + commitsInfo[i] = infoMap[tes[i].ID.String()] + } + return commitsInfo, nil } diff --git a/utlis.go b/utlis.go index 14d6cd3..cfaaee3 100644 --- a/utlis.go +++ b/utlis.go @@ -9,8 +9,36 @@ import ( "os" "path/filepath" "strings" + "sync" ) +// objectCache provides thread-safe cache operations. 
+type objectCache struct { + lock sync.RWMutex + cache map[string]interface{} +} + +func newObjectCache() *objectCache { + return &objectCache{ + cache: make(map[string]interface{}, 10), + } +} + +func (oc *objectCache) Set(id string, obj interface{}) { + oc.lock.Lock() + defer oc.lock.Unlock() + + oc.cache[id] = obj +} + +func (oc *objectCache) Get(id string) (interface{}, bool) { + oc.lock.RLock() + defer oc.lock.RUnlock() + + obj, has := oc.cache[id] + return obj, has +} + // isDir returns true if given path is a directory, // or returns false when it's a file or does not exist. func isDir(dir string) bool {