Skip to content

Commit

Permalink
Implement lazy iteration (ForEach) over collections
Browse files Browse the repository at this point in the history
See #761

aptly had a concept of loading a small amount of info for each object
into memory once a collection is accessed for the first time.

This might have simplified some operations, but it doesn't scale well
with huge aptly databases.

This is just an intermediate step towards better memory management -
the list of objects is not loaded unless some method is called.
The `ForEach` method (mainly used in cleanup) is reimplemented to
iterate over the database without ever loading all the objects into memory.

Memory usage was even worse with the previous approach, as `LoadComplete()`
is usually called for each item, which pulls even more data into memory,
and the item stays in memory till the end of the iteration as it is referenced
from `collection.list`.

For the subsequent PR: reimplement `ByUUID()` and probably other methods
to avoid loading all the items into memory, at least for all the collections
except for published repos. When a published repository is being loaded, it
might pull its source local repo, which in turn would trigger loading of all the
local repos, which is not acceptable.
  • Loading branch information
smira committed Aug 3, 2018
1 parent 86a1c41 commit 0f4bbc4
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 53 deletions.
43 changes: 30 additions & 13 deletions deb/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,28 +99,34 @@ type LocalRepoCollection struct {

// NewLocalRepoCollection loads LocalRepos from DB and makes up collection
func NewLocalRepoCollection(db database.Storage) *LocalRepoCollection {
result := &LocalRepoCollection{
return &LocalRepoCollection{
RWMutex: &sync.RWMutex{},
db: db,
}
}

func (collection *LocalRepoCollection) loadList() {
if collection.list != nil {
return
}

blobs := db.FetchByPrefix([]byte("L"))
result.list = make([]*LocalRepo, 0, len(blobs))
blobs := collection.db.FetchByPrefix([]byte("L"))
collection.list = make([]*LocalRepo, 0, len(blobs))

for _, blob := range blobs {
r := &LocalRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding repo: %s\n", err)
} else {
result.list = append(result.list, r)
collection.list = append(collection.list, r)
}
}

return result
}

// Add appends new repo to collection and saves it
func (collection *LocalRepoCollection) Add(repo *LocalRepo) error {
collection.loadList()

for _, r := range collection.list {
if r.Name == repo.Name {
return fmt.Errorf("local repo with name %s already exists", repo.Name)
Expand Down Expand Up @@ -153,6 +159,8 @@ func (collection *LocalRepoCollection) Update(repo *LocalRepo) error {

// LoadComplete loads additional information for local repo
func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error {
collection.loadList()

encoded, err := collection.db.Get(repo.RefKey())
if err == database.ErrNotFound {
return nil
Expand All @@ -167,6 +175,8 @@ func (collection *LocalRepoCollection) LoadComplete(repo *LocalRepo) error {

// ByName looks up repository by name
func (collection *LocalRepoCollection) ByName(name string) (*LocalRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.Name == name {
return r, nil
Expand All @@ -177,6 +187,8 @@ func (collection *LocalRepoCollection) ByName(name string) (*LocalRepo, error) {

// ByUUID looks up repository by uuid
func (collection *LocalRepoCollection) ByUUID(uuid string) (*LocalRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.UUID == uuid {
return r, nil
Expand All @@ -187,23 +199,28 @@ func (collection *LocalRepoCollection) ByUUID(uuid string) (*LocalRepo, error) {

// ForEach runs method for each repository
func (collection *LocalRepoCollection) ForEach(handler func(*LocalRepo) error) error {
var err error
for _, r := range collection.list {
err = handler(r)
if err != nil {
return err
return collection.db.ProcessByPrefix([]byte("L"), func(key, blob []byte) error {
r := &LocalRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding repo: %s\n", err)
return nil
}
}
return err

return handler(r)
})
}

// Len returns the number of local repos in the collection.
//
// The repo list is loaded via loadList() before it is counted.
func (collection *LocalRepoCollection) Len() int {
collection.loadList()

return len(collection.list)
}

// Drop removes local repo from collection
func (collection *LocalRepoCollection) Drop(repo *LocalRepo) error {
collection.loadList()

repoPosition := -1

for i, r := range collection.list {
Expand Down
50 changes: 37 additions & 13 deletions deb/publish.go
Original file line number Diff line number Diff line change
Expand Up @@ -852,28 +852,34 @@ type PublishedRepoCollection struct {

// NewPublishedRepoCollection loads PublishedRepos from DB and makes up collection
func NewPublishedRepoCollection(db database.Storage) *PublishedRepoCollection {
result := &PublishedRepoCollection{
return &PublishedRepoCollection{
RWMutex: &sync.RWMutex{},
db: db,
}
}

func (collection *PublishedRepoCollection) loadList() {
if collection.list != nil {
return
}

blobs := db.FetchByPrefix([]byte("U"))
result.list = make([]*PublishedRepo, 0, len(blobs))
blobs := collection.db.FetchByPrefix([]byte("U"))
collection.list = make([]*PublishedRepo, 0, len(blobs))

for _, blob := range blobs {
r := &PublishedRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding published repo: %s\n", err)
} else {
result.list = append(result.list, r)
collection.list = append(collection.list, r)
}
}

return result
}

// Add appends new repo to collection and saves it
func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error {
collection.loadList()

if collection.CheckDuplicate(repo) != nil {
return fmt.Errorf("published repo with storage/prefix/distribution %s/%s/%s already exists", repo.Storage, repo.Prefix, repo.Distribution)
}
Expand All @@ -889,6 +895,8 @@ func (collection *PublishedRepoCollection) Add(repo *PublishedRepo) error {

// CheckDuplicate verifies that there's no published repo with the same name
func (collection *PublishedRepoCollection) CheckDuplicate(repo *PublishedRepo) *PublishedRepo {
collection.loadList()

for _, r := range collection.list {
if r.Prefix == repo.Prefix && r.Distribution == repo.Distribution && r.Storage == repo.Storage {
return r
Expand Down Expand Up @@ -978,6 +986,8 @@ func (collection *PublishedRepoCollection) LoadComplete(repo *PublishedRepo, col

// ByStoragePrefixDistribution looks up repository by storage, prefix & distribution
func (collection *PublishedRepoCollection) ByStoragePrefixDistribution(storage, prefix, distribution string) (*PublishedRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.Prefix == prefix && r.Distribution == distribution && r.Storage == storage {
return r, nil
Expand All @@ -991,6 +1001,8 @@ func (collection *PublishedRepoCollection) ByStoragePrefixDistribution(storage,

// ByUUID looks up repository by uuid
func (collection *PublishedRepoCollection) ByUUID(uuid string) (*PublishedRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.UUID == uuid {
return r, nil
Expand All @@ -1001,6 +1013,8 @@ func (collection *PublishedRepoCollection) ByUUID(uuid string) (*PublishedRepo,

// BySnapshot looks up repository by snapshot source
func (collection *PublishedRepoCollection) BySnapshot(snapshot *Snapshot) []*PublishedRepo {
collection.loadList()

var result []*PublishedRepo
for _, r := range collection.list {
if r.SourceKind == SourceSnapshot {
Expand All @@ -1021,6 +1035,8 @@ func (collection *PublishedRepoCollection) BySnapshot(snapshot *Snapshot) []*Pub

// ByLocalRepo looks up repository by local repo source
func (collection *PublishedRepoCollection) ByLocalRepo(repo *LocalRepo) []*PublishedRepo {
collection.loadList()

var result []*PublishedRepo
for _, r := range collection.list {
if r.SourceKind == SourceLocalRepo {
Expand All @@ -1041,25 +1057,30 @@ func (collection *PublishedRepoCollection) ByLocalRepo(repo *LocalRepo) []*Publi

// ForEach runs method for each repository
func (collection *PublishedRepoCollection) ForEach(handler func(*PublishedRepo) error) error {
var err error
for _, r := range collection.list {
err = handler(r)
if err != nil {
return err
return collection.db.ProcessByPrefix([]byte("U"), func(key, blob []byte) error {
r := &PublishedRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding published repo: %s\n", err)
return nil
}
}
return err

return handler(r)
})
}

// Len returns the number of published repos in the collection.
//
// The repo list is loaded via loadList() before it is counted.
func (collection *PublishedRepoCollection) Len() int {
collection.loadList()

return len(collection.list)
}

// CleanupPrefixComponentFiles removes all unreferenced files in published storage under prefix/component pair
func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix string, components []string,
publishedStorage aptly.PublishedStorage, collectionFactory *CollectionFactory, progress aptly.Progress) error {

collection.loadList()

var err error
referencedFiles := map[string][]string{}

Expand Down Expand Up @@ -1141,6 +1162,9 @@ func (collection *PublishedRepoCollection) CleanupPrefixComponentFiles(prefix st
func (collection *PublishedRepoCollection) Remove(publishedStorageProvider aptly.PublishedStorageProvider,
storage, prefix, distribution string, collectionFactory *CollectionFactory, progress aptly.Progress,
force, skipCleanup bool) error {

collection.loadList()

repo, err := collection.ByStoragePrefixDistribution(storage, prefix, distribution)
if err != nil {
return err
Expand Down
41 changes: 28 additions & 13 deletions deb/remote.go
Original file line number Diff line number Diff line change
Expand Up @@ -660,28 +660,34 @@ type RemoteRepoCollection struct {

// NewRemoteRepoCollection loads RemoteRepos from DB and makes up collection
func NewRemoteRepoCollection(db database.Storage) *RemoteRepoCollection {
result := &RemoteRepoCollection{
return &RemoteRepoCollection{
RWMutex: &sync.RWMutex{},
db: db,
}
}

func (collection *RemoteRepoCollection) loadList() {
if collection.list != nil {
return
}

blobs := db.FetchByPrefix([]byte("R"))
result.list = make([]*RemoteRepo, 0, len(blobs))
blobs := collection.db.FetchByPrefix([]byte("R"))
collection.list = make([]*RemoteRepo, 0, len(blobs))

for _, blob := range blobs {
r := &RemoteRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding mirror: %s\n", err)
} else {
result.list = append(result.list, r)
collection.list = append(collection.list, r)
}
}

return result
}

// Add appends new repo to collection and saves it
func (collection *RemoteRepoCollection) Add(repo *RemoteRepo) error {
collection.loadList()

for _, r := range collection.list {
if r.Name == repo.Name {
return fmt.Errorf("mirror with name %s already exists", repo.Name)
Expand Down Expand Up @@ -728,6 +734,8 @@ func (collection *RemoteRepoCollection) LoadComplete(repo *RemoteRepo) error {

// ByName looks up repository by name
func (collection *RemoteRepoCollection) ByName(name string) (*RemoteRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.Name == name {
return r, nil
Expand All @@ -738,6 +746,8 @@ func (collection *RemoteRepoCollection) ByName(name string) (*RemoteRepo, error)

// ByUUID looks up repository by uuid
func (collection *RemoteRepoCollection) ByUUID(uuid string) (*RemoteRepo, error) {
collection.loadList()

for _, r := range collection.list {
if r.UUID == uuid {
return r, nil
Expand All @@ -748,23 +758,28 @@ func (collection *RemoteRepoCollection) ByUUID(uuid string) (*RemoteRepo, error)

// ForEach runs method for each repository
func (collection *RemoteRepoCollection) ForEach(handler func(*RemoteRepo) error) error {
var err error
for _, r := range collection.list {
err = handler(r)
if err != nil {
return err
return collection.db.ProcessByPrefix([]byte("R"), func(key, blob []byte) error {
r := &RemoteRepo{}
if err := r.Decode(blob); err != nil {
log.Printf("Error decoding mirror: %s\n", err)
return nil
}
}
return err

return handler(r)
})
}

// Len returns the number of remote repos (mirrors) in the collection.
//
// The repo list is loaded via loadList() before it is counted.
func (collection *RemoteRepoCollection) Len() int {
collection.loadList()

return len(collection.list)
}

// Drop removes remote repo from collection
func (collection *RemoteRepoCollection) Drop(repo *RemoteRepo) error {
collection.loadList()

repoPosition := -1

for i, r := range collection.list {
Expand Down
Loading

0 comments on commit 0f4bbc4

Please sign in to comment.