package analyze import ( "io" "os" "path/filepath" "runtime/debug" "sync" "time" "b612.me/apps/b612/gdu/internal/common" "b612.me/apps/b612/gdu/pkg/fs" log "github.com/sirupsen/logrus" ) // StoredAnalyzer implements Analyzer type StoredAnalyzer struct { storage *Storage storagePath string progress *common.CurrentProgress progressChan chan common.CurrentProgress progressOutChan chan common.CurrentProgress progressDoneChan chan struct{} doneChan common.SignalGroup wait *WaitGroup ignoreDir common.ShouldDirBeIgnored followSymlinks bool gitAnnexedSize bool } // CreateStoredAnalyzer returns Analyzer func CreateStoredAnalyzer(storagePath string) *StoredAnalyzer { return &StoredAnalyzer{ storagePath: storagePath, progress: &common.CurrentProgress{ ItemCount: 0, TotalSize: int64(0), }, progressChan: make(chan common.CurrentProgress, 1), progressOutChan: make(chan common.CurrentProgress, 1), progressDoneChan: make(chan struct{}), doneChan: make(common.SignalGroup), wait: (&WaitGroup{}).Init(), } } // GetProgressChan returns channel for getting progress func (a *StoredAnalyzer) GetProgressChan() chan common.CurrentProgress { return a.progressOutChan } // GetDone returns channel for checking when analysis is done func (a *StoredAnalyzer) GetDone() common.SignalGroup { return a.doneChan } func (a *StoredAnalyzer) SetFollowSymlinks(v bool) { a.followSymlinks = v } func (a *StoredAnalyzer) SetShowAnnexedSize(v bool) { a.gitAnnexedSize = v } // ResetProgress returns progress func (a *StoredAnalyzer) ResetProgress() { a.progress = &common.CurrentProgress{} a.progressChan = make(chan common.CurrentProgress, 1) a.progressOutChan = make(chan common.CurrentProgress, 1) a.progressDoneChan = make(chan struct{}) a.doneChan = make(common.SignalGroup) a.wait = (&WaitGroup{}).Init() } // AnalyzeDir analyzes given path func (a *StoredAnalyzer) AnalyzeDir( path string, ignore common.ShouldDirBeIgnored, constGC bool, ) fs.Item { if !constGC { defer debug.SetGCPercent(debug.SetGCPercent(-1)) go manageMemoryUsage(a.doneChan) } a.storage = NewStorage(a.storagePath, path) closeFn := a.storage.Open() defer func() { // nasty hack to close storage after all goroutines are done // Wait returns immediately if value is 0 // few last goroutines might still start after that time.Sleep(1 * time.Second) closeFn() }() a.ignoreDir = ignore go a.updateProgress() dir := a.processDir(path) a.wait.Wait() a.progressDoneChan <- struct{}{} a.doneChan.Broadcast() return dir } func (a *StoredAnalyzer) processDir(path string) *StoredDir { var ( file *File err error totalSize int64 info os.FileInfo dirCount int ) a.wait.Add(1) files, err := os.ReadDir(path) if err != nil { log.Print(err.Error()) } dir := &StoredDir{ Dir: &Dir{ File: &File{ Name: filepath.Base(path), Flag: getDirFlag(err, len(files)), }, BasePath: filepath.Dir(path), ItemCount: 1, Files: make(fs.Files, 0, len(files)), }, } parent := &ParentDir{Path: path} setDirPlatformSpecificAttrs(dir.Dir, path) for _, f := range files { name := f.Name() entryPath := filepath.Join(path, name) if f.IsDir() { if a.ignoreDir(name, entryPath) { continue } dirCount++ subdir := &StoredDir{ &Dir{ File: &File{ Name: name, }, BasePath: path, }, nil, sync.Mutex{}, } dir.AddFile(subdir) go func(entryPath string) { concurrencyLimit <- struct{}{} a.processDir(entryPath) <-concurrencyLimit }(entryPath) } else { info, err = f.Info() if err != nil { log.Print(err.Error()) continue } file = &File{ Name: name, Flag: getFlag(info), Size: info.Size(), Parent: parent, } setPlatformSpecificAttrs(file, info) totalSize += info.Size() dir.AddFile(file) } } err = a.storage.StoreDir(dir) if err != nil { log.Print(err.Error()) } a.wait.Done() a.progressChan <- common.CurrentProgress{ CurrentItemName: path, ItemCount: len(files), TotalSize: totalSize, } return dir } func (a *StoredAnalyzer) updateProgress() { for { select { case <-a.progressDoneChan: return case progress := <-a.progressChan: a.progress.CurrentItemName = progress.CurrentItemName a.progress.ItemCount += progress.ItemCount a.progress.TotalSize += progress.TotalSize } select { case a.progressOutChan <- *a.progress: default: } } } // StoredDir implements Dir item stored on disk type StoredDir struct { *Dir cachedFiles fs.Files dbLock sync.Mutex } // GetParent returns parent dir func (f *StoredDir) GetParent() fs.Item { if DefaultStorage.GetTopDir() == f.GetPath() { return nil } if !DefaultStorage.IsOpen() { closeFn := DefaultStorage.Open() defer closeFn() } dir, err := DefaultStorage.GetDirForPath(f.BasePath) if err != nil { log.Print(err.Error()) } return dir } // GetFiles returns files in directory // If files are already cached, return them // Otherwise load them from storage func (f *StoredDir) GetFiles() fs.Files { if f.cachedFiles != nil { return f.cachedFiles } if !DefaultStorage.IsOpen() { f.dbLock.Lock() defer f.dbLock.Unlock() closeFn := DefaultStorage.Open() defer closeFn() } var files fs.Files for _, file := range f.Files { if file.IsDir() { dir := &StoredDir{ &Dir{ File: &File{ Name: file.GetName(), }, BasePath: f.GetPath(), }, nil, sync.Mutex{}, } err := DefaultStorage.LoadDir(dir) if err != nil { log.Print(err.Error()) } files = append(files, dir) } else { files = append(files, file) } } f.cachedFiles = files return files } // SetFiles sets files in directory func (f *StoredDir) SetFiles(files fs.Files) { f.Files = files } // RemoveFile removes file from stored directory // It also updates size and item count of parent directories func (f *StoredDir) RemoveFile(item fs.Item) { if !DefaultStorage.IsOpen() { f.dbLock.Lock() defer f.dbLock.Unlock() closeFn := DefaultStorage.Open() defer closeFn() } f.SetFiles(f.GetFiles().Remove(item)) f.cachedFiles = nil cur := f for { cur.ItemCount -= item.GetItemCount() cur.Size -= item.GetSize() cur.Usage -= item.GetUsage() err := DefaultStorage.StoreDir(cur) if err != nil { log.Print(err.Error()) } parent := cur.GetParent() if parent == nil { break } cur = parent.(*StoredDir) } } // GetItemStats returns item count, apparent usage and real usage of this dir func (f *StoredDir) GetItemStats(linkedItems fs.HardLinkedItems) (itemCount int, size, usage int64) { f.UpdateStats(linkedItems) return f.ItemCount, f.GetSize(), f.GetUsage() } // UpdateStats recursively updates size and item count func (f *StoredDir) UpdateStats(linkedItems fs.HardLinkedItems) { if !DefaultStorage.IsOpen() { closeFn := DefaultStorage.Open() defer closeFn() } totalSize := int64(4096) totalUsage := int64(4096) var itemCount int f.cachedFiles = nil for _, entry := range f.GetFiles() { count, size, usage := entry.GetItemStats(linkedItems) totalSize += size totalUsage += usage itemCount += count if entry.GetMtime().After(f.Mtime) { f.Mtime = entry.GetMtime() } switch entry.GetFlag() { case '!', '.': if f.Flag != '!' { f.Flag = '.' } } } f.cachedFiles = nil f.ItemCount = itemCount + 1 f.Size = totalSize f.Usage = totalUsage err := DefaultStorage.StoreDir(f) if err != nil { log.Print(err.Error()) } } // ParentDir represents parent directory of single file // It is used to get path to parent directory of a file type ParentDir struct { Path string } func (p *ParentDir) GetPath() string { return p.Path } func (p *ParentDir) GetName() string { panic("must not be called") } func (p *ParentDir) GetFlag() rune { panic("must not be called") } func (p *ParentDir) IsDir() bool { panic("must not be called") } func (p *ParentDir) GetSize() int64 { panic("must not be called") } func (p *ParentDir) GetType() string { panic("must not be called") } func (p *ParentDir) GetUsage() int64 { panic("must not be called") } func (p *ParentDir) GetMtime() time.Time { panic("must not be called") } func (p *ParentDir) GetItemCount() int { panic("must not be called") } func (p *ParentDir) GetParent() fs.Item { panic("must not be called") } func (p *ParentDir) SetParent(fs.Item) { panic("must not be called") } func (p *ParentDir) GetMultiLinkedInode() uint64 { panic("must not be called") } func (p *ParentDir) EncodeJSON(writer io.Writer, topLevel bool) error { panic("must not be called") } func (p *ParentDir) UpdateStats(linkedItems fs.HardLinkedItems) { panic("must not be called") } func (p *ParentDir) AddFile(fs.Item) { panic("must not be called") } func (p *ParentDir) GetFiles() fs.Files { panic("must not be called") } func (p *ParentDir) GetFilesLocked() fs.Files { panic("must not be called") } func (p *ParentDir) RLock() func() { panic("must not be called") } func (p *ParentDir) SetFiles(fs.Files) { panic("must not be called") } func (p *ParentDir) RemoveFile(item fs.Item) { panic("must not be called") } func (p *ParentDir) GetItemStats( linkedItems fs.HardLinkedItems, ) (itemCount int, size, usage int64) { panic("must not be called") }