diff options
| author | Mitch Riedstra <mitch@riedstra.us> | 2020-10-25 16:49:46 -0400 |
|---|---|---|
| committer | Mitch Riedstra <mitch@riedstra.us> | 2020-10-25 16:49:46 -0400 |
| commit | 7752715a53155e4809f91e5894dfb2e9b3d35544 (patch) | |
| tree | b9ac015df5879142f87c316097487c37f67534c3 | |
| parent | 9a8ca79ddb3b5f309c1ee6c7a8713a38fc81fe2b (diff) | |
| download | deduplicator-7752715a53155e4809f91e5894dfb2e9b3d35544.tar.gz deduplicator-7752715a53155e4809f91e5894dfb2e9b3d35544.tar.xz | |
It works for my purposes
| -rw-r--r-- | main.go | 80 |
1 file changed, 61 insertions, 19 deletions
@@ -2,6 +2,7 @@ package main import ( "crypto/sha256" + "encoding/json" "flag" "fmt" "io" @@ -14,14 +15,6 @@ import ( "strings" ) -/* -func writeCache(fn string) error { -} - -func readCache(fn string) error { -} -*/ - // sha256Sum Takes an io.Reader and computes the checksum returning it as a // formatted string and an error if any func sha256Sum(rdr io.Reader) (string, error) { @@ -48,7 +41,8 @@ type ChecksummerResult struct { Path string Checksum string Error error - Info os.FileInfo + Name string + Size int64 } type ChecksummerInput struct { @@ -58,12 +52,37 @@ type ChecksummerInput struct { type ChecksumSlice []*ChecksummerResult +type ChecksummerResultMap map[string]ChecksumSlice + +func (c *ChecksummerResultMap) Save(fn string) error { + fh, err := os.OpenFile(fn, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) + if err != nil { + return err + } + defer fh.Close() + + enc := json.NewEncoder(fh) + return enc.Encode(c) +} + +func (c *ChecksummerResultMap) Load(fn string) error { + fh, err := os.Open(fn) + if err != nil { + return err + } + defer fh.Close() + + dec := json.NewDecoder(fh) + + return dec.Decode(c) +} + func (c ChecksumSlice) BuildRemoveList(reg *regexp.Regexp, matchPath bool) ChecksumSlice { removing := ChecksumSlice{} for _, result := range c { s := "" if !matchPath { - s = result.Info.Name() + s = result.Name } else { s = result.Path } @@ -85,7 +104,8 @@ func Checksummer(done chan<- bool, input <-chan *ChecksummerInput, results chan< Path: i.Path, Checksum: sum, Error: err, - Info: i.Info, + Name: i.Info.Name(), + Size: i.Info.Size(), } } done <- true @@ -153,6 +173,7 @@ func main() { removeRegexStr := fl.String("regex", "", "Regular expression to match duplicated files") removeYes := fl.Bool("yes-i-want-my-data-gone", false, "Actually remove the files") matchPath := fl.Bool("match-path", false, "match on the path, rather than the filename") + cacheFile := fl.String("cache", "", "If not an empty string, the data gathered on a directory will be cached 
to the file, allowing subsequent runs to be near instant. No care is taken to check whether or not the cache is up to date with the current state of the directroy. If in doubt, leave empty.") _ = fl.Parse(os.Args[1:]) @@ -165,18 +186,32 @@ func main() { log.Fatalf("Error compiling provided regular expression: %s", err) } - checksums := make(map[string]ChecksumSlice) + // checksums := make(map[string]ChecksumSlice) + checksums := make(ChecksummerResultMap) pths := make(chan *ChecksummerInput) results := make(chan *ChecksummerResult) done := make(chan bool) + finished := 0 + + if *cacheFile != "" { + err = checksums.Load(*cacheFile) + if err != nil && !strings.Contains(err.Error(), "no such file or directory") { + fmt.Fprintln(os.Stderr, "Error loading cache file: ", err) + } + + if err == nil { + fmt.Fprintln(os.Stderr, "Loaded cache file, using") + goto parsing + } + } + for j := 0; j < *procs; j++ { go Checksummer(done, pths, results) } go Scanner(done, *all, *path, pths) - finished := 0 wait: for { select { @@ -203,8 +238,15 @@ wait: } - // fmt.Println("Actually removing: ", *removeYes) + if *cacheFile != "" { + fmt.Fprintln(os.Stderr, "Saving cache file") + err = checksums.Save(*cacheFile) + if err != nil { + fmt.Fprintln(os.Stderr, "Error saving cache file: ", err) + } + } +parsing: for sum, list := range checksums { if len(list) <= 1 { continue @@ -218,7 +260,7 @@ wait: for _, result := range list { fmt.Printf("%s::%d::%s::%s\n", sum, - result.Info.Size(), + result.Size, result.Path, "not removing") } @@ -229,7 +271,7 @@ wait: for _, f := range removing { fmt.Printf("%s::%d::%s::%s\n", sum, - f.Info.Size(), + f.Size, f.Path, "would remove") } @@ -237,7 +279,7 @@ wait: for _, f := range removing { fmt.Printf("%s::%d::%s::%s\n", sum, - f.Info.Size(), + f.Size, f.Path, "removing") err := os.Remove(f.Path) @@ -274,13 +316,13 @@ wait: } case *script: for _, result := range list { - fmt.Printf("%s::%d::%s\n", sum, result.Info.Size(), result.Path) + 
fmt.Printf("%s::%d::%s\n", sum, result.Size, result.Path) } default: fmt.Println(sum) for i, result := range list { if i == 0 { - fmt.Println(formatBytes(result.Info.Size())) + fmt.Println(formatBytes(result.Size)) } fmt.Println("\t" + result.Path) } |
