aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMitch Riedstra <mitch@riedstra.us>2020-10-25 16:49:46 -0400
committerMitch Riedstra <mitch@riedstra.us>2020-10-25 16:49:46 -0400
commit7752715a53155e4809f91e5894dfb2e9b3d35544 (patch)
treeb9ac015df5879142f87c316097487c37f67534c3
parent9a8ca79ddb3b5f309c1ee6c7a8713a38fc81fe2b (diff)
downloaddeduplicator-7752715a53155e4809f91e5894dfb2e9b3d35544.tar.gz
deduplicator-7752715a53155e4809f91e5894dfb2e9b3d35544.tar.xz
It works for my purposes
-rw-r--r--main.go80
1 files changed, 61 insertions, 19 deletions
diff --git a/main.go b/main.go
index 2b032be..a84eadb 100644
--- a/main.go
+++ b/main.go
@@ -2,6 +2,7 @@ package main
import (
"crypto/sha256"
+ "encoding/json"
"flag"
"fmt"
"io"
@@ -14,14 +15,6 @@ import (
"strings"
)
-/*
-func writeCache(fn string) error {
-}
-
-func readCache(fn string) error {
-}
-*/
-
// sha256Sum Takes an io.Reader and computes the checksum returning it as a
// formatted string and an error if any
func sha256Sum(rdr io.Reader) (string, error) {
@@ -48,7 +41,8 @@ type ChecksummerResult struct {
Path string
Checksum string
Error error
- Info os.FileInfo
+ Name string
+ Size int64
}
type ChecksummerInput struct {
@@ -58,12 +52,37 @@ type ChecksummerInput struct {
type ChecksumSlice []*ChecksummerResult
+// ChecksummerResultMap groups results under a string key. main ranges over it
+// treating each key as a checksum and each value as the list of results that
+// share that checksum.
+type ChecksummerResultMap map[string]ChecksumSlice
+
+// Save writes the map to the file named fn as JSON, creating the file if it
+// does not exist and truncating it if it does.
+//
+// It returns the first error encountered while creating the file, encoding
+// the map, or closing the file. The close error is reported because a failed
+// close on a written file can mean the data never reached disk.
+func (c *ChecksummerResultMap) Save(fn string) (err error) {
+	// os.Create is OpenFile(fn, O_RDWR|O_CREATE|O_TRUNC, 0666).
+	fh, err := os.Create(fn)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// Keep an earlier encode error; otherwise surface the close error.
+		if cerr := fh.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	return json.NewEncoder(fh).Encode(c)
+}
+
+// Load opens the file named fn and decodes the JSON value it contains into
+// the map. An error from opening the file or from decoding is returned
+// unchanged; the file is closed before Load returns.
+//
+// NOTE(review): ChecksummerResult has an Error field of interface type error,
+// and encoding/json cannot decode a non-null value into it. Confirm that
+// results carrying a non-nil Error are never written to the cache.
+func (c *ChecksummerResultMap) Load(fn string) error {
+	src, openErr := os.Open(fn)
+	if openErr != nil {
+		return openErr
+	}
+	defer src.Close()
+
+	return json.NewDecoder(src).Decode(c)
+}
+
func (c ChecksumSlice) BuildRemoveList(reg *regexp.Regexp, matchPath bool) ChecksumSlice {
removing := ChecksumSlice{}
for _, result := range c {
s := ""
if !matchPath {
- s = result.Info.Name()
+ s = result.Name
} else {
s = result.Path
}
@@ -85,7 +104,8 @@ func Checksummer(done chan<- bool, input <-chan *ChecksummerInput, results chan<
Path: i.Path,
Checksum: sum,
Error: err,
- Info: i.Info,
+ Name: i.Info.Name(),
+ Size: i.Info.Size(),
}
}
done <- true
@@ -153,6 +173,7 @@ func main() {
removeRegexStr := fl.String("regex", "", "Regular expression to match duplicated files")
removeYes := fl.Bool("yes-i-want-my-data-gone", false, "Actually remove the files")
matchPath := fl.Bool("match-path", false, "match on the path, rather than the filename")
+ cacheFile := fl.String("cache", "", "If not an empty string, the data gathered on a directory will be cached to the file, allowing subsequent runs to be near instant. No care is taken to check whether or not the cache is up to date with the current state of the directroy. If in doubt, leave empty.")
_ = fl.Parse(os.Args[1:])
@@ -165,18 +186,32 @@ func main() {
log.Fatalf("Error compiling provided regular expression: %s", err)
}
- checksums := make(map[string]ChecksumSlice)
+ // checksums := make(map[string]ChecksumSlice)
+ checksums := make(ChecksummerResultMap)
pths := make(chan *ChecksummerInput)
results := make(chan *ChecksummerResult)
done := make(chan bool)
+ finished := 0
+
+ if *cacheFile != "" {
+ err = checksums.Load(*cacheFile)
+ if err != nil && !strings.Contains(err.Error(), "no such file or directory") {
+ fmt.Fprintln(os.Stderr, "Error loading cache file: ", err)
+ }
+
+ if err == nil {
+ fmt.Fprintln(os.Stderr, "Loaded cache file, using")
+ goto parsing
+ }
+ }
+
for j := 0; j < *procs; j++ {
go Checksummer(done, pths, results)
}
go Scanner(done, *all, *path, pths)
- finished := 0
wait:
for {
select {
@@ -203,8 +238,15 @@ wait:
}
- // fmt.Println("Actually removing: ", *removeYes)
+ if *cacheFile != "" {
+ fmt.Fprintln(os.Stderr, "Saving cache file")
+ err = checksums.Save(*cacheFile)
+ if err != nil {
+ fmt.Fprintln(os.Stderr, "Error saving cache file: ", err)
+ }
+ }
+parsing:
for sum, list := range checksums {
if len(list) <= 1 {
continue
@@ -218,7 +260,7 @@ wait:
for _, result := range list {
fmt.Printf("%s::%d::%s::%s\n",
sum,
- result.Info.Size(),
+ result.Size,
result.Path,
"not removing")
}
@@ -229,7 +271,7 @@ wait:
for _, f := range removing {
fmt.Printf("%s::%d::%s::%s\n",
sum,
- f.Info.Size(),
+ f.Size,
f.Path,
"would remove")
}
@@ -237,7 +279,7 @@ wait:
for _, f := range removing {
fmt.Printf("%s::%d::%s::%s\n",
sum,
- f.Info.Size(),
+ f.Size,
f.Path,
"removing")
err := os.Remove(f.Path)
@@ -274,13 +316,13 @@ wait:
}
case *script:
for _, result := range list {
- fmt.Printf("%s::%d::%s\n", sum, result.Info.Size(), result.Path)
+ fmt.Printf("%s::%d::%s\n", sum, result.Size, result.Path)
}
default:
fmt.Println(sum)
for i, result := range list {
if i == 0 {
- fmt.Println(formatBytes(result.Info.Size()))
+ fmt.Println(formatBytes(result.Size))
}
fmt.Println("\t" + result.Path)
}