about | summary | refs | log | tree | commit | diff
path: root/dedup.go
diff options
context:
space:
mode:
Diffstat (limited to 'dedup.go')
-rw-r--r-- dedup.go 100
1 files changed, 100 insertions, 0 deletions
diff --git a/dedup.go b/dedup.go
new file mode 100644
index 0000000..a6b4549
--- /dev/null
+++ b/dedup.go
@@ -0,0 +1,100 @@
+package main
+
+import (
+ "crypto/sha256"
+ "flag"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+)
+
+// sha256Sum streams rdr through a SHA-256 hash and returns the digest as an
+// upper-case hexadecimal string, or the error reported while copying.
+func sha256Sum(rdr io.Reader) (string, error) {
+	hasher := sha256.New()
+	_, err := io.Copy(hasher, rdr)
+	if err != nil {
+		return "", err
+	}
+	return fmt.Sprintf("%X", hasher.Sum(nil)), nil
+}
+
+// getChecksum opens the file at fileName and returns the checksum computed by
+// sha256Sum over its contents; an open or read error is returned unchanged.
+func getChecksum(fileName string) (string, error) {
+	file, openErr := os.Open(fileName)
+	if openErr != nil {
+		return "", openErr
+	}
+	defer file.Close()
+	return sha256Sum(file)
+}
+
+// main walks the tree under -path, hashes every regular file and prints each
+// group of files sharing a SHA-256 checksum, in first-seen order. Entries that
+// cannot be read are reported on stderr and skipped.
+func main() {
+	fl := flag.NewFlagSet("deduplicator", flag.ExitOnError)
+
+	root := fl.String("path", ".", "Path to deduplicate files in")
+	all := fl.Bool("a", false, "Scan hidden files as well")
+	script := fl.Bool("s", false, "Output format suitable for scripts")
+
+	// With flag.ExitOnError, Parse exits on bad input; the error is moot.
+	_ = fl.Parse(os.Args[1:])
+
+	// checksum -> filepaths; order keeps checksums as first seen (stable output)
+	checksums := make(map[string][]string)
+	var order []string
+
+	err := filepath.Walk(*root, func(p string, info os.FileInfo, err error) error {
+		if err != nil {
+			// One unreadable entry should not abort the whole scan.
+			fmt.Fprintln(os.Stderr, err)
+			return nil
+		}
+		// Hidden means the entry's own name starts with a dot; the root is
+		// exempt so that "-path ." and "-path .." still scan.
+		if !*all && p != *root && strings.HasPrefix(info.Name(), ".") {
+			if info.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		// Regular files only: reading a directory symlink fails, a FIFO blocks.
+		if !info.Mode().IsRegular() {
+			return nil
+		}
+		sum, err := getChecksum(p)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return nil
+		}
+		if _, seen := checksums[sum]; !seen {
+			order = append(order, sum)
+		}
+		checksums[sum] = append(checksums[sum], p)
+		return nil
+	})
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+	}
+
+	for _, sum := range order {
+		if len(checksums[sum]) < 2 {
+			continue
+		}
+		if !*script {
+			fmt.Println(sum)
+		}
+		for _, p := range checksums[sum] {
+			if *script {
+				fmt.Printf("%s::%s\n", sum, p)
+			} else {
+				fmt.Println("\t" + p)
+			}
+		}
+	}
+}