aboutsummaryrefslogtreecommitdiff
path: root/dedup.go
blob: a6b4549e2b41fc21f10e90a2d69a3a0884d10ba4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package main

import (
	"crypto/sha256"
	"flag"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
)

// sha256Sum streams everything readable from rdr through a SHA-256 hash and
// returns the digest as an uppercase hexadecimal string. If reading fails, the
// error is returned together with an empty string.
func sha256Sum(rdr io.Reader) (string, error) {
	hasher := sha256.New()

	_, copyErr := io.Copy(hasher, rdr)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return fmt.Sprintf("%X", digest), nil
}

// getChecksum opens the file at fileName and returns the checksum that
// sha256Sum computes over its contents. A failure to open the file is
// returned as the error together with an empty string.
func getChecksum(fileName string) (string, error) {
	file, openErr := os.Open(fileName)
	if openErr != nil {
		return "", openErr
	}
	// The handle is only read from, so the result of Close is not checked.
	defer file.Close()

	checksum, err := sha256Sum(file)
	return checksum, err
}

// main scans a directory tree for files with identical content and prints
// every group of duplicates to stdout.
//
// Flags:
//
//	-path  directory to scan (default ".")
//	-a     include hidden entries, i.e. names beginning with "."
//	-s     emit one "CHECKSUM::path" line per file instead of grouped output
//
// Only regular files are hashed. If the walk fails, the error is written to
// stderr, the duplicates found up to that point are still printed, and the
// process exits with status 1.
func main() {
	fl := flag.NewFlagSet("deduplicator", flag.ExitOnError)

	root := fl.String("path", ".", "Path to deduplicate files in")
	all := fl.Bool("a", false, "Scan hidden files as well")
	script := fl.Bool("s", false, "Output format suitable for scripts")

	// The flag set uses flag.ExitOnError, so Parse terminates the process on
	// a bad command line rather than returning an error that needs handling.
	_ = fl.Parse(os.Args[1:])

	// checksum -> filepaths
	checksums := make(map[string][]string)
	// Checksums in the order they were first seen. Printing in this order,
	// instead of ranging over the map, keeps the output stable between runs
	// because filepath.Walk visits entries in lexical order.
	var order []string

	err := filepath.Walk(*root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Hiddenness is decided from the entry's own name, not from a
		// substring of the whole path: this catches hidden entries directly
		// under the root and is independent of the path separator. The root
		// itself is exempt so that "-path ." or "-path ~/.cache" still scans
		// the directory that was explicitly requested.
		if !*all && path != *root && strings.HasPrefix(info.Name(), ".") {
			if info.IsDir() {
				// Do not descend into hidden directories at all.
				return filepath.SkipDir
			}
			return nil
		}

		// Skip directories and every other non-regular entry. Opening a
		// named pipe would block, and a symlink that is dangling or points
		// at a directory would otherwise abort the whole walk.
		if !info.Mode().IsRegular() {
			return nil
		}

		sum, err := getChecksum(path)
		if err != nil {
			return err
		}

		if _, seen := checksums[sum]; !seen {
			order = append(order, sum)
		}
		checksums[sum] = append(checksums[sum], path)

		return nil
	})

	if err != nil {
		fmt.Fprintln(os.Stderr, err)
	}

	for _, sum := range order {
		list := checksums[sum]
		// A checksum seen only once has no duplicates to report.
		if len(list) < 2 {
			continue
		}

		if *script {
			for _, file := range list {
				fmt.Printf("%s::%s\n", sum, file)
			}
			continue
		}

		fmt.Println(sum)
		for _, file := range list {
			fmt.Println("\t" + file)
		}
	}

	// Report the failed walk through the exit status, after the partial
	// results have been printed.
	if err != nil {
		os.Exit(1)
	}
}