aboutsummaryrefslogtreecommitdiff
path: root/content/pipelines/parallel.go
diff options
context:
space:
mode:
authorSameer Ajmani <sameer@golang.org>2014-03-12 23:43:55 -0400
committerSameer Ajmani <sameer@golang.org>2014-03-12 23:43:55 -0400
commit9aa2e1143c55cfa6a0b8651067b34c61da8d5646 (patch)
treef0eb469d9b435c085c77a943225c9eef72d84eb3 /content/pipelines/parallel.go
parent401e4f4a7cd251187c7e3b18b1dc2c583e6d2e45 (diff)
go.blog: pipelines and cancellation.
R=adg, r, rsc, ken, bcmills CC=adonovan, campoy, david.crawshaw, gmlewis, golang-codereviews https://golang.org/cl/71070047
Diffstat (limited to 'content/pipelines/parallel.go')
-rw-r--r--content/pipelines/parallel.go109
1 files changed, 109 insertions, 0 deletions
diff --git a/content/pipelines/parallel.go b/content/pipelines/parallel.go
new file mode 100644
index 0000000..7edee1d
--- /dev/null
+++ b/content/pipelines/parallel.go
@@ -0,0 +1,109 @@
+package main
+
+import (
+ "crypto/md5"
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "sort"
+ "sync"
+)
+
+// A result is the product of reading and summing a file using MD5.
+type result struct {
+ path string
+ sum [md5.Size]byte
+ err error
+}
+
+// sumFiles starts goroutines to walk the directory tree at root and digest each
+// regular file. These goroutines send the results of the digests on the result
+// channel and send the result of the walk on the error channel. If done is
+// closed, sumFiles abandons its work.
+func sumFiles(done <-chan struct{}, root string) (<-chan result, <-chan error) {
+ // For each regular file, start a goroutine that sums the file and sends
+ // the result on c. Send the result of the walk on errc.
+ c := make(chan result)
+ errc := make(chan error, 1)
+ go func() { // HL
+ var wg sync.WaitGroup
+ err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ return err
+ }
+ if info.IsDir() {
+ return nil
+ }
+ wg.Add(1)
+ go func() { // HL
+ data, err := ioutil.ReadFile(path)
+ select {
+ case c <- result{path, md5.Sum(data), err}: // HL
+ case <-done: // HL
+ }
+ wg.Done()
+ }()
+ // Abort the walk if done is closed.
+ select {
+ case <-done: // HL
+ return errors.New("walk canceled")
+ default:
+ return nil
+ }
+ })
+ // Walk has returned, so all calls to wg.Add are done. Start a
+ // goroutine to close c once all the sends are done.
+ go func() { // HL
+ wg.Wait()
+ close(c) // HL
+ }()
+ // No select needed here, since errc is buffered.
+ errc <- err // HL
+ }()
+ return c, errc
+}
+
+// MD5All reads all the files in the file tree rooted at root and returns a map
+// from file path to the MD5 sum of the file's contents. If the directory walk
+// fails or any read operation fails, MD5All returns an error. In that case,
+// MD5All does not wait for inflight read operations to complete.
+func MD5All(root string) (map[string][md5.Size]byte, error) {
+ // MD5All closes the done channel when it returns; it may do so before
+ // receiving all the values from c and errc.
+ done := make(chan struct{}) // HLdone
+ defer close(done) // HLdone
+
+ c, errc := sumFiles(done, root) // HLdone
+
+ m := make(map[string][md5.Size]byte)
+ for r := range c { // HLrange
+ if r.err != nil {
+ return nil, r.err
+ }
+ m[r.path] = r.sum
+ }
+ if err := <-errc; err != nil {
+ return nil, err
+ }
+ return m, nil
+}
+
+func main() {
+ // Calculate the MD5 sum of all files under the specified directory,
+ // then print the results sorted by path name.
+ m, err := MD5All(os.Args[1])
+ if err != nil {
+ fmt.Println(err)
+ return
+ }
+ var paths []string
+ for path := range m {
+ paths = append(paths, path)
+ }
+ sort.Strings(paths)
+ for _, path := range paths {
+ fmt.Printf("%x %s\n", m[path], path)
+ }
+}