// This program builds an inverted index with the mapreduce framework: for
// every word it reports how many input documents contain the word and which
// documents those are.
package main

import (
	"fmt"
	"mapreduce"
	"os"
	"sort"
	"strings"
	"unicode"
)

// The mapping function is called once for each piece of the input.
// In this framework, the key is the name of the file that is being processed,
// and the value is the file's contents. The return value should be a slice of
// key/value pairs, each represented by a mapreduce.KeyValue.
//
// mapF emits exactly one (word, document) pair per distinct word found in
// value. A word is a maximal run of Unicode letters; every other rune
// (whitespace, digits, punctuation, ...) acts as a separator. Words are not
// case-folded. Pairs are emitted in order of first occurrence in value.
func mapF(document string, value string) (res []mapreduce.KeyValue) {
	isSeparator := func(r rune) bool { return !unicode.IsLetter(r) }

	// Only membership matters for the index, so remember which words were
	// already emitted instead of counting occurrences.
	seen := make(map[string]struct{})
	for _, word := range strings.FieldsFunc(value, isSeparator) {
		if _, dup := seen[word]; dup {
			continue
		}
		seen[word] = struct{}{}
		res = append(res, mapreduce.KeyValue{word, document})
	}
	return res
}

// The reduce function is called once for each key generated by Map, with a
// list of that key's string value (merged across all inputs). The return value
// should be a single output value for that key.
//
// reduceF treats values as document names. It drops duplicates, sorts the
// remaining names, and returns "<count> <name1>,<name2>,...\n", where count is
// the number of distinct names. The key itself is not used.
func reduceF(key string, values []string) string {
	unique := make(map[string]struct{}, len(values))
	for _, doc := range values {
		unique[doc] = struct{}{}
	}

	documents := make([]string, 0, len(unique))
	for doc := range unique {
		documents = append(documents, doc)
	}
	// Map iteration order is random; sort so the output is deterministic.
	sort.Strings(documents)

	return fmt.Sprintf("%d %s\n", len(documents), strings.Join(documents, ","))
}

// Can be run in 3 ways:
// 1) Sequential (e.g., go run wc.go master sequential x1.txt .. xN.txt)
// 2) Master (e.g., go run wc.go master localhost:7777 x1.txt .. xN.txt)
// 3) Worker (e.g., go run wc.go worker localhost:7777 localhost:7778 &)
func main() {
	switch {
	case len(os.Args) < 4:
		// Every mode needs at least three arguments after the program name.
		fmt.Printf("%s: see usage comments in file\n", os.Args[0])
	case os.Args[1] == "master":
		var mr *mapreduce.Master
		if os.Args[2] == "sequential" {
			mr = mapreduce.Sequential("iiseq", os.Args[3:], 3, mapF, reduceF)
		} else {
			// Any other second argument is passed through as the master address.
			mr = mapreduce.Distributed("iiseq", os.Args[3:], 3, os.Args[2])
		}
		mr.Wait()
	default:
		// Anything other than "master" as the first argument runs a worker.
		mapreduce.RunWorker(os.Args[2], os.Args[3], mapF, reduceF, 100)
	}
}