My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
;;;; Duplicates - handling duplicate files in dired

;;;; Find marked files in a dired buffer and display which ones have identical contents
;;;; (C)2011 Justin Heyes-Jones
;;;; How to use... mark the files you're interested in within the dired window, perhaps with `dired-mark-files-regexp'
;;;; then `dired-show-marked-duplicate-files' will open a buffer with a list of duplicated files
;;;; Also `dired-mark-duplicate-files' will mark only superfluous duplicates of the files allowing you to move
;;;; them to another folder or delete them

;; Holds the buffer used for the duplicate report.  Starts out nil and is
;; assigned by `dired-show-marked-duplicate-files'.
(defvar *duplicate-buffer* nil)

(defun md5-file (filename)
  "Return the MD5 digest of the raw bytes stored in FILENAME.

The file is read literally into a unibyte temporary buffer, so no
character decoding and no end-of-line conversion takes place.  Two
files therefore receive the same digest only when they are
byte-for-byte identical; a CRLF and an LF copy of the same text, for
example, hash differently.

The digest is returned as the string produced by `md5'."
  (interactive "f")
  (with-temp-buffer
    ;; A unibyte buffer stores the bytes exactly as they are on disk.
    (set-buffer-multibyte nil)
    ;; The literal variant skips decoding, EOL conversion and any
    ;; file-name handlers or format conversion.
    (insert-file-contents-literally filename)
    ;; Name the coding system explicitly so `md5' never has to guess one
    ;; (guessing could re-encode the text or even prompt the user).
    (md5 (current-buffer) nil nil 'no-conversion)))

(defun dired-get-duplicate-marked-file-map ()
  "Return a hash table grouping the marked Dired files by content digest.

Each key is the MD5 string returned by `md5-file'; each value is the
list of marked file names that produced that digest.  File names are
pushed onto the front of the list, so the file hashed last comes
first.  Entries that are not regular files are skipped.

A progress reporter is updated once per marked file.  When the
current buffer is not in `dired-mode' the returned table is empty."
  (let ((md5-map (make-hash-table :test 'equal :size 40)))
    (when (eq major-mode 'dired-mode)
      (let* ((filenames (dired-get-marked-files))
             (num-files (length filenames))
             (count 0)
             (progress-reporter
              (make-progress-reporter
               "Determining which files are duplicated..." 0 num-files)))
        (dolist (fn filenames)
          ;; Plain `setq' instead of `incf': the latter lives in the `cl'
          ;; package, which this file never requires.
          (setq count (1+ count))
          (progress-reporter-update progress-reporter count)
          (when (file-regular-p fn)
            (let ((digest (md5-file fn)))
              ;; `gethash' yields nil for an unseen digest, which starts a
              ;; fresh one-element list.
              (puthash digest (cons fn (gethash digest md5-map)) md5-map))))
        (progress-reporter-done progress-reporter)))
    md5-map))

(defun show-duplicate (key value)
  "Insert a duplicate report at point when VALUE names more than one file.

KEY is the MD5 shared by the files; it is not used, but is accepted so
that this function can be handed directly to `maphash'.  VALUE is a
list of file names.

The report is a header line of the form \"N duplicates of FILE\",
where N is the length of VALUE and FILE is its first element, followed
by every remaining file name on a line of its own.  Nothing is
inserted when VALUE has fewer than two elements."
  ;; A non-nil `cdr' means there are at least two file names.
  (when (cdr value)
    ;; `car'/`cdr' rather than `first'/`rest', which need the `cl' package.
    (insert (format "%d duplicates of %s\n" (length value) (car value)))
    (dolist (filename (cdr value))
      (insert (format "%s\n" filename)))))

(defun dired-show-marked-duplicate-files ()
  "Show which of the marked files in this Dired buffer have identical contents.

The marked files are hashed with `dired-get-duplicate-marked-file-map',
then the buffer named \"Duplicated files\" is created if necessary,
stored in `*duplicate-buffer*', selected, emptied, and filled with one
report per group of identical files (see `show-duplicate').

Signals an error when the current buffer is not in `dired-mode'."
  (interactive)
  (unless (eq major-mode 'dired-mode)
    ;; Let `error' do the formatting; formatting first and then passing the
    ;; result as a format string would interpret it a second time.
    (error "Not a Dired buffer (%s)" major-mode))
  ;; Build the map first, while the Dired buffer is still current.
  (let ((md5-map (dired-get-duplicate-marked-file-map)))
    (setq *duplicate-buffer* (get-buffer-create "Duplicated files"))
    ;; `pop-to-buffer' selects the report buffer without the mark-pushing
    ;; side effect of the interactive command `goto-line'.
    (pop-to-buffer *duplicate-buffer*)
    (erase-buffer)
    (maphash #'show-duplicate md5-map)
    ;; Leave point at the top so the report is read from its first line.
    (goto-char (point-min))))

(defun dired-mark-duplicates (key value)
  "Mark every file in VALUE except the first in the current Dired buffer.

KEY is the MD5 shared by a set of one or more files; it is not used,
but is accepted so that this function can be handed directly to
`maphash'.  VALUE is the list of file names with that digest.

The first file is arbitrarily treated as the one to keep and the
remaining ones are marked.  More elaborate or interactive strategies
could be considered, such as keeping the file with the shortest name
or the most recent modification date.

A file that cannot be located in the buffer is skipped, so that the
mark never lands on an unrelated line."
  ;; `cdr' rather than `rest', which needs the `cl' package.  For a
  ;; single-element VALUE the loop body simply never runs.
  (dolist (file (cdr value))
    ;; `dired-goto-file' returns nil and leaves point alone when FILE is
    ;; not listed; marking then would hit whatever file point is on.
    (when (dired-goto-file file)
      (dired-mark 1))))

(defun dired-mark-duplicate-files ()
  "Leave only the superfluous duplicates among the marked files marked.

The marked files of the current Dired buffer are grouped by content
with `dired-get-duplicate-marked-file-map'.  All marks are then
removed and `dired-mark-duplicates' is applied to every group, which
marks each file of the group except the first.

Signals an error when the current buffer is not in `dired-mode'."
  (interactive)
  (unless (eq major-mode 'dired-mode)
    ;; Let `error' do the formatting; formatting first and then passing the
    ;; result as a format string would interpret it a second time.
    (error "Not a Dired buffer (%s)" major-mode))
  ;; The map must be built before the marks are cleared, because it is
  ;; computed from the files that are currently marked.
  (let ((md5-map (dired-get-duplicate-marked-file-map)))
    (dired-unmark-all-marks)
    (maphash #'dired-mark-duplicates md5-map)))

Change log

r4 by justinhj on Jun 14, 2011   Diff
add progress indicator for the loop that
grabs the md5 from each file
Go to: 
Project members, sign in to write a code review

Older revisions

r3 by justinhj on Jun 5, 2011   Diff
Added functionality to leave only the
superfluous duplicates marked and
refactored a little to avoid
duplicated code
r2 by justinhj on Jun 1, 2011   Diff
First checkin
All revisions of this file

File info

Size: 3513 bytes, 77 lines
Powered by Google Project Hosting