commit 9a2cc358d13ada1b559c80f63831b0e448af2d5f
parent 997c9044ae6d429a9b5dbb911c78473f4a866196
Author: Georges Dupéron <jahvascriptmaniac+github@free.fr>
Date: Sat, 31 Mar 2012 21:14:24 +0200
SQL commands to extract files not in folder A which have a duplicate in folder A. Can easily be extended to files in folder B which have a duplicate in folder A.
Diffstat:
1 file changed, 17 insertions(+), 0 deletions(-)
diff --git a/updatehash.sql b/updatehash.sql
@@ -6,3 +6,20 @@ select size,path from files where md5||'#'||sha1||'#'||size in (select md5||'#'|
# Overall summary: combined size of every row in files, scaled by 1024^3 and labelled "Gb"
# (size is presumably stored in bytes -- confirm against the schema), followed by the file count.
select
  round(sum(size) / (1024. * 1024. * 1024.), 2) || ' Gb ' || count(size) || ' files'
from files;
+
+# Setup for finding files not in folder A which have a duplicate in folder A:
+# tag the files of folder A, then build one indexed (id, hash) table for folder A
+# and one for every other file. id is the rowid of the row in files; hash is
+# size#md5#sha1, so two files count as duplicates when all three values agree.
+#
+# glob is used instead of like because like is case-insensitive for ASCII by
+# default in SQLite, which would also tag files under ./a/.
+update files set tag = 'A' where path glob './A/*';
+create table hashesA(id, hash);
+insert into hashesA(id, hash) select rowid,size||'#'||md5||'#'||sha1 from files where tag == 'A';
+create table hashesother(id, hash);
+# "is not" keeps rows whose tag is NULL (files that were never tagged);
+# "tag != 'A'" evaluates to NULL for them and would silently leave them out.
+insert into hashesother(id, hash) select rowid,size||'#'||md5||'#'||sha1 from files where tag is not 'A';
+# Index both hash columns so the duplicate lookups between the two tables avoid full scans.
+create index i_hashesA_hash on hashesA(hash);
+create index i_hashesother_hash on hashesother(hash);
+# List the path of every file outside folder A whose hash also occurs in folder A.
+# The left join keeps a row (with a NULL path) even if the files row no longer exists.
+select f.path
+from hashesother as o
+left join files as f on f.rowid = o.id
+where exists (select 1 from hashesA as a where a.hash = o.hash);
+# Same list, each path paired with the path of one of its duplicates in folder A
+# (whichever matching hashesA row the subquery yields first).
+select
+  f.path,
+  (select fa.path
+   from hashesA as a
+   left join files as fa on fa.rowid = a.id
+   where a.hash = o.hash
+   limit 1)
+from hashesother as o
+left join files as f on f.rowid = o.id
+where exists (select 1 from hashesA as a where a.hash = o.hash);
+
+# Rename (prepend ".%" to the file name) files not in folder A which have a duplicate in folder A.
+# Step 1: dump the list of such paths, one per line, into dup.lst.
+# NOTE: if hashes.db is missing, dup.lst is left untouched, so a stale list from an earlier run may remain.
+[ -e hashes.db ] && sqlite3 hashes.db "select (select path from files where rowid == hashesother.id) from hashesother where hashesother.hash in (select hash from hashesA);" > dup.lst
+# Step 2: rename each listed file to ".%<name>" in the same directory (pv -l shows line-based progress).
+# IFS= and -r make read return each line verbatim (no whitespace trimming, no backslash processing).
+pv -l dup.lst | while IFS= read -r ab; do
+  # Skip entries that no longer exist on disk.
+  [ -e "$ab" ] || continue
+  file="${ab##*/}"
+  # A path without any "/" refers to a file in the current directory.
+  case "$ab" in
+    */*) dir="${ab%/*}" ;;
+    *)   dir=. ;;
+  esac
+  # Already carries the ".%" marker: nothing to do.
+  case "$file" in
+    .%*) continue ;;
+  esac
+  dest="${dir}/.%${file}"
+  # Never overwrite an existing destination. mv's stdin is /dev/null so that, should -i
+  # still prompt, it cannot consume the next path from the pipe as its answer (EOF means "no").
+  # "--" stops option parsing in case a path starts with "-".
+  [ -e "$dest" ] || mv -i -- "$ab" "$dest" </dev/null
+done