www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

updatehash.sql (2121B)


      1 # Size of duplicates that can be removed (doesn't count the size of the copy we leave)
      2 select round((B.tot-A.tot)/(1024.*1024.*1024.),2)||' Gb' from (select sum(size) as tot from (select distinct md5,sha1,size from files)) as A, (select sum(size) as tot from (select md5,sha1,size from files)) as B;
      3 
      4 # List of duplicates (all copies)
      5 select size,path from files where md5||'#'||sha1||'#'||size in (select md5||'#'||sha1||'#'||size from files group by md5,sha1,size having count(path) > 1) order by size;
      6 
      7 # Total count of files and total weight in Gb
      8 select round(sum(size)/(1024.*1024.*1024.),2)||' Gb        '||count(size)||' files' from files;
      9 
     10 # find files not in folder A which have a duplicate in folder A.
     11 update files set tag = 'A' where path like './A/%';
     12 create table hashesA(id, hash);
     13 insert into hashesA select rowid,size||'#'||md5||'#'||sha1 from files where tag == 'A';
     14 create table hashesother(id, hash);
     15 insert into hashesother select rowid,size||'#'||md5||'#'||sha1 from files where tag != 'A';
     16 create index i_hashesA_hash on hashesA(hash);
     17 create index i_hashesother_hash on hashesother(hash);
     18 # find files not in folder A which have a duplicate in folder A.
     19 select (select path from files where rowid == hashesother.id) from hashesother where hashesother.hash in (select hash from hashesA);
     20 # find files not in folder A associated with one of their duplicates in folder A.
     21 select (select path from files where rowid == hashesother.id),(select (select path from files where rowid == hashesA.id) from hashesA where hashesA.hash == hashesother.hash) from hashesother where hashesother.hash in (select hash from hashesA);
     22 
     23 # Rename (prepend ".% to file name) files not in folder A which have a duplicate in folder A.
     24 [ -e hashes.db ] && sqlite3 hashes.db "select (select path from files where rowid == hashesother.id) from hashesother where hashesother.hash in (select hash from hashesA);" | sort > dup.lst
     25 pv -l dup.lst | while read ab; do file="${ab##*/}"; dir="${ab%/*}"; dest="${dir}/.%${file}"; if [ -e "$ab" ]; then [ "$file" != "${file#.%}" ] || [ -e "$dest" ] || mv -i -- "$ab" "$dest"; fi; done