commit f72d3aca11c0800008a5ad90ca2442f9abc4c38b
parent e6b27d3e859f1133d1a1073c89602b419228b825
Author: Georges Dupéron <jahvascriptmaniac+github@free.fr>
Date: Sun, 4 Sep 2011 11:28:06 +0200
Remdoubles scripts (should have been commited a while ago).
Diffstat:
6 files changed, 160 insertions(+), 1 deletion(-)
diff --git a/fast-remdoubles.py b/fast-remdoubles.py
@@ -0,0 +1,37 @@
+#!/usr/bin/python
+
+import sys
+import os
+import errno
+import shutil
+
+def identicalFiles(pathA, pathB):
+    """Return True when both files hold exactly the same bytes, else False."""
+    chunk = 4096
+    with open(pathA, 'rb') as fa:
+        with open(pathB, 'rb') as fb:
+            blockA = fa.read(chunk)
+            while blockA == fb.read(chunk):
+                if not blockA:
+                    return True  # both files reached their end together
+                blockA = fa.read(chunk)
+            return False
+
+# Input: pairs of lines, a deletable path under 1/ followed by its presumed original.
+nblines = 0
+for supprimable in sys.stdin:
+    nblines = (nblines + 1) % 10240
+    if nblines == 0:
+        os.system("sync")  # run `sync` once every 10240 pairs
+    # A missing second line (odd input) yields "", which never exists on disk.
+    original = next(sys.stdin, "").rstrip("\n")
+    supprimable = supprimable.rstrip("\n")  # keeps an unterminated last line whole
+    if (supprimable[0:2] == '1/' and os.path.exists(supprimable)
+            and os.path.exists(original) and identicalFiles(original, supprimable)):
+        destfile = "delete/" + supprimable
+        try:
+            os.makedirs(os.path.dirname(destfile))
+        except OSError as e:
+            if e.errno != errno.EEXIST:  # an already existing directory is fine
+                raise
+        shutil.move(supprimable, destfile)
diff --git a/fast-syncdoubles.py b/fast-syncdoubles.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+
+import sys
+import os
+import errno
+import shutil
+
+def identicalFiles(pathA, pathB):
+    """Return True when both files hold exactly the same bytes, else False."""
+    chunk = 4096
+    with open(pathA, 'rb') as fa:
+        with open(pathB, 'rb') as fb:
+            blockA = fa.read(chunk)
+            while blockA == fb.read(chunk):
+                if not blockA:
+                    return True  # both files reached their end together
+                blockA = fa.read(chunk)
+            return False
+
+nblines=0 # counts line pairs read so far, wrapping back to 0 every 10240 pairs
+for supprimable in sys.stdin: # first line of each pair: the duplicate candidate path
+ nblines = (nblines+1)%10240
+ if nblines == 0:
+ os.system("sync"); # run the external `sync` command once per 10240 pairs
+ original = sys.stdin.next() # second line of the pair; raises StopIteration if the input ends mid-pair
+ supprimable = supprimable[0:-1] # drop the last character (normally the newline)
+ original = original[0:-1]
+ if supprimable[0:2] == '1/' and os.path.exists(supprimable) and os.path.exists(original) and identicalFiles(original, supprimable):
+ destfile = "sync/" + original # destination mirrors the original's path under sync/
+ if not os.path.exists(destfile):
+ try:
+ os.makedirs(os.path.dirname(destfile))
+ except OSError as e:
+ if e.errno == errno.EEXIST: # the directory already exists: not an error
+ pass
+ else:
+ raise
+ shutil.move(supprimable, destfile); # NOTE(review): indentation is lost in this view, so it is unclear whether this move is inside the `if not os.path.exists(destfile)` guard - confirm
diff --git a/pre-fast-remdoubles.py b/pre-fast-remdoubles.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python
+
+import sys
+import os
+import errno
+import shutil
+
+# Each input line is "<48-char key><2 chars><path>"; consecutive lines with the
+# same key form one group.  Within a group the i-th path under 1/ is paired with
+# the i-th other path and written as "deletable\noriginal\0".
+from itertools import groupby
+nblines = 0
+for _, group in groupby(sys.stdin, key=lambda line: line[0:48]):
+    original = []
+    supprimable = []
+    for line in group:
+        nblines = (nblines + 1) % 500
+        if nblines == 0:
+            os.system("sync")  # unchanged: run `sync` once every 500 input lines
+        path = line[50:].rstrip("\n")  # also correct when the last line lacks "\n"
+        if path[0:2] == '1/':  # Delete files in the directory named 1
+            supprimable.append(path)
+        else:
+            original.append(path)
+    # Emit right after the group instead of on the next key change, so the last
+    # group of the input is written too.
+    for o, s in zip(original, supprimable):
+        sys.stdout.write(s + "\n" + o + "\0")
diff --git a/pre-fast-remdoubles.py.sh b/pre-fast-remdoubles.py.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./pre-fast-remdoubles.py | sort -z | tr '\0' '\n' # sort the NUL-terminated records, then turn every NUL into a newline
diff --git a/remdoubles-full.py b/remdoubles-full.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+
+import sys
+import os
+import errno
+import shutil
+
+def identicalFiles(pathA, pathB):
+    """Return True when both files hold exactly the same bytes, else False."""
+    chunk = 4096
+    with open(pathA, 'rb') as fa:
+        with open(pathB, 'rb') as fb:
+            blockA = fa.read(chunk)
+            while blockA == fb.read(chunk):
+                if not blockA:
+                    return True  # both files reached their end together
+                blockA = fa.read(chunk)
+            return False
+
+# Each input line is "<48-char key><2 chars><path>"; consecutive lines with the
+# same key form one group.  Within a group, every path under 1/ that is
+# byte-identical to another still-existing path of the group is printed and
+# moved to delete/<path>.
+from itertools import groupby
+for _, group in groupby(sys.stdin, key=lambda line: line[0:48]):
+    original = []
+    supprimable = []
+    for line in group:
+        path = line[50:].rstrip("\n")  # also correct when the last line lacks "\n"
+        if path[0:2] == '1/':  # Delete files in the directory named 1
+            supprimable.append(path)
+        else:
+            original.append(path)
+    # Process the group as soon as it is complete (not on the next key change),
+    # so the last group of the input is handled too.
+    for i in supprimable:
+        if not os.path.exists(i):
+            continue
+        for j in original:
+            # A matching key is only a hint: compare the bytes before moving.
+            if os.path.exists(i) and os.path.exists(j) and identicalFiles(j, i):
+                print(i + " |||||||||| " + j)
+                destfile = "delete/" + i
+                try:
+                    os.makedirs(os.path.dirname(destfile))
+                except OSError as e:
+                    if e.errno != errno.EEXIST:  # existing directory is fine
+                        raise
+                shutil.move(i, destfile)
+                break  # i has been moved; stop trying other originals
diff --git a/remdoubles.py b/remdoubles.py
@@ -20,7 +20,11 @@ def identicalFiles(pathA, pathB):
oldhash = ""
original = ""
supprimable = []
+nblines=0
for line in sys.stdin:
+ nblines = (nblines+1)%500
+ if nblines == 0:
+ os.system("sync");
hash = line[0:48]
file = line[50:-1]
if hash != oldhash:
@@ -39,7 +43,7 @@ for line in sys.stdin:
shutil.move(i, destfile);
supprimable = []
original = ""
- if file[0:2] == 'c/': # Delete files in the directory named c
+ if file[0:2] == 'd/': # Delete files in the directory named d
supprimable.append(file)
else:
if original == "" and os.path.exists(file):