commit 2d25a38202e9c99446b4fe6b2b88d92a9bd2acdc
parent f72d3aca11c0800008a5ad90ca2442f9abc4c38b
Author: Georges Dupéron <jahvascriptmaniac+github@free.fr>
Date: Sun, 4 Sep 2011 11:27:30 +0200
Merge branch 'master' of github:jsmaniac/2010-detection-doublons
Diffstat:
| A | updatehash.py | | | 76 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 76 insertions(+), 0 deletions(-)
diff --git a/updatehash.py b/updatehash.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+import hashlib
+import os
+import sqlite3
+import time
+import sys
+
+def checksumFile(path):
+ md5 = hashlib.md5()
+ sha1 = hashlib.sha1()
+ with open(path,'rb') as f:
+ while True:
+ chunk = f.read(2*md5.block_size*sha1.block_size)
+ if not chunk:
+ return {'md5':md5.hexdigest(), 'sha1':sha1.hexdigest()}
+ md5.update(chunk)
+ sha1.update(chunk)
+
def fileInfo(path):
    """Return the modification time and size of *path*.

    The values come from a single os.stat() call and are returned as a
    dict with the keys 'mtime' and 'size'.
    """
    info = os.stat(path)
    return dict(mtime=info.st_mtime, size=info.st_size)
+
def initdb(cursor):
    """Create the `files` table and its two indexes if they are missing.

    Every statement uses "if not exists", so calling this on an already
    initialised database changes nothing.
    """
    schema = (
        "create table if not exists files(tag,timestamp,path primary key,md5,sha1,mtime,size)",
        "create index if not exists i_files_tag on files(tag)",
        "create index if not exists i_files_path_md5_sha1 on files(path,md5,sha1)",
    )
    for statement in schema:
        cursor.execute(statement)
+
def cacheFileInfo(cursor, path):
    """Return the mtime and size stored in the database for *path*.

    Returns a dict with the keys 'mtime' and 'size', or None when the
    `files` table has no row for that path.
    """
    cursor.execute('select mtime,size from files where path = ?', (path,))
    row = cursor.fetchone()
    if not row:
        # fetchone() gave None: hand that back unchanged.
        return row
    mtime, size = row
    return {'mtime': mtime, 'size': size}
+
def update(connection,cursor,path):
    """Refresh the `files` table for every regular file found under *path*.

    The tree is traversed with os.walk(). For each regular file the
    current mtime/size (fileInfo) is compared with the cached row
    (cacheFileInfo); when they differ, or no row exists, the file is
    hashed again and its row is inserted or replaced with the tag
    'no tag' and the timestamp taken at the start of this call.

    Parameters:
        connection: sqlite3 connection, used only to commit.
        cursor: cursor on that connection, used for reads and writes.
        path: root directory to scan.

    Work is committed at most once every 0.1 second while scanning; rows
    written after the last periodic commit are left for the caller to
    commit.
    """
    # Wall-clock time: time.clock() measured CPU time on Unix (so it barely
    # advanced while waiting on disk I/O) and no longer exists in
    # Python 3.8+.
    timestamp = time.time()
    lastTime = timestamp
    for dirpath, _dirnames, filenames in os.walk(path):
        for f in filenames:
            fpath = os.path.join(dirpath, f)
            if not os.path.isfile(fpath):
                continue
            fi = fileInfo(fpath)
            cfi = cacheFileInfo(cursor, fpath)
            if fi == cfi:
                # Same mtime and size as the cached row: keep the stored hashes.
                continue
            print(" updating %s" % fpath)
            # checksumFile() returns a dict; unpacking it directly would
            # bind the key names, not the digests.
            sums = checksumFile(fpath)
            values = ('no tag', timestamp, fpath, sums['md5'], sums['sha1'], fi['mtime'], fi['size'])
            cursor.execute("insert or replace into files(tag,timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?,?)", values)

            # Only a write leaves uncommitted work, so the throttle check
            # sits next to the insert.
            currentTime = time.time()
            if abs(lastTime - currentTime) >= 0.1:
                lastTime = currentTime
                connection.commit()
                print("commit!")
+
def walk(db,path):
    """Open the SQLite database *db*, scan *path* and store the results.

    The schema is created if needed (initdb), the tree is scanned
    (update), and the remaining work is committed. The connection is
    closed on every exit path; if the scan raises, the final commit is
    skipped and the exception propagates.

    Parameters:
        db: path of the SQLite database file.
        path: root directory to scan.
    """
    connection = sqlite3.connect(db)
    try:
        connection.text_factory = str # For utf-8 file names...
        cursor = connection.cursor()
        initdb(cursor)
        update(connection, cursor, path)
        connection.commit()
        cursor.close()
    finally:
        # Release the database file handle even when the scan fails.
        connection.close()
+
def help():
    """Print the command-line usage and exit with status 1."""
    # A single parenthesised argument prints the same text on Python 2 and 3.
    print('Usage : %s database-file directory' % sys.argv[0])
    sys.exit(1)
+
# Script entry: too few arguments or an explicit help flag both end in the
# usage message (help() exits); otherwise scan argv[2] into database argv[1].
if len(sys.argv) < 3:
    help()
if any(option in ('-h', '--help') for option in sys.argv[1:]):
    help()

walk(sys.argv[1], sys.argv[2])