www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit 2d25a38202e9c99446b4fe6b2b88d92a9bd2acdc
parent f72d3aca11c0800008a5ad90ca2442f9abc4c38b
Author: Georges Dupéron <jahvascriptmaniac+github@free.fr>
Date:   Sun,  4 Sep 2011 11:27:30 +0200

Merge branch 'master' of github:jsmaniac/2010-detection-doublons

Diffstat:
A updatehash.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 76 insertions(+), 0 deletions(-)

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Recovered from: diff --git a/updatehash.py b/updatehash.py (new file, @@ -0,0 +1,76 @@)
"""Maintain an SQLite cache of MD5/SHA-1 checksums for the files of a tree.

Usage: updatehash.py database-file directory

The directory is walked recursively.  A file is (re)hashed only when its
modification time or size differs from what the database already holds
for that path.
"""
import hashlib
import os
import sqlite3
import time
import sys

# Minimum number of seconds between two intermediate commits during a scan.
_COMMIT_INTERVAL = 0.1


def checksumFile(path):
    """Return ``{'md5': hexdigest, 'sha1': hexdigest}`` for the file at *path*.

    The file is read in binary mode, in fixed-size chunks, so large files
    are never loaded into memory at once.
    """
    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    chunk_size = 2 * md5.block_size * sha1.block_size
    with open(path, 'rb') as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            md5.update(chunk)
            sha1.update(chunk)
    return {'md5': md5.hexdigest(), 'sha1': sha1.hexdigest()}


def fileInfo(path):
    """Return ``{'mtime': ..., 'size': ...}`` read from ``os.stat(path)``."""
    stat = os.stat(path)
    return {'mtime': stat.st_mtime, 'size': stat.st_size}


def initdb(cursor):
    """Create the ``files`` table and its indexes if they do not exist yet."""
    cursor.execute("create table if not exists files(tag,timestamp,path primary key,md5,sha1,mtime,size)")
    cursor.execute("create index if not exists i_files_tag on files(tag)")
    cursor.execute("create index if not exists i_files_path_md5_sha1 on files(path,md5,sha1)")


def cacheFileInfo(cursor, path):
    """Return the cached ``{'mtime', 'size'}`` for *path*, or ``None``.

    ``None`` means the database has no row for this path.
    """
    cursor.execute('select mtime,size from files where path = ?', (path,))
    data = cursor.fetchone()
    if data is None:
        return None
    return {'mtime': data[0], 'size': data[1]}


def update(connection, cursor, path):
    """Walk *path* and refresh the cached checksums of new or changed files.

    Every row written during one call carries the same ``timestamp`` (the
    wall-clock time, in seconds since the epoch, at which the scan began).
    The transaction is committed at most once every ``_COMMIT_INTERVAL``
    seconds while scanning; the caller is expected to issue the final
    commit.
    """
    # time.time() rather than time.clock(): the latter measured CPU time on
    # Unix (meaningless as a timestamp) and no longer exists in Python 3.8+.
    timestamp = time.time()
    lastTime = timestamp
    for dirpath, _dirnames, filenames in os.walk(path):
        for f in filenames:
            fpath = os.path.join(dirpath, f)
            if not os.path.isfile(fpath):
                continue
            fi = fileInfo(fpath)
            if fi == cacheFileInfo(cursor, fpath):
                continue  # mtime and size unchanged: keep the cached hashes

            print(" updating %s" % fpath)
            # checksumFile returns a dict; index it explicitly.  Tuple
            # unpacking would bind the *keys* ('md5', 'sha1'), not the
            # digests.
            sums = checksumFile(fpath)
            values = ('no tag', timestamp, fpath, sums['md5'], sums['sha1'], fi['mtime'], fi['size'])
            cursor.execute("insert or replace into files(tag,timestamp,path,md5,sha1,mtime,size) values(?,?,?,?,?,?,?)", values)

            # Commit periodically so an interrupted scan keeps most of the
            # hashing work already done.
            currentTime = time.time()
            if abs(lastTime - currentTime) >= _COMMIT_INTERVAL:
                lastTime = currentTime
                connection.commit()
                print("commit!")


def walk(db, path):
    """Open (creating if needed) the database *db* and scan *path* into it."""
    connection = sqlite3.connect(db)
    try:
        connection.text_factory = str # For utf-8 file names…
        cursor = connection.cursor()
        try:
            initdb(cursor)
            update(connection, cursor, path)
            connection.commit()
        finally:
            cursor.close()
    finally:
        # Release the database file handle even when the scan fails.
        connection.close()


def help():
    """Print the usage line and exit with status 1."""
    print('Usage : %s database-file directory' % sys.argv[0])
    sys.exit(1)


def main():
    """Command-line entry point: validate arguments, then run the scan."""
    if len(sys.argv) < 3:
        help()
    for arg in sys.argv[1:]:
        if arg == '-h' or arg == '--help':
            help()

    walk(sys.argv[1], sys.argv[2])


if __name__ == '__main__':
    main()