Current Path : /usr/src/games/fortune/tools/ |
FreeBSD hs32.drive.ne.jp 9.1-RELEASE FreeBSD 9.1-RELEASE #1: Wed Jan 14 12:18:08 JST 2015 root@hs32.drive.ne.jp:/sys/amd64/compile/hs32 amd64 |
Current File : //usr/src/games/fortune/tools/do_uniq.py |
#!/usr/local/bin/python
#
# $FreeBSD: release/9.1.0/games/fortune/tools/do_uniq.py 141477 2005-02-07 21:15:16Z ru $
#
# an aggressive little script for trimming duplicate cookies
#
# Usage: do_uniq.py datfile
#
# Reads a fortune file whose entries are separated by lines consisting of a
# single "%", groups entries that collapse to the same fuzzy key, shows each
# group on the terminal and asks whether to drop the entry shown last.  The
# result is written next to the input as "<datfile>~"; the input itself is
# never modified.
#
# Runs unchanged on Python 2.6+ and Python 3.

import re
import sys

try:
    _ask = raw_input  # Python 2
except NameError:
    _ask = input  # Python 3

# Filler words removed before two fortunes are compared.  They are applied
# in list order as plain substring removals, and longer entries come first
# so that e.g. 'hadnot' is removed before 'had' can break it up.
wordlist = [
    'hadnot',
    'donot',
    'hadnt',
    'dont',
    'have',
    'more',
    'will',
    'your',
    'and',
    'are',
    'had',
    'the',
    'you',
    'am',
    'an',
    'is',
    'll',
    've',
    'we',
    'a',
    'd',
    'i',
    'm',
    's',
]

# Anything that is not a letter or digit (underscore included).
_NON_ALNUM = re.compile(r'[\W_]')

# Number of leading characters of the normalised text used as the key.
_KEY_LENGTH = 30


def hash(fortune):
    """Return the fuzzy comparison key for *fortune*.

    The text is lower-cased, stripped of every non-alphanumeric character
    and underscore, has each entry of ``wordlist`` removed in order, and is
    finally truncated to its first 30 characters.  Two fortunes with the
    same key are treated as candidate duplicates by ``edit``.

    Note: the name deliberately shadows the ``hash`` builtin inside this
    script; it is kept for compatibility with the original tool.
    """
    key = _NON_ALNUM.sub('', fortune.lower())
    for word in wordlist:
        # Every entry is a plain literal, so str.replace gives the same
        # result as the regex substitution the script used originally.
        key = key.replace(word, '')
    # Alternatives the original author left disabled: also stripping the
    # vowels (or the consonants), and keying on the last 30 characters
    # instead of the first 30.
    return key[:_KEY_LENGTH]


def _read_fortunes(datfile):
    """Return the list of fortunes stored in *datfile*, in file order."""
    fortunes = []
    pending = []
    with open(datfile) as src:
        for line in src:
            # A final "%" without a trailing newline is still a separator.
            if line.rstrip('\n') == '%':
                fortunes.append(''.join(pending))
                pending = []
            else:
                pending.append(line)
    # Keep a last fortune that is not followed by a "%" line instead of
    # silently dropping it; whitespace-only leftovers are not a fortune.
    tail = ''.join(pending)
    if tail.strip():
        if not tail.endswith('\n'):
            tail += '\n'
        fortunes.append(tail)
    return fortunes


def _show(fortune):
    """Print *fortune* followed by its "%" separator line."""
    sys.stdout.write(fortune + '%\n')


def edit(datfile):
    """Interactively remove duplicate fortunes from *datfile*.

    Fortunes whose ``hash`` keys collide are shown together (the other
    members of the group first, the current one last) and the user is
    asked "Remove last fortune? ".  Answering exactly ``y`` drops the
    current fortune and marks the whole group as resolved, so its
    remaining members are kept without further questions; any other answer
    keeps the fortune.  Kept fortunes are written, each followed by a
    "%" line, to ``datfile + '~'``.
    """
    keyed = [(hash(text), text) for text in _read_fortunes(datfile)]

    groups = {}
    for key, text in keyed:
        groups.setdefault(key, []).append(text)
    # Only keys shared by at least two fortunes need the user's attention.
    dups = dict((key, texts) for key, texts in groups.items()
                if len(texts) > 1)

    with open(datfile + '~', 'w') as out:
        for key, text in keyed:
            if key in dups:
                # Push the previous group off the screen (same amount of
                # output as the original "print '\n' * 50").
                sys.stdout.write('\n' * 51)
                for other in dups[key]:
                    if other != text:
                        _show(other)
                _show(text)
                if _ask("Remove last fortune? ") == 'y':
                    del dups[key]
                    continue
            out.write(text + "%\n")


def main():
    """Command-line entry point: ``do_uniq.py datfile``."""
    if len(sys.argv) != 2:
        sys.exit("usage: %s datfile" % sys.argv[0])
    edit(sys.argv[1])


if __name__ == "__main__":
    main()