Advertisement
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# words_compression.py
#
# Experiment: join a word list with '*', replace a fixed set of common
# substrings with single-character symbols, and print the text length and the
# zlib + base64 length before and after that substitution.
#
# NOTE (from the original author): the following algorithm is already in zlib.
import base64
import zlib

# Character placed between words in the joined text; it must never be handed
# out as a substitution symbol.
SEPARATOR = '*'

try:
    _encode_b64 = base64.encodebytes  # Python 3.1+
except AttributeError:
    _encode_b64 = base64.encodestring  # Python 2 (removed in Python 3.9)


def _unique_symbols(alphabet, reserved):
    """Return the characters of *alphabet* in order, skipping repeats and *reserved*."""
    unique = []
    for symbol in alphabet:
        if symbol not in reserved and symbol not in unique:
            unique.append(symbol)
    return unique


def _zlib_b64(text):
    """Return ``(compressed, b64)``: zlib-compressed *text* and its base64 form."""
    packed = zlib.compress(text.encode('utf-8'))
    return packed, _encode_b64(packed)


def _substitute(text, ranked, symbols):
    """Replace each ranked substring in *text* with the symbol at the same index.

    *ranked* is a sequence of ``(score, substring)`` pairs; only the substring
    is used.  Replacements are applied one after another, in order.

    Raises ValueError when there are fewer symbols than substrings.
    """
    if len(symbols) < len(ranked):
        raise ValueError('need %d symbols, only %d available'
                         % (len(ranked), len(symbols)))
    for (_score, fragment), symbol in zip(ranked, symbols):
        text = text.replace(fragment, symbol)
    return text


# TODO: replace the following z string with scrabble words
z = '''
aardvark
aback
abaft
abandon
abandoned
abandoning
abandonment
abandons
abase
abased
abasement
abasements
abases
abash
abashed
abashes
abashing
abasing
abate
abated
abatement
abatements
abater
abates
abating
...
zone
zoned
zones
zoning
zoo
zoological
zoologically
zoom
zooms
zoos
'''

# Single-character stand-ins for the substrings in `r`.  The raw alphabet
# repeats '!' and contains the '*' separator; both are filtered out so each
# substring gets a distinct symbol that cannot be mistaken for a word boundary.
s = _unique_symbols(
    '''~'!234567890_+`!@#$%^&*()-={}[]":;\\|<>,.?/''',
    SEPARATOR,
)

# Keep only all-lowercase lines; this also drops the blank lines and the
# '...' placeholder in the sample above.
z = SEPARATOR.join([word for word in z.splitlines() if word.islower()])

# Baseline: sizes without any substitution.
compressed, b64 = _zlib_b64(z)
print('%d %d' % (len(z), len(b64)))

# Disabled ranking pass that produced `r` (reference only, never executed;
# written for Python 2).
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation -- confirm before re-enabling.
#
# r = []
# x = []
# c = ''
# zzz = z
# while zzz:
#     char = zzz[0]
#     zzz = zzz[1:]
#     if char == '*':
#         0
#     elif c == '':
#         c = char
#     elif zzz.count(c+char):
#         c += char
#     else:
#         if len(c) > 1 and c not in x:
#             r += [((len(c)-1)*zzz.count(c),c)]
#             r.sort(reverse=True)
#             r=r[:20]
#             x = [b for a,b in r]
#             if c in x:
#                 print r
#                 print
#         c = ''

# (score, substring) pairs, highest score first.  In the disabled pass above
# the score is (len(substring) - 1) * occurrences in the remaining text.
# NOTE(review): these scores were presumably computed on a full word list, not
# on the short sample -- confirm.
# NOTE(review): replacements run in this order, so an entry that contains an
# earlier one ('ting' after 'ing', 'ation' after 'tion', 'ess' after 'es')
# can no longer match by the time it is reached.
r = [(7346, 'ing'), (5411, 'es'), (4338, 'tion'), (4190, 'ed'), (4064, 'ng'),
     (3764, 're'), (3418, 'en'), (3400, 'ion'), (3380, 'ation'), (2816, 'ers'),
     (2696, 'nt'), (2692, 'at'), (2690, 'te'), (2492, 'ent'), (2482, 'ate'),
     (2475, 'ting'), (2408, 'ess'), (2377, 'in'), (2361, 'er'), (2334, 'al')]

# Sizes after the manual substitution, for comparison with the baseline.
z = _substitute(z, r, s)
compressed, b64 = _zlib_b64(z)
print('%d %d' % (len(z), len(b64)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement