Advertisement
here2share

# words_compression.py

Oct 7th, 2018
274
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.48 KB | None | 0 0
  1. # words_compression.py
  2.  
  3. # note: the following algorithm is already in zlib
  4.  
  5. import zlib, base64
  6.  
  7. # todo: replace the following z string with scrabble words
  8. z = '''
  9. aardvark
  10. aback
  11. abaft
  12. abandon
  13. abandoned
  14. abandoning
  15. abandonment
  16. abandons
  17. abase
  18. abased
  19. abasement
  20. abasements
  21. abases
  22. abash
  23. abashed
  24. abashes
  25. abashing
  26. abasing
  27. abate
  28. abated
  29. abatement
  30. abatements
  31. abater
  32. abates
  33. abating
  34. ...
  35. zone
  36. zoned
  37. zones
  38. zoning
  39. zoo
  40. zoological
  41. zoologically
  42. zoom
  43. zooms
  44. zoos
  45. '''
  46.  
  47. s = list('''~'!234567890_+`!@#$%^&*()-={}[]":;\|<>,.?/''')
  48.  
  49. z = '*'.join([x for x in z.splitlines() if x.islower()])
  50.  
  51. compressed = zlib.compress(z)
  52. b64 = base64.encodestring(compressed)
  53.  
  54. print len(z),len(b64)
  55.  
  56. '''
  57. r = []
  58. x = []
  59. c = ''
  60. zzz = z
  61. while zzz:
  62.     char = zzz[0]
  63.     zzz = zzz[1:]
  64.     if char == '*':
  65.         0
  66.     elif c == '':
  67.         c = char
  68.     elif zzz.count(c+char):
  69.         c += char
  70.     else:
  71.         if len(c) > 1 and c not in x:
  72.             r += [((len(c)-1)*zzz.count(c),c)]
  73.             r.sort(reverse=True)
  74.             r=r[:20]
  75.             x = [b for a,b in r]
  76.             if c in x:
  77.                 print r
  78.                 print
  79.         c = ''
  80. '''
  81.  
  82. r = [(7346, 'ing'), (5411, 'es'), (4338, 'tion'), (4190, 'ed'), (4064, 'ng'), (3764, 're'), (3418, 'en'), (3400, 'ion'), (3380, 'ation'), (2816, 'ers'), (2696, 'nt'), (2692, 'at'), (2690, 'te'), (2492, 'ent'), (2482, 'ate'), (2475, 'ting'), (2408, 'ess'), (2377, 'in'), (2361, 'er'), (2334, 'al')]
  83.  
  84. for a,b in r: z=z.replace(b,s.pop(0))
  85.  
  86. compressed = zlib.compress(z)
  87. b64 = base64.encodestring(compressed)
  88.  
  89. print len(z),len(b64)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement