Advertisement
here2share

# longest_repeat_substring.py

Sep 9th, 2015
233
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.62 KB | None | 0 0
  1. # longest_repeat_substring.py
  2.  
  3. data = '''
  4. abcfpcompassionatenrzoujdqicbsexgvahltcpghsexbmkentrepreneurshipsfrnujwqiz
  5. ovaltdylcmkfenergeticztgsexbnwdyx123456789abchtrzujhqiwy123456789abcycpgml
  6. kfnbvasexdtrhzoncdqipmbygconversationalistsdwsexujlakfdplsexbhwnrymtkfqien
  7. trepreneurshipasgczoujvawtabcdefg0123456dmnlgkfyvaqihubrpczosexkfentrepren
  8. eurshipsaujphwlmzovaicrbtdsexgnylujsexccompassionateyrqinwgkfdtzovapmhbrde
  9. nergeticqrkfwczoujtynhqilbvapgmsexlwgbcompassionatetujsexmkfrcvzoyndpqihbv
  10. aypwlmjcdhtrqienergeticwrlngsexkfzodcgwbenergeticermrvatqikfnzomyhujplsexm
  11. hdpwgujvakzoyqicrbsexntlyvatwqiguj01234567890123456789sexkfhzobnrdprzosext
  12. cywmdhnlkfpvacompassionategujqimtvazohylsexkfpcujbdrgnqiwtzogpcvahujdmrwlb
  13. 01234567890123456789kfnsexzobrythqimvadcngentrepreneurshipssexwujpkltsexbn
  14. rd01234567890123456789mhlgcujqikfzowsexgujabnzoytldcqikfwmprhsextzohcompas
  15. sionatelrcypdgujvakfqimnwbczotqipysexdcompassionateqflnmghujrwvavaujqiplrz
  16. okfcompassionatehwdbmysextgncdyqignhrlmvatsexcpujcompassionatezokfbsexvacb
  17. nhlprdujenergeticarnwkftqygzomqitnujmgsexva01234567890123456789xyzbhrpkfwu
  18. jsexrclhmtdvankfqibywzenergetictrogpqigujkcompassionateflwhzoaprtnycmsexdb
  19. dysexzognkfctqenlightenmentimujvalwbhrlrvaenergeticyrpzokfcntbmhgujsexpqiy
  20. dwujsexblm01234567890123456789owkrdqicpnvagbdqigujhpmkfnrzoytvawcsexlcompa
  21. ssionatekenergeticurqfujsexbydtrmpcvahqinzolgwwvaykfbzodqilnrgsexyujentrep
  22. reneurshipsmphvarghydplzocentrepreneurshipskfsexqiwbmtnjlzotkfvagnyujhbent
  23. repreneurshipswpsexdrmqicrcpqmhdsexzoykfvalngenergeticirwtujbvaltmenergeti
  24. corzowpqiyhnbujrsexkfdcynqimclrpujvawsexdzokf01234567890123456789gpyclwhkf
  25. rzoabcdefg0123456tvagbnujqidsexmkfbwzovz123456789abcaqitpgnylsexhrujdchmse
  26. xygcompassionateujrptwczoqilvanbdkfmqilthybnk01234567890123456789ujcwsexva
  27. zodzodbqitsexwgnrhpvakfmujlenergeticrcyujlkfrcytnqisexhzocompassionatevwpg
  28. mbdvaqicompassionatewvawbpgrthlsexycovfujndmnvabwhenergeticvrkfujdglmqiyps
  29. extzocrvamzoqilrconversationalistseyujpkftgnsexdbhwcqidpbhlzovaentrepreneu
  30. rshipsgujkftnsexwrmyl01234567890123456789fdpqimgzotcnbysexvahwnujbhqi01234
  31. 567890123456789wdgvasexymcpzokfyuv123456789abcjnpgcvarmzohkftdlsexbqiqitbp
  32. hvansexzolycenergeticsrdrkfwmgujwthqirpzenlightenmentoujcvalndsexmgkfbpzog
  33. kfujmnqilrabcdefg0123456bdyhwvactsexywkfsexmqidvarzohupc012345678901234567
  34. 89bldentrepreneurshipsbujsextplrcqizowvaghkfbymnmujrkfbygsexdhzovapqilncom
  35. passionatexcwpujybldwckfsexvahqabcdefg0123456imzorgtnqprlcsexwmdvatzonujgk
  36. fhbyhentrepreneurshipsnujmbypwvatzokfdlqigsexrcdcqihzg01234567890123456789
  37. xyzrsexwlujmbthngbmultibillionairepsextywqizovalrcmudkfmghzosexqikfenerget
  38. icdrlntbrcdqihwglmcompassionatezujvatcysexbdnrkfytrnzosexvaujfgcqrgnhpackn
  39. owledgmentmujtwzosexlqicqilrconversationalistsfycgsexdkfpcompassionatenxyz
  40. '''
  41.  
  42. print data
  43. data = data.strip()
  44. print '\n', len(data)
  45.  
  46. def longest_seq(string,results,minlen=5,cutoff=2500):
  47.     sss = string[:cutoff]
  48.     d = sss[int(len(sss)*0.5):]
  49.     longest=[]
  50.     size=[]
  51.     while len(d) > minlen:
  52.         t=d=d[1:]
  53.         while len(t) > minlen:
  54.             if sss.count(t) > 1:
  55.                 for i in longest:
  56.                     if t in i: t='!'; break
  57.                 if t != '!':
  58.                     longest=sorted(longest+[t],key=len)
  59.                     longest=longest[-results:]
  60.                     size.append((data.count(t)*len(t),t))
  61.                     size=list(set(size))
  62.                     size=sorted(size)
  63.                     size=size[-results:]
  64.                 break
  65.             t=t[:-1]
  66.     return longest[::-1],size[::-1]
  67.  
  68. data=''.join(data.split())
  69.  
  70. LS,SZ=longest_seq(data,20,8)
  71.  
  72. print '\nLen.: \tOccurs: \tTotal Size: \t\tData:'
  73. ts=0
  74. for i in LS:
  75.     size = data.count(i)*len(i)
  76.     ts += size
  77.     print '%s\t%s\t\t%s\t\t%s' % (len(i), data.count(i), size, i)
  78. print '\n\nSize: \t\tData:'
  79. for i in SZ:
  80.     print '%s\t\t%s' % (i)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement