Advertisement
PuriDevelopers

Untitled

Mar 27th, 2023
105
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.84 KB | None | 0 0
  1. import gensim
  2. import spacy
  3. import random
  4.  
  5. # Load the pre-trained models
  6. nlp_en = spacy.load('en_core_web_md')
  7. nlp_pl = spacy.load('pl_core_news_md')
  8. model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)
  9.  
  10. def generate_titles(title, tags, num_titles):
  11. # Split the title into words
  12. words = title.split()
  13.  
  14. # Determine the language of the title
  15. language = detect_language(title)
  16.  
  17. # Use the appropriate SpaCy model based on the language
  18. if language == 'en':
  19. nlp = nlp_en
  20. elif language == 'pl':
  21. nlp = nlp_pl
  22. else:
  23. raise ValueError("Unsupported language: {}".format(language))
  24.  
  25. # Generate alternative titles
  26. alternative_titles = []
  27. for i in range(num_titles):
  28. alternative_title = []
  29. for word in words:
  30. if word.lower() in tags:
  31. # Replace the tag with a synonym
  32. synonym = find_synonym(word, language)
  33. if synonym is not None:
  34. alternative_title.append(synonym)
  35. else:
  36. alternative_title.append(word)
  37. else:
  38. alternative_title.append(word)
  39. # Convert the list of words back into a string
  40. alternative_title = " ".join(alternative_title)
  41. alternative_titles.append(alternative_title)
  42. return alternative_titles
  43.  
  44. def detect_language(text):
  45. # Use SpaCy to detect the language of the text
  46. doc = nlp_en(text)
  47. lang = doc.lang_
  48. return lang
  49.  
  50. def find_synonym(word, language):
  51. # Find a synonym for the word using the pre-trained word2vec model
  52. try:
  53. synonyms = model.most_similar(positive=[word], topn=10)
  54. for syn, sim in synonyms:
  55. if detect_language(syn) == language:
  56. return syn
  57. return None
  58. except KeyError:
  59. return None
  60.  
  61. def generate_tags(title):
  62. # Use SpaCy to extract named entities from the title
  63. doc = nlp_en(title)
  64. named_entities = [ent.text for ent in doc.ents]
  65.  
  66. # Find the most similar words to each named entity using the pre-trained word2vec model
  67. tags = []
  68. for entity in named_entities:
  69. try:
  70. synonyms = model.most_similar(positive=[entity], topn=5)
  71. for syn, sim in synonyms:
  72. if syn.lower() not in tags:
  73. tags.append(syn.lower())
  74. except KeyError:
  75. pass
  76. return tags
  77.  
  78. # Example usage
  79. title = "How to make a cake"
  80. num_titles = 10
  81. alternative_titles = generate_titles(title, generate_tags(title), num_titles)
  82. tags = generate_tags(title)
  83. print("Original title: {}".format(title))
  84. print("Tags: {}".format(tags))
  85. print("Generated titles:")
  86. for i, alt_title in enumerate(alternative_titles):
  87. print("{}. {}".format(i+1, alt_title))
  88.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement