Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import gensim
- import spacy
- import random
# Pre-trained resources, loaded once when the module is first executed.
# spaCy pipelines, bound to the names the functions below use:
nlp_en = spacy.load('en_core_web_md')
nlp_pl = spacy.load('pl_core_news_md')
# word2vec vectors used for nearest-neighbour ("synonym") lookups; the file is
# read in the binary word2vec format.
model = gensim.models.KeyedVectors.load_word2vec_format(
    'GoogleNews-vectors-negative300.bin.gz',
    binary=True,
)
def generate_titles(title, tags, num_titles):
    """Build alternative titles by swapping tagged words for synonyms.

    The title is split on whitespace. Every word whose lowercase form is in
    ``tags`` is replaced by the result of ``find_synonym``; words that are not
    tags, or for which ``find_synonym`` returns ``None``, are kept unchanged.

    Args:
        title: The original title string.
        tags: Container of lowercase tag strings marking replaceable words.
        num_titles: Number of titles to return.

    Returns:
        A list with ``num_titles`` entries. The substitution is computed once
        and does not vary between entries, so every entry is the same string.

    Raises:
        ValueError: If the detected language is neither 'en' nor 'pl'.
    """
    # Only these two languages are supported; reject anything else up front.
    language = detect_language(title)
    if language not in ('en', 'pl'):
        raise ValueError("Unsupported language: {}".format(language))

    # len(range(...)) mirrors the iteration count of a ``for`` loop over
    # range(num_titles), including 0 for negative values and TypeError for
    # non-integers.
    count = len(range(num_titles))
    if not count:
        return []

    # The replacement for a word does not depend on which title is being
    # built, so do the (expensive) synonym lookups a single time.
    replaced_words = []
    for word in title.split():
        synonym = find_synonym(word, language) if word.lower() in tags else None
        replaced_words.append(word if synonym is None else synonym)

    alternative_title = " ".join(replaced_words)
    return [alternative_title] * count
# Letters with Polish diacritics; their presence is treated as a sign of Polish.
_POLISH_LETTERS = frozenset("ąćęłńóśźż")


def detect_language(text):
    """Guess whether ``text`` is Polish ('pl') or English ('en').

    Running the English spaCy pipeline and reading ``doc.lang_`` reports the
    language of the pipeline, not of the text, so it would always yield 'en'.
    This function uses a dependency-free heuristic instead: text containing
    any letter with a Polish diacritic is classified as Polish, everything
    else as English.

    NOTE(review): this is only a heuristic. Polish text written without
    diacritics is still reported as 'en'; use a dedicated language
    identification library if higher accuracy is required.

    Args:
        text: The string to classify.

    Returns:
        'pl' if a Polish-specific letter occurs in ``text``, otherwise 'en'.
    """
    # Lowercase first so that upper-case letters such as 'Ł' or 'Ż' match too.
    if any(char in _POLISH_LETTERS for char in text.lower()):
        return 'pl'
    return 'en'
def find_synonym(word, language):
    """Return a word2vec neighbour of ``word`` that is in ``language``.

    The 10 nearest neighbours of ``word`` are requested from the word2vec
    model and scanned in the order returned; the first one for which
    ``detect_language`` gives ``language`` is the result.

    Args:
        word: The word to look up in the word2vec model.
        language: Language code the returned neighbour must be detected as.

    Returns:
        The first matching neighbour, or ``None`` if no neighbour matches or
        a ``KeyError`` is raised during the lookup (presumably because
        ``word`` is not in the model's vocabulary).
    """
    try:
        candidates = model.most_similar(positive=[word], topn=10)
        return next(
            (
                candidate
                for candidate, _similarity in candidates
                if detect_language(candidate) == language
            ),
            None,
        )
    except KeyError:
        return None
def generate_tags(title):
    """Derive lowercase tags from the named entities found in ``title``.

    The title is processed with the English spaCy pipeline. For each named
    entity, the 5 nearest word2vec neighbours are lowercased and collected,
    skipping any that are already present.

    Args:
        title: The title string to extract tags from.

    Returns:
        A list of unique lowercase tag strings, in the order first seen.
        Entities whose lookup raises ``KeyError`` contribute nothing.
    """
    tags = []
    for entity in nlp_en(title).ents:
        try:
            neighbours = model.most_similar(positive=[entity.text], topn=5)
        except KeyError:
            # Best effort: skip entities the model cannot look up.
            continue
        for neighbour, _similarity in neighbours:
            lowered = neighbour.lower()
            if lowered not in tags:
                tags.append(lowered)
    return tags
# Example usage
title = "How to make a cake"
num_titles = 10
# Compute the tags once and reuse them for both title generation and display,
# rather than running the entity extraction and word2vec lookups twice.
tags = generate_tags(title)
alternative_titles = generate_titles(title, tags, num_titles)
print("Original title: {}".format(title))
print("Tags: {}".format(tags))
print("Generated titles:")
# Number the generated titles starting from 1.
for position, alt_title in enumerate(alternative_titles, start=1):
    print("{}. {}".format(position, alt_title))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement