Step8rother

TF-IDF в sklearn

Jun 4th, 2023
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.51 KB | Science | 0 0
  1. import pandas as pd
  2. import nltk
  3. from nltk.corpus import stopwords as nltk_stopwords
  4. # < напишите код здесь >
  5. from sklearn.feature_extraction.text import TfidfVectorizer
  6.  
  7. data = pd.read_csv("/datasets/tweets_lemm.csv")
  8. corpus = data['lemm_text'].values.astype('U')
  9.  
  10. nltk.download('stopwords')
  11. stopwords = set(nltk_stopwords.words('russian'))
  12.  
  13. count_tf_idf = TfidfVectorizer(stop_words=stopwords)
  14. tf_idf = count_tf_idf.fit_transform(corpus)
  15.  
  16. print("Размер матрицы:", tf_idf.shape)
Add Comment
Please, Sign In to add comment