Advertisement
techno-

networks part2

Jun 8th, 2024 (edited)
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.04 KB | None | 0 0
  1. ################# Without loop ###
  2. # Load required libraries
  3. library(dplyr)
  4. library(syuzhet)
  5. library(igraph)
  6. library(tidyr)
  7. library(stringr)
  8.  
  # Read the CSV chosen interactively in a file dialog; the first row holds
  # the column names.
  data <- read.csv(file = file.choose(), header = TRUE)

  # Draw a random sample of 0.1% of the rows (fraction 0.001). The fixed seed
  # makes the draw reproducible between runs.
  set.seed(123)
  sampled_data <- sample_frac(data, size = 0.001)

  # Parse created_at into POSIXct. The expected layout is: abbreviated weekday,
  # abbreviated month, day, HH:MM:SS, numeric UTC offset, four-digit year.
  sampled_data$created_at <- as.POSIXct(
    sampled_data$created_at,
    format = "%a %b %d %H:%M:%S %z %Y"
  )
  19.  
  # Score every sampled tweet text with the AFINN lexicon (one number per row).
  sentiment_scores <- get_sentiment(sampled_data$text, method = "afinn")

  # Attach the scores to the sampled rows. The column is named explicitly so
  # the code below can refer to it by its exact name.
  data_with_sentiments <- cbind(
    sampled_data,
    sentiment_scores = sentiment_scores
  )

  # Classify each score: > 0 is "Positive", < 0 is "Negative", 0 is "Neutral".
  # The full column name `sentiment_scores` is used on purpose: the shorter
  # `$sentiment` only resolves through `$` partial matching, which silently
  # returns NULL (or picks another column) as soon as the input data has a
  # second column whose name starts with "sentiment".
  data_with_sentiments$sentiment_category <- ifelse(
    data_with_sentiments$sentiment_scores > 0,
    "Positive",
    ifelse(data_with_sentiments$sentiment_scores < 0, "Negative", "Neutral")
  )
  29.  
  # Split the tweets by direction using the `inbound` flag.
  #
  # The flag is normalised to an upper-case string before comparing. If
  # read.csv() type-converts a True/False column to logical, a direct
  # comparison with the literal "True" coerces TRUE to "TRUE", matches
  # nothing, and leaves both data frames empty. The normalised comparison
  # gives the same result whether the column is logical or character.

  # Inbound tweets (from users to companies)
  inbound_tweets <- data_with_sentiments %>%
    filter(toupper(as.character(inbound)) == "TRUE")

  # Outbound tweets (sent by companies)
  outbound_tweets <- data_with_sentiments %>%
    filter(toupper(as.character(inbound)) == "FALSE")
  35.  
  # Derive the mentioned company for each inbound tweet in a single pipeline:
  #   1. take the first run of word characters that follows an "@" in the text;
  #   2. drop tweets where no such handle was found;
  #   3. drop tweets whose extracted handle consists of digits only.
  inbound_tweets <- inbound_tweets %>%
    mutate(mentioned_company = str_extract(text, "(?<=@)\\w+")) %>%
    filter(
      !is.na(mentioned_company),
      !str_detect(mentioned_company, "^\\d+$")
    )
  45.  
  # User-company interaction table: one row per inbound tweet that mentions
  # a company, with the tweet's sentiment category.
  user_company_interaction <- inbound_tweets %>%
    select(author_id, mentioned_company, sentiment_category)

  # Two-column character edge list (author -> mentioned company).
  # Both columns are converted with as.character() instead of calling
  # as.matrix() on the data frame: as.matrix() runs format() on non-character
  # columns, which pads numeric ids with spaces. Padded vertex names would no
  # longer match author_id when vertices are looked up by name afterwards.
  bipartite_edges <- cbind(
    author_id = as.character(user_company_interaction$author_id),
    mentioned_company = as.character(user_company_interaction$mentioned_company)
  )

  # Undirected graph; vertex names are the distinct values of the edge list.
  g_bipartite <- graph_from_edgelist(bipartite_edges, directed = FALSE)

  # Bipartite type: TRUE for vertices whose name occurs as a mentioned
  # company, FALSE for all other vertices (users).
  V(g_bipartite)$type <- V(g_bipartite)$name %in%
    as.character(user_company_interaction$mentioned_company)
  56.  
  # Sentiment per vertex: company vertices get NA; every other vertex gets
  # the sentiment category of the first interaction row whose author_id
  # equals the vertex name (match() returns the first hit only).
  is_company <- V(g_bipartite)$type
  first_interaction_row <- match(
    V(g_bipartite)$name,
    user_company_interaction$author_id
  )
  V(g_bipartite)$sentiment_category <- ifelse(
    is_company,
    NA,
    user_company_interaction$sentiment_category[first_interaction_row]
  )

  # Colour per vertex: companies are yellow; users are green for "Positive",
  # red for "Negative" and blue for any other non-missing category.
  vertex_sentiment <- V(g_bipartite)$sentiment_category
  user_color <- ifelse(
    vertex_sentiment == "Positive",
    "green",
    ifelse(vertex_sentiment == "Negative", "red", "blue")
  )
  V(g_bipartite)$color <- ifelse(V(g_bipartite)$type, "yellow", user_color)
  64.  
  # Number of positive inbound tweets per mentioned company.
  positive_interactions <- inbound_tweets %>%
    filter(sentiment_category == "Positive") %>%
    group_by(mentioned_company) %>%
    summarise(count = n())

  # Vertex size grows with the number of positive interactions.
  #
  # The counts are looked up for all vertices at once with match() instead of
  # a per-vertex sapply() scan, so the result is always a numeric vector, even
  # for an empty graph. Vertices without positive interactions (users and
  # companies with none) count as 0.
  #
  # Size = 5 + 5 * log(count + 1): a count of 0 yields the base size 5 and the
  # size increases strictly with the count. The earlier formula
  # log(count + 1) * 5 with a separate default of 5 drew a company with one
  # positive interaction (about 3.47) smaller than a company with none.
  vertex_names <- V(g_bipartite)$name
  positive_count <- positive_interactions$count[
    match(vertex_names, positive_interactions$mentioned_company)
  ]
  positive_count[is.na(positive_count)] <- 0

  vertex_sizes <- 5 + 5 * log(positive_count + 1)
  names(vertex_sizes) <- vertex_names

  V(g_bipartite)$size <- vertex_sizes
  80.  
  # Compute vertex coordinates with the Kamada-Kawai layout algorithm,
  # allowing up to 1000 iterations.
  layout_combined <- layout_with_kk(g_bipartite, maxiter = 1000)

  # Draw the network without vertex labels, using the size and colour
  # attributes stored on the vertices.
  plot(
    g_bipartite,
    layout = layout_combined,
    vertex.size = V(g_bipartite)$size,
    vertex.color = V(g_bipartite)$color,
    vertex.label = NA,
    main = "User-Company Interaction Network Based on Sentiment"
  )
  88.  
  # Total number of interactions (all sentiment categories) per company.
  company_interactions <- user_company_interaction %>%
    group_by(mentioned_company) %>%
    summarise(total_interactions = n())

  # The ten companies with the highest number of positive interactions,
  # ordered from most to fewest.
  top_positive_companies <- positive_interactions %>%
    arrange(desc(count)) %>%
    head(n = 10)

  print("Top 10 Companies with the Most Positive Connections:")
  print(top_positive_companies)
  101.  
  # For each of the top 10 companies, look up its total interaction count and
  # compute the share of positive interactions (positive count / total).
  positive_to_total_ratio <- top_positive_companies %>%
    mutate(
      total_connections = company_interactions$total_interactions[
        match(mentioned_company, company_interactions$mentioned_company)
      ],
      positive_to_total_ratio = count / total_connections
    )

  print("Ratio of Positive Connections to Total Connections for the Top 10 Companies:")
  print(positive_to_total_ratio)
  109.  
  # Mean share of positive interactions over all companies. Companies missing
  # from positive_interactions have an NA count after the join and are treated
  # as having 0 positive interactions.
  positive_to_total_ratio_all <- company_interactions %>%
    left_join(positive_interactions, by = "mentioned_company") %>%
    mutate(
      positive_count = ifelse(is.na(count), 0, count),
      positive_to_total_ratio = positive_count / total_interactions
    ) %>%
    summarise(mean_positive_to_total_ratio = mean(positive_to_total_ratio))

  # Report the mean ratio.
  print("Mean Positive to Total Ratio for All Companies:")
  print(positive_to_total_ratio_all$mean_positive_to_total_ratio)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement