Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# ---- Version without a loop ("Sin bucle") ----
# Scores the sentiment of tweets, keeps the inbound ones that mention a
# company, draws a bipartite user-company network coloured by sentiment,
# and prints per-company positive-interaction statistics.

# ---- Libraries ----
library(dplyr)
library(syuzhet)
library(igraph)
library(tidyr)
library(stringr)

# ---- Load and sample the data ----
# Interactive file picker. The CSV must provide at least the columns
# `created_at`, `text`, `inbound` and `author_id`, which are used below.
data <- read.csv(file.choose(), header = TRUE)

# Sample 0.1% of the rows (fraction 0.001); the seed makes it reproducible.
set.seed(123)
sampled_data <- data %>%
  sample_frac(0.001)

# Convert created_at to datetime, e.g. "Tue Oct 31 22:10:47 +0000 2017".
# Rows that do not match the format become NA.
sampled_data$created_at <- as.POSIXct(
  sampled_data$created_at,
  format = "%a %b %d %H:%M:%S %z %Y"
)

# ---- Sentiment analysis ----
# One AFINN score per tweet text.
sentiment_scores <- get_sentiment(
  as.character(sampled_data$text),
  method = "afinn"
)

# Attach the scores as the column `sentiment_scores`.
data_with_sentiments <- cbind(sampled_data, sentiment_scores)

# Classify the score sign. The full column name is used on purpose: the
# shorter `$sentiment` would only work through partial matching and breaks
# as soon as a second column starting with "sentiment" exists.
# A missing score yields a missing category.
data_with_sentiments <- data_with_sentiments %>%
  mutate(
    sentiment_category = case_when(
      sentiment_scores > 0 ~ "Positive",
      sentiment_scores < 0 ~ "Negative",
      sentiment_scores == 0 ~ "Neutral"
    )
  )

# ---- Split inbound / outbound tweets ----
# read.csv() turns a column holding only "True"/"False" into a logical
# vector, and TRUE == "True" is FALSE, so a literal string comparison can
# silently drop every row. Normalising to upper-case text works for both
# logical and character columns.
inbound_tweets <- data_with_sentiments %>%
  filter(toupper(as.character(inbound)) == "TRUE")

outbound_tweets <- data_with_sentiments %>%
  filter(toupper(as.character(inbound)) == "FALSE")

# ---- Mentioned companies ----
# Take the first @handle of each inbound tweet, then drop tweets without a
# mention and handles that consist of digits only.
inbound_tweets <- inbound_tweets %>%
  mutate(mentioned_company = str_extract(text, "(?<=@)\\w+")) %>%
  filter(
    !is.na(mentioned_company),
    !str_detect(mentioned_company, "^\\d+$")
  )

# One row per inbound tweet: who wrote it, whom it mentions, its sentiment.
user_company_interaction <- inbound_tweets %>%
  select(author_id, mentioned_company, sentiment_category)

if (nrow(user_company_interaction) == 0) {
  stop(
    "No inbound tweets with a company mention in the sample; ",
    "nothing to plot or summarise.",
    call. = FALSE
  )
}

# ---- Bipartite graph ----
# Build the edge list from explicit character vectors. as.matrix() on a
# data frame formats numeric columns, which can pad the ids with spaces
# and break the name lookups below.
bipartite_edges <- cbind(
  author_id = as.character(user_company_interaction$author_id),
  mentioned_company = as.character(user_company_interaction$mentioned_company)
)
g_bipartite <- graph_from_edgelist(bipartite_edges, directed = FALSE)

# Vertex type: TRUE for companies, FALSE for users.
V(g_bipartite)$type <- V(g_bipartite)$name %in%
  user_company_interaction$mentioned_company

# Sentiment of a user vertex is taken from that user's first tweet in
# `user_company_interaction` (match() returns the first hit). Company
# vertices get NA.
first_tweet_idx <- match(
  V(g_bipartite)$name,
  as.character(user_company_interaction$author_id)
)
V(g_bipartite)$sentiment_category <- ifelse(
  V(g_bipartite)$type,
  NA_character_,
  user_company_interaction$sentiment_category[first_tweet_idx]
)

# Colours: companies yellow; users green (positive), red (negative) or
# blue (neutral / unknown sentiment).
V(g_bipartite)$color <- case_when(
  V(g_bipartite)$type ~ "yellow",
  V(g_bipartite)$sentiment_category == "Positive" ~ "green",
  V(g_bipartite)$sentiment_category == "Negative" ~ "red",
  TRUE ~ "blue"
)

# ---- Vertex sizes ----
# Number of positive inbound tweets per mentioned company.
positive_interactions <- inbound_tweets %>%
  filter(sentiment_category == "Positive") %>%
  group_by(mentioned_company) %>%
  summarise(count = n(), .groups = "drop")

# Vectorised lookup of each vertex's positive count (NA when it has none).
positive_count <- positive_interactions$count[
  match(V(g_bipartite)$name, positive_interactions$mentioned_company)
]

# Size grows with log(count + 1) * 5 but never drops below the default 5,
# so a company with a single positive tweet (log(2) * 5 = 3.47) is not
# drawn smaller than a vertex without any.
vertex_sizes <- setNames(
  pmax(5, 5 * log1p(coalesce(positive_count, 0L))),
  V(g_bipartite)$name
)
V(g_bipartite)$size <- vertex_sizes

# ---- Plot ----
# Kamada-Kawai layout for better separation.
layout_combined <- layout_with_kk(g_bipartite, maxiter = 1000)

plot(
  g_bipartite,
  vertex.size = V(g_bipartite)$size,
  vertex.label = NA,
  vertex.color = V(g_bipartite)$color,
  layout = layout_combined,
  main = "User-Company Interaction Network Based on Sentiment"
)

# ---- Company statistics ----
# Total number of inbound tweets per mentioned company.
company_interactions <- user_company_interaction %>%
  group_by(mentioned_company) %>%
  summarise(total_interactions = n(), .groups = "drop")

# Top 10 companies by number of positive tweets.
top_positive_companies <- positive_interactions %>%
  arrange(desc(count)) %>%
  head(10)

print("Top 10 Companies with the Most Positive Connections:")
print(top_positive_companies)

# Share of positive tweets among all tweets for those top 10 companies.
positive_to_total_ratio <- top_positive_companies %>%
  left_join(company_interactions, by = "mentioned_company") %>%
  rename(total_connections = total_interactions) %>%
  mutate(positive_to_total_ratio = count / total_connections)

print("Ratio of Positive Connections to Total Connections for the Top 10 Companies:")
print(positive_to_total_ratio)

# Mean share of positive tweets over all companies; companies without a
# positive tweet contribute a ratio of 0.
positive_to_total_ratio_all <- company_interactions %>%
  left_join(positive_interactions, by = "mentioned_company") %>%
  mutate(
    positive_to_total_ratio = coalesce(count, 0L) / total_interactions
  ) %>%
  summarise(mean_positive_to_total_ratio = mean(positive_to_total_ratio))

print("Mean Positive to Total Ratio for All Companies:")
print(positive_to_total_ratio_all$mean_positive_to_total_ratio)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement