Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Sentiment-based author network analysis for a tweet data set.
#
# Reads a CSV of tweets chosen interactively, scores each tweet with the AFINN
# lexicon, links authors whose sets of sentiment categories overlap, detects
# communities in that network, and compares average reply time per community.
#
# Columns read from the CSV: created_at, text, author_id,
# in_response_to_tweet_id, and either reply_time or tweet_id (see below).

# Load required libraries ----
library(dplyr)
library(syuzhet)
library(igraph)
library(tidyr)
library(ggplot2)

# Load the data ----
data <- read.csv(file.choose(), header = TRUE)

# Convert created_at to a date-time. The minute field is "%M"; the time part
# must read "%H:%M:%S" or every timestamp fails to parse.
# NOTE(review): "%a" and "%b" are locale-dependent; this assumes English
# day/month abbreviations in the file -- confirm against the data.
data$created_at <- as.POSIXct(
  data$created_at,
  format = "%a %b %d %H:%M:%S %z %Y"
)

# Sentiment analysis ----
sentiment_scores <- get_sentiment(data$text, method = "afinn")

# Append the scores to the original data (the new column is named
# "sentiment_scores").
data_with_sentiments <- cbind(data, sentiment_scores)

# Classify each tweet as Positive / Negative / Neutral. The full column name
# is used so the lookup does not depend on `$` partial matching.
data_with_sentiments$sentiment_category <- ifelse(
  data_with_sentiments$sentiment_scores > 0,
  "Positive",
  ifelse(data_with_sentiments$sentiment_scores < 0, "Negative", "Neutral")
)

# Author-level sentiment profile ----
# One row per author, one column per sentiment category, cells = tweet counts.
# count() returns an ungrouped result, so no stray grouping leaks downstream.
author_sentiments <- data_with_sentiments %>%
  count(author_id, sentiment_category, name = "count") %>%
  pivot_wider(
    names_from = sentiment_category,
    values_from = count,
    values_fill = list(count = 0)
  )

# Numeric author-by-category matrix (not square), with author ids as row names
# so they become the vertex names of the graph.
author_matrix <- as.matrix(author_sentiments[, -1])
rownames(author_matrix) <- author_sentiments$author_id

# Author-by-author similarity: 1 - binary (Jaccard) distance, computed on
# which categories each author has used at least once. This matrix is square.
similarity_matrix <- 1 - as.matrix(dist(author_matrix, method = "binary"))

# Build the network ----
# Edge weight = similarity; pairs with similarity 0 get no edge.
g <- graph_from_adjacency_matrix(
  similarity_matrix,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)

# Drop any loops or duplicate edges. This does not prune weak edges.
g <- simplify(g, remove.multiple = TRUE, remove.loops = TRUE)

# Keep `weight` as similarity (higher = more alike): Louvain treats larger
# weights as stronger ties. Path-based measures such as closeness interpret
# weights as distances, so a separate inverted vector is used only there.
edge_distance <- 1 / E(g)$weight

# Visualise the network; edge width grows with similarity.
plot(g, vertex.label = NA, vertex.size = 5, edge.width = 3 * E(g)$weight)

# Analyse the network ----
degree_distribution <- degree(g)
centrality <- closeness(g, weights = edge_distance)
communities <- cluster_louvain(g)
print(degree_distribution)
print(centrality)
print(communities)

# Visualise communities
plot(communities, g, vertex.label = NA, vertex.size = 5)

# Attach each tweet's author community to the tweet-level data. Vertex names
# are character (they come from row names), so ids are compared as character.
data_with_sentiments$author_community <- communities$membership[
  match(as.character(data_with_sentiments$author_id), V(g)$name)
]

# Reply time ----
# The summary below needs a `reply_time` column. If the input does not already
# provide one, derive it (in minutes) as the gap between a reply and the tweet
# it responds to.
# NOTE(review): this assumes the data has a `tweet_id` column that
# `in_response_to_tweet_id` refers to -- confirm against the CSV. If it is
# absent, reply_time is NA throughout.
if (!"reply_time" %in% names(data_with_sentiments)) {
  parent_created_at <- data_with_sentiments$created_at[
    match(
      data_with_sentiments$in_response_to_tweet_id,
      data_with_sentiments$tweet_id
    )
  ]
  data_with_sentiments$reply_time <- as.numeric(
    difftime(data_with_sentiments$created_at, parent_created_at, units = "mins")
  )
}

# Average reply time per community and sentiment category (replies only).
reply_time_summary <- data_with_sentiments %>%
  filter(!is.na(in_response_to_tweet_id)) %>%
  group_by(author_community, sentiment_category) %>%
  summarise(avg_reply_time = mean(reply_time, na.rm = TRUE)) %>%
  ungroup()

# Plot average reply time by sentiment category, one bar per community.
ggplot(
  reply_time_summary,
  aes(
    x = sentiment_category,
    y = avg_reply_time,
    fill = as.factor(author_community)
  )
) +
  geom_col(position = "dodge") +
  labs(
    title = "Average Reply Time by Sentiment Category and Author Community",
    x = "Sentiment Category",
    y = "Average Reply Time (minutes)",
    fill = "Author Community"
  ) +
  theme_minimal()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement