# Networks code

# Cargar librerías necesarias
library(dplyr)
library(syuzhet)
library(igraph)
library(tidyr)
library(ggplot2)

# Load the data: the user picks a CSV file interactively.
data <- read.csv(file.choose(), header = TRUE)

# Parse created_at into POSIXct. The format expects: abbreviated weekday,
# abbreviated month, day, HH:MM:SS, numeric UTC offset, four-digit year.
# The minutes field must be %M (the earlier "%H:%:%S" spec was malformed).
# NOTE(review): %a and %b are locale-dependent; if the timestamps use English
# day/month names and the session locale is not English, set LC_TIME first.
data$created_at <- as.POSIXct(
  data$created_at,
  format = "%a %b %d %H:%M:%S %z %Y"
)
if (nrow(data) > 0 && all(is.na(data$created_at))) {
  warning(
    "No created_at value could be parsed; check the timestamp format ",
    "and the LC_TIME locale.",
    call. = FALSE
  )
}

# Score every tweet with the AFINN lexicon (one numeric score per tweet).
# as.character() guards against the text column having been read as a factor.
sentiment_scores <- get_sentiment(as.character(data$text), method = "afinn")

# Append the scores to the original data as column `sentiment_scores`.
data_with_sentiments <- cbind(data, sentiment_scores)

# Bucket the score into Positive (> 0), Negative (< 0) or Neutral (== 0).
# The column is referenced by its full name: `$sentiment` only worked through
# partial matching and would pick up a pre-existing `sentiment` column instead.
data_with_sentiments$sentiment_category <- ifelse(
  data_with_sentiments$sentiment_scores > 0,
  "Positive",
  ifelse(data_with_sentiments$sentiment_scores < 0, "Negative", "Neutral")
)

# Count tweets per author and sentiment category, then spread the categories
# into columns: one row per author, one count column per category, 0 where an
# author has no tweets in that category. count() returns an ungrouped table,
# so no stray grouping leaks into later steps.
author_sentiments <- data_with_sentiments %>%
  count(author_id, sentiment_category, name = "count") %>%
  pivot_wider(
    names_from = sentiment_category,
    values_from = count,
    values_fill = list(count = 0)
  )

# Author-by-category count matrix (rows = authors, columns = categories).
# It is rectangular; the square matrix is the similarity matrix built below.
author_matrix <- as.matrix(author_sentiments[, -1])
rownames(author_matrix) <- author_sentiments$author_id

# Pairwise author similarity as 1 - binary (Jaccard) distance.
# dist(method = "binary") only looks at whether a count is zero or non-zero,
# so two authors are compared on WHICH categories they used, not how often.
# NOTE(review): the result is an authors x authors dense matrix, so memory
# grows quadratically with the number of authors.
similarity_matrix <- 1 - as.matrix(dist(author_matrix, method = "binary"))

# Build an undirected, weighted author network from the similarity matrix.
# Edge weight = similarity; pairs with zero similarity get no edge, and the
# diagonal (self-similarity) is ignored.
g <- graph_from_adjacency_matrix(
  similarity_matrix,
  mode = "undirected",
  weighted = TRUE,
  diag = FALSE
)

# Drop self-loops and duplicate edges. This does NOT prune weak edges; every
# pair with non-zero similarity stays connected.
g <- simplify(g, remove.multiple = TRUE, remove.loops = TRUE)

# Keep `weight` as similarity (higher = more alike), because community
# detection and the edge widths read it as tie strength. Path-based measures
# such as closeness read weights as distances, so the reciprocal is stored in
# a separate `distance` attribute instead of overwriting `weight`.
E(g)$distance <- 1 / E(g)$weight

# Draw the network; edge width is proportional to similarity (the factor 3 is
# only a visual scale so that the strongest ties are clearly thicker).
plot(g, vertex.label = NA, vertex.size = 5, edge.width = 3 * E(g)$weight)

# Network statistics.
# degree_distribution holds the degree of each vertex (not a histogram).
degree_distribution <- degree(g)
# Closeness over shortest paths measured in distance (1 / similarity).
centrality <- closeness(g, weights = E(g)$distance)
# Louvain communities, grouping authors joined by high-similarity edges.
communities <- cluster_louvain(g, weights = E(g)$weight)

print(degree_distribution)
print(centrality)
print(communities)

# Draw the network again with the detected communities highlighted.
plot(communities, g, vertex.label = NA, vertex.size = 5)

# Tag every tweet with the community of its author. Vertex names in `g` are
# the author ids, so match() maps each tweet to its author's vertex.
data_with_sentiments$author_community <- communities$membership[
  match(data_with_sentiments$author_id, V(g)$name)
]

# Make sure a reply_time column exists. If the CSV did not provide one, derive
# it as the minutes elapsed between a reply and the tweet it answers.
# NOTE(review): assumes the tweet identifier column is called `tweet_id` and
# that an existing reply_time column is already in minutes - confirm both
# against the input file.
if (!"reply_time" %in% names(data_with_sentiments)) {
  if (!"tweet_id" %in% names(data_with_sentiments)) {
    stop(
      "Column 'reply_time' is missing and cannot be derived because there ",
      "is no 'tweet_id' column.",
      call. = FALSE
    )
  }
  # Row index of the parent tweet; NA when the tweet is not a reply or its
  # parent is not in the data.
  parent_row <- match(
    data_with_sentiments$in_response_to_tweet_id,
    data_with_sentiments$tweet_id,
    incomparables = NA
  )
  data_with_sentiments$reply_time <- as.numeric(difftime(
    data_with_sentiments$created_at,
    data_with_sentiments$created_at[parent_row],
    units = "mins"
  ))
}

# Average reply time of replies, per author community and sentiment category.
reply_time_summary <- data_with_sentiments %>%
  filter(!is.na(in_response_to_tweet_id)) %>%
  group_by(author_community, sentiment_category) %>%
  summarise(avg_reply_time = mean(reply_time, na.rm = TRUE)) %>%
  ungroup()

# Grouped bar chart: sentiment category on the x axis, one bar per community.
reply_time_plot <- ggplot(
  reply_time_summary,
  aes(
    x = sentiment_category,
    y = avg_reply_time,
    fill = as.factor(author_community)
  )
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Average Reply Time by Sentiment Category and Author Community",
       x = "Sentiment Category",
       y = "Average Reply Time (minutes)") +
  theme_minimal()

# Print explicitly so the chart is also drawn when the script is run through
# source(), where top-level values are not auto-printed.
print(reply_time_plot)