# (Web-page boilerplate captured with the paste; not part of the script.)
# Advertisement
# Not a member of Pastebin yet?
# Sign Up,
# it unlocks many cool features!
# Session setup. The list-marker prefix that the paste added in front of every
# statement has been removed so the lines parse as R again.

# Drop every object in the global environment and close any graphics device
# left open by a previous run.
rm(list = ls())
while (!is.null(dev.list())) dev.off()

# Packages.
# NOTE: MASS is attached after tidyverse, so MASS::select() masks
# dplyr::select(); call dplyr::select() explicitly if it is ever needed.
library(moments)
library(tidyverse)
library(ggplot2)
library(MASS)
library(patchwork)
library(hrbrthemes)
library(gridExtra)
library(plotly)
library(highcharter)
library(orca)
library(reticulate)
library(grid)
library(igraph)
library(ggraph)
library(png)
library(writexl)
library(fitdistrplus)
# Load the accident records from the semicolon-delimited export.
data <- read_delim("database.csv", delim = ";")

# `Cause Category` is categorical, so relative frequencies are drawn with
# geom_bar(); geom_histogram() bins a continuous variable and fails on a
# discrete x aesthetic. `group = 1` makes `prop` the share of all rows.
ggplot(data, aes(x = `Cause Category`)) +
  geom_bar(
    aes(y = after_stat(prop), group = 1),
    color = "black",
    fill = "grey"
  )

# Quick look at the category levels and their counts.
unique(data$`Cause Category`)
table(data$`Cause Category`)

# NOTE(review): this object shadows base::table(). The name is kept because a
# later statement reads `table`; calls to table() still resolve to the function.
table <- table(data$`Liquid Type`)
table(data$`Liquid Type`)
# Event counts per cause subcategory, sorted ascending so the bars grow from
# left to right (the factor levels follow the sorted table).
frequenze <- sort(table(data$`Cause Subcategory`))
dataset <- as.data.frame(frequenze)
colnames(dataset) <- c("Subcategory", "Frequenza")

ggplot(dataset, aes(x = Subcategory, y = Frequenza)) +
  geom_col() +
  labs(
    title = "Frequenza degli eventi per sottocategoria",
    x = "Sottocategoria",
    y = "Numero di eventi"
  ) +
  theme_minimal() +
  # Red frame around bars 31-38. annotate() draws the rectangle once, whereas
  # geom_rect(aes(<constants>)) would draw one copy per row of `dataset`.
  annotate(
    "rect",
    xmin = 30.5, xmax = 38.5, ymin = -10, ymax = 370,
    color = "red", fill = NA
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  )
# Pie chart of the eight most frequent cause subcategories; all remaining
# subcategories are pooled into "ALTRE".
frequenze <- table(data$`Cause Subcategory`)
dataset <- as.data.frame(frequenze)
colnames(dataset) <- c("Categoria", "Frequenza")
dataset <- dataset[order(-dataset$Frequenza), ] # descending by frequency

# Keep the first eight rows of the sorted table. Selecting by position always
# yields exactly eight named categories, whereas rank() averages ties and can
# keep fewer or more than eight.
dataset$Categoria <- ifelse(
  seq_len(nrow(dataset)) <= 8,
  as.character(dataset$Categoria),
  "ALTRE"
)

# Merge the pooled rows and recompute the frequencies.
dataset <- dataset %>%
  group_by(Categoria) %>%
  summarise(Frequenza = sum(Frequenza))

# Share of each slice, in percent.
dataset$Percentuale <- (dataset$Frequenza / sum(dataset$Frequenza)) * 100

# A single stacked bar in polar coordinates is a pie chart.
library(ggplot2)
p100 <- ggplot(dataset, aes(x = "", y = Percentuale, fill = Categoria)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(
    title = "Distribuzione percentuale delle cause",
    x = NULL,
    y = NULL
  ) +
  theme_void() + # no axes or background
  theme(legend.title = element_blank())
p100
# Percentage of records per liquid type. The counts are rebuilt here so that
# re-running these lines does not re-scale an already converted table.
# NOTE(review): `table` shadows base::table(); the name is kept for
# compatibility with the rest of the script.
table <- table(data$`Liquid Type`)
n <- sum(table)
table <- table / n * 100

# Subcategory levels and their counts in ascending order.
unique(data$`Cause Subcategory`)
sort(table(data$`Cause Subcategory`))
# One row per cause category listing the distinct subcategories and the
# distinct liquid types seen in that category, each collapsed to one string.
# Previously `result` was built twice and the surviving version stored the
# liquid types as a list-column named `sottocategorie`; plain character
# columns are also what the later Excel export can write as cell text.
result <- data %>%
  group_by(`Cause Category`) %>%
  summarize(
    sottocategorie = paste(unique(`Cause Subcategory`), collapse = " , "),
    tipi_liquido = paste(unique(`Liquid Type`), collapse = " , ")
  )
# Number of records for every (subcategory, liquid type) pair.
Liquid_sub <- data %>%
  count(`Cause Subcategory`, `Liquid Type`, name = "count")

# Subcategories retained for the percentage breakdown.
sottocategorie_da_mantenere <- c(
  "INTERNAL",
  "PIPELINE/EQUIPMENT OVERPRESSURED",
  "PUMP OR PUMP-RELATED EQUIPMENT",
  "OVERFILL/OVERFLOW OF TANK/VESSEL/SUMP",
  "TEMPERATURE"
)

# Within each liquid type, the share of each retained subcategory in percent.
# The shares are relative to the retained subcategories only, because the
# filter runs before the per-liquid totals are taken. The unfiltered variant
# that used to precede this one was overwritten before use and is dropped.
Liquid_sub_perc <- data %>%
  count(`Liquid Type`, `Cause Subcategory`, name = "count") %>%
  filter(`Cause Subcategory` %in% sottocategorie_da_mantenere) %>%
  group_by(`Liquid Type`) %>%
  mutate(percentage = (count / sum(count)) * 100) %>%
  ungroup()
# Keep only the records whose barrel balance is consistent:
#   unintentional + intentional - recovered == net loss.
# The check uses the columns of `data` itself; the previous version read them
# from `data2`, which does not exist yet at this point of the script.
# A missing intentional release counts as 0, and near() compares with a small
# tolerance because the operands are floating-point sums.
data3 <- data %>%
  filter(near(
    `Unintentional Release (Barrels)` +
      coalesce(`Intentional Release (Barrels)`, 0) -
      `Liquid Recovery (Barrels)`,
    `Net Loss (Barrels)`
  ))
data <- data3
# Pie chart of the share of records per cause category.
# NOTE(review): this plot is overwritten below before it is ever displayed.
percentuali <- data %>%
  count(`Cause Category`, name = "conteggio") %>%
  mutate(percentuale = conteggio / sum(conteggio) * 100)
p101 <- ggplot(
  percentuali,
  aes(x = "", y = percentuale, fill = `Cause Category`)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") + # stacked bar -> pie
  labs(title = "Percentuali delle cause") +
  theme_void()

# Pie chart of the share of records per liquid type. The title used to be a
# copy of the cause-category one.
# NOTE(review): `p101 | p102` later compares this plot with a cause-category
# pie; confirm which of the two p101 variants was meant there.
percentuali <- data %>%
  count(`Liquid Type`, name = "conteggio") %>%
  mutate(percentuale = conteggio / sum(conteggio) * 100)
p101 <- ggplot(
  percentuali,
  aes(x = "", y = percentuale, fill = `Liquid Type`)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") + # stacked bar -> pie
  labs(title = "Percentuali dei tipi di liquido") +
  theme_void()
p101

# Legend-free variant for side-by-side layouts.
p101 <- p101 + theme(legend.position = "none")
#--------------------- HELPERS FOR THE PER-CATEGORY PLOTS ---------------------

# Summarise one barrel column per cause category.
#   df        data frame with a `Cause Category` column
#   value_col name (string) of the numeric column to summarise
# Returns one row per category with
#   total_net_loss       sum of `value_col`, ignoring NA
#   mean_net_loss        mean of `value_col` over its non-missing values
#   total_cases          number of records in the category
#   normalized_net_loss  total_net_loss / total_cases
# The column names match the ones the script has always used, whatever the
# summarised quantity is.
summarise_by_category <- function(df, value_col) {
  df %>%
    group_by(`Cause Category`) %>%
    summarize(
      total_net_loss = sum(.data[[value_col]], na.rm = TRUE),
      mean_net_loss = mean(.data[[value_col]], na.rm = TRUE),
      total_cases = n()
    ) %>%
    mutate(normalized_net_loss = total_net_loss / total_cases)
}

# Bar chart of one summary column per cause category. The x tick labels are
# hidden; the fill legend identifies the bars unless `show_legend` is FALSE.
plot_category_bars <- function(summary_df, y_col, title, y_label,
                               show_legend = TRUE) {
  p <- ggplot(
    summary_df,
    aes(x = `Cause Category`, y = .data[[y_col]], fill = `Cause Category`)
  ) +
    geom_col() +
    labs(title = title, x = "Cause Category", y = y_label,
         fill = "Cause Category") +
    theme_minimal() +
    theme(axis.text.x = element_blank())
  if (!show_legend) {
    p <- p + theme(legend.position = "none")
  }
  p
}

mean_label <- "Mean barrels per case"
total_label <- "Total barrels"

#--------------------- CATEGORIES VS NET LOSS (BARRELS) -----------------------
Net_loss <- summarise_by_category(data, "Net Loss (Barrels)")
p1 <- plot_category_bars(
  Net_loss, "normalized_net_loss", "Net Loss (Barrels) MEDIA", mean_label
)
p1

# NOTE(review): this data frame shadows dplyr::filter(); calls to filter()
# still work because R skips non-function objects when resolving a call.
filter <- data %>% dplyr::filter(`Cause Category` == "CORROSION")

# Violin plot: full, untrimmed distribution of net loss per category.
p1 <- ggplot(
  data,
  aes(x = `Cause Category`, y = `Net Loss (Barrels)`, fill = `Cause Category`)
) +
  geom_violin(trim = FALSE) +
  labs(
    title = "Distribuzione Net Loss (Barrels) per Categoria (Violin Plot)",
    x = "Cause Category",
    y = "Net Loss (Barrels)",
    fill = "Cause Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p1

# Records with at least one barrel released unintentionally.
ds_UnintBarrels_10 <- data %>%
  dplyr::filter(`Unintentional Release (Barrels)` >= 1)
# Records with a net loss of at most one barrel (an earlier `>= 1` variant was
# overwritten immediately and is not kept).
ds_NetLoss <- data %>%
  dplyr::filter(`Net Loss (Barrels)` <= 1)

# Box plot of the unintentional release per category. The labels now name the
# plotted column (they used to say "Net Loss"). coord_cartesian() zooms to
# 0-30 barrels without discarding larger values, so the box statistics are
# computed on all rows; ylim() would drop them first.
p1 <- ggplot(
  ds_UnintBarrels_10,
  aes(
    x = `Cause Category`,
    y = `Unintentional Release (Barrels)`,
    fill = `Cause Category`
  )
) +
  geom_boxplot() +
  labs(
    title = "Distribuzione Unintentional Release (Barrels) per Categoria",
    x = "Cause Category",
    y = "Unintentional Release (Barrels)",
    fill = "Cause Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_cartesian(ylim = c(0, 30))
p1

Net_loss1 <- summarise_by_category(data, "Net Loss (Barrels)")
p2 <- plot_category_bars(
  Net_loss1, "total_net_loss", "Net Loss (Barrels) TOTALI", total_label
)
p2 | p1
p2 | p101
p1

#-------------------- CATEGORIES VS UNINTENTIONAL RELEASE ---------------------
Net_loss <- summarise_by_category(data, "Unintentional Release (Barrels)")
p3 <- plot_category_bars(
  Net_loss, "normalized_net_loss",
  "Unintentional Release (Barrels) NORMALIZZATI", mean_label,
  show_legend = FALSE
)
Net_loss1 <- Net_loss
p4 <- plot_category_bars(
  Net_loss1, "total_net_loss",
  "Unintentional Release (Barrels) TOTALI", total_label
)
p4 | p3

#-------------------- CATEGORIES VS INTENTIONAL RELEASE -----------------------
# The mean is taken over the records that report an intentional release
# (NA values are ignored), as before. The totals now sum the intentional
# release column; they used to sum the unintentional one by mistake.
Net_loss1 <- summarise_by_category(data, "Intentional Release (Barrels)")
p5 <- plot_category_bars(
  Net_loss1, "mean_net_loss",
  "Intentional Release (Barrels) MEDIA", mean_label,
  show_legend = FALSE
)
p5
p6 <- plot_category_bars(
  Net_loss1, "total_net_loss",
  "Intentional Release (Barrels) TOTALI", total_label
)
p6 | p5

#-------------------- CATEGORIES VS LIQUID RECOVERY ---------------------------
Net_loss <- summarise_by_category(data, "Liquid Recovery (Barrels)")
p7 <- plot_category_bars(
  Net_loss, "normalized_net_loss",
  "Liquid Recovery (Barrels) MEDIA", mean_label,
  show_legend = FALSE
)
Net_loss1 <- Net_loss
p8 <- plot_category_bars(
  Net_loss1, "total_net_loss",
  "Liquid recovery (Barrels) TOTALI", total_label
)
p8 | p7

# Overview grids.
# NOTE(review): at this point p1 is the box plot, not the mean bar chart.
p1 + p3 + p5 + p7
p2 + p4 + p6 + p8
#--------------------------
# Mean environmental remediation cost per liquid type (NA costs ignored).
Net_loss <- data %>%
  group_by(`Liquid Type`) %>%
  summarize(
    mean_costs = mean(`Environmental Remediation Costs`, na.rm = TRUE)
  )

# The axis and legend labels now describe the plotted variables; they used to
# read "Total Net Loss" and "Cause Category".
p7 <- ggplot(
  Net_loss,
  aes(x = `Liquid Type`, y = mean_costs, fill = `Liquid Type`)
) +
  geom_col() +
  labs(
    title = paste("Environmental Remediation Costs (MEDIA)"),
    x = "Liquid Type",
    y = "Mean Environmental Remediation Costs",
    fill = "Liquid Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_blank())
p7
# Copy of the data in which a missing intentional release counts as 0.
data2 <- data
data2$`Intentional Release (Barrels)` <- replace(
  data2$`Intentional Release (Barrels)`,
  is.na(data2$`Intentional Release (Barrels)`),
  0
)

# Row-wise consistency flag: "OK" when
#   unintentional + intentional - recovered == net loss,
# and "NO" otherwise. The comparison is vectorised and uses near() because the
# operands are floating-point sums. Rows with a missing operand are flagged
# "NO"; the element-wise `if` used before stopped with an error on such rows.
bilancio <- data2$`Unintentional Release (Barrels)` +
  data2$`Intentional Release (Barrels)` -
  data2$`Liquid Recovery (Barrels)`
quadra <- near(bilancio, data2$`Net Loss (Barrels)`)

controllo <- rep("NO", nrow(data2))
controllo[which(quadra)] <- "OK" # which() skips NA comparisons
# Stacked bar chart of the percentages, one bar per cause subcategory.
# NOTE(review): `percentage` is computed within each liquid type, so the
# stacked height of a subcategory bar is not bounded by 100 %.
ggplot(
  Liquid_sub_perc,
  aes(x = `Cause Subcategory`, y = percentage, fill = `Liquid Type`)
) +
  geom_col() +
  labs(
    title = "Percentuale di Liquid Type per Cause Subcategory",
    x = "Cause Subcategory",
    y = "Percentuale",
    fill = "Liquid Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::percent_format(scale = 1))

# Same data with one bar per liquid type; each bar adds up to 100 %.
ggplot(
  Liquid_sub_perc,
  aes(x = `Liquid Type`, y = percentage, fill = `Cause Subcategory`)
) +
  geom_col() +
  labs(
    title = "Percentuale di Cause Subcategory per Liquid Type",
    x = "Liquid Type",
    y = "Percentuale",
    fill = "Cause Subcategory"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(angle = 0, hjust = 1)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + # slanted labels
  scale_y_continuous(labels = scales::percent_format(scale = 1))

# Export the per-category summary. List-columns have no cell representation
# in a spreadsheet, so any such column is collapsed to one string per row
# first; this is a no-op when `result` holds only atomic columns.
result_export <- result %>%
  mutate(across(
    where(is.list),
    ~ vapply(.x, function(v) paste(v, collapse = " , "), character(1))
  ))
write_xlsx(result_export, "result.xlsx")
library(dplyr)
library(ggplot2)

# Restrict the records to one cause subcategory.
selected_category <- "INTERNAL" # replace with the subcategory of interest
filtered_data <- data %>%
  filter(`Cause Subcategory` == selected_category)

# `filtered_data` holds raw records and has no `count` column, so the bars are
# built with geom_bar()'s default count statistic instead of aes(y = count).

# Stacked bar chart.
ggplot(filtered_data, aes(x = `Liquid Type`, fill = `Liquid Type`)) +
  geom_bar() +
  labs(
    title = paste(
      "Conteggio di Liquid Type per la sottocategoria:", selected_category
    ),
    x = "Liquid Type",
    y = "Occorrenze",
    fill = "Liquid Type"
  ) +
  theme_minimal()

# Side-by-side bar chart.
ggplot(filtered_data, aes(x = `Liquid Type`, fill = `Liquid Type`)) +
  geom_bar(position = "dodge") +
  labs(
    title = paste(
      "Conteggio di Liquid Type per la sottocategoria:", selected_category
    ),
    x = "Liquid Type",
    y = "Occorrenze",
    fill = "Liquid Type"
  ) +
  theme_minimal()

# Side-by-side bar chart over every subcategory, from the pre-counted table.
ggplot(
  Liquid_sub,
  aes(x = `Cause Subcategory`, y = count, fill = `Liquid Type`)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Conteggio di Liquid Type per Cause Subcategory",
    x = "Cause Subcategory",
    y = "Occorrenze",
    fill = "Liquid Type"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
#-------------------- DATES --------------------------
# Reload the raw file: from here on `data` is the unfiltered table again.
data <- read_delim("database.csv", delim = ";")
table(data$`Pipeline Shutdown`)

# Records in which the pipeline was shut down (filter() drops NA answers).
df_si <- data %>%
  filter(`Pipeline Shutdown` == "YES")

# Count and percentage of shutdown records per cause category.
conteggio_categoria <- df_si %>%
  count(`Cause Category`) %>%
  mutate(percentuale = n / sum(n) * 100)

# Pie chart of those percentages.
p102 <- ggplot(
  conteggio_categoria,
  aes(x = "", y = percentuale, fill = `Cause Category`)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  theme_void() +
  labs(title = "Percentuale di Cause con chiusura pipeline") +
  theme(legend.title = element_blank(), legend.position = "none")
p101 | p102

# Total net loss per shutdown answer (YES / NO / NA).
Net_loss1 <- data %>%
  group_by(`Pipeline Shutdown`) %>%
  summarize(
    total_net_loss = sum(`Net Loss (Barrels)`, na.rm = TRUE),
    total_cases = n() # number of records per answer
  )

# The x and legend labels now name the grouping variable; they used to read
# "Cause Category".
ggplot(
  Net_loss1,
  aes(x = `Pipeline Shutdown`, y = total_net_loss, fill = `Pipeline Shutdown`)
) +
  geom_col() +
  labs(
    title = paste("Net Loss (Barrels)"),
    x = "Pipeline Shutdown",
    y = "Total Net Loss",
    fill = "Pipeline Shutdown"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_blank())
# BAR CHART OF NET LOSS --> YES / NO / NA
# Mean net loss per shutdown answer (NA losses ignored). The column keeps its
# historical name `total_net_loss` although it holds a mean.
Net_loss1 <- data %>%
  group_by(`Pipeline Shutdown`) %>%
  summarize(
    total_net_loss = mean(`Net Loss (Barrels)`, na.rm = TRUE),
    total_cases = n() # number of records per answer
  )
ggplot(
  Net_loss1,
  aes(x = `Pipeline Shutdown`, y = total_net_loss, fill = `Pipeline Shutdown`)
) +
  geom_col() +
  labs(
    title = paste("Net Loss (Barrels) MEDIA"),
    x = "Pipeline Shutdown",
    y = "Mean Net Loss",
    fill = "Pipeline Shutdown"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_blank())

# Share of each shutdown answer. `percentages` was plotted without ever being
# built; it is derived here from the `Pipeline Shutdown` column, with missing
# answers kept as their own NA group.
percentages <- data %>%
  count(Response = `Pipeline Shutdown`) %>%
  mutate(percentage = n / sum(n) * 100)

ggplot(percentages, aes(x = Response, y = percentage, fill = Response)) +
  geom_col() +
  labs(
    title = "Percentuale di Occorrenze di 'SI', 'NO' e 'NA'",
    x = "Risposta",
    y = "Percentuale (%)",
    fill = "Risposta"
  ) +
  theme_minimal()
# Records with a known net loss.
data_clean <- data %>% filter(!is.na(`Net Loss (Barrels)`))

# Box plot of net loss per shutdown answer, zoomed to 0-10 barrels.
# coord_cartesian() only changes the visible window; scale limits would drop
# every value above 10 before the quartiles are computed and distort the boxes.
ggplot(
  data_clean,
  aes(
    x = `Pipeline Shutdown`,
    y = `Net Loss (Barrels)`,
    fill = `Pipeline Shutdown`
  )
) +
  geom_boxplot() +
  labs(
    title = "Net Loss (Barrels) by Pipeline Shutdown",
    x = "Pipeline Shutdown",
    y = "Net Loss (Barrels)",
    fill = "Pipeline Shutdown"
  ) +
  coord_cartesian(ylim = c(0, 10)) +
  theme_minimal()
# PERMANENT SHUTDOWN
# Parse the three timestamp columns; mdy_hm() reads month/day/year
# hour:minute text. The namespace prefix keeps the call working even when
# lubridate is not attached.
data$`Accident Date/Time` <- lubridate::mdy_hm(data$`Accident Date/Time`)
data$`Shutdown Date/Time` <- lubridate::mdy_hm(data$`Shutdown Date/Time`)
data$`Restart Date/Time` <- lubridate::mdy_hm(data$`Restart Date/Time`)

filtered_data_yes <- data %>%
  filter(`Pipeline Shutdown` == "YES")

# Shut down and never restarted: shutdown time known, restart time missing.
filtered_data <- data %>%
  filter(
    `Pipeline Shutdown` == "YES",
    is.na(`Restart Date/Time`),
    !is.na(`Shutdown Date/Time`)
  )
table(data$`Pipeline Shutdown`)
mean(filtered_data$`Net Loss (Barrels)`)
table(filtered_data$`Cause Category`)

# Shut down and restarted: both timestamps known.
filtered_yes <- data %>%
  filter(
    `Pipeline Shutdown` == "YES",
    !is.na(`Shutdown Date/Time`),
    !is.na(`Restart Date/Time`)
  )

# Net-loss summaries of the two groups.
mean(filtered_yes$`Net Loss (Barrels)`)
median(filtered_yes$`Net Loss (Barrels)`)
median(filtered_data$`Net Loss (Barrels)`)
sd(filtered_yes$`Net Loss (Barrels)`)
sd(filtered_data$`Net Loss (Barrels)`)
hist(filtered_data$`Net Loss (Barrels)`)
hist(filtered_yes$`Net Loss (Barrels)`)

# Hours between the accident and the shutdown. Subtracting POSIXct values
# lets R pick the unit (secs, mins, hours or days) from the data, so dividing
# the result by 3600 is only right when it happens to pick seconds; difftime()
# with an explicit unit is unambiguous.
filtered_yes$chiusura <- as.numeric(difftime(
  filtered_yes$`Shutdown Date/Time`,
  filtered_yes$`Accident Date/Time`,
  units = "hours"
))
mean(filtered_yes$chiusura)
tail(filtered_yes$chiusura)

ggplot(filtered_yes, aes(x = chiusura)) +
  geom_histogram(bins = 30, fill = "blue", color = "black") +
  labs(title = "Histogram of Data", x = "Values", y = "Frequency") +
  theme_minimal()

# Hours the pipeline stayed closed (restart minus shutdown).
filtered_yes$durata_chiusura <- as.numeric(difftime(
  filtered_yes$`Restart Date/Time`,
  filtered_yes$`Shutdown Date/Time`,
  units = "hours"
))
durata_media <- mean(filtered_yes$durata_chiusura, na.rm = TRUE)
durata_media

# Histogram limited to 0-200 hours with the overall mean as a red line. The
# mean is passed as a constant instead of a `$` expression inside aes().
ggplot(filtered_yes, aes(x = durata_chiusura)) +
  geom_histogram(bins = 30, fill = "lightblue", color = "black") +
  labs(
    title = "Istogramma delle ore di chiusura della Pipeline",
    x = "Ore",
    y = "Eventi"
  ) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, 200)) +
  geom_vline(
    xintercept = durata_media,
    color = "red", linetype = 1, linewidth = 2
  )

filtered_yes <- filtered_yes %>%
  filter(is.finite(durata_chiusura))

# Sanity checks on the duration column.
summary(filtered_yes$durata_chiusura)          # overview
any(is.na(filtered_yes$durata_chiusura))       # any NA left?
any(!is.finite(filtered_yes$durata_chiusura))  # any NaN / Inf left?
# fitdistrplus must already be installed; the script no longer calls
# install.packages(), which would reinstall the package on every run.
library(fitdistrplus)
library(dplyr)

# Keep the records whose shutdown did not precede the accident.
filtered_yes <- filtered_yes %>%
  filter(chiusura >= 0)
chiusura <- filtered_yes$chiusura
filtered_yes$durata_chiusura <- as.numeric(filtered_yes$durata_chiusura)

# Gamma fit by moment matching. For the gamma distribution this method has a
# closed form, so the undefined start values (`shape_init`, `scale_init`) and
# the optimiser `control` list are not needed. The estimate is reported as
# (shape, rate), not (shape, scale).
fit_gamma <- fitdist(filtered_yes$durata_chiusura, "gamma", method = "mme")
fit_gamma$estimate
shape <- as.numeric(fit_gamma$estimate["shape"])
rate <- as.numeric(fit_gamma$estimate["rate"])
scale <- 1 / rate # kept for code that expects a scale parameter

# Fitted density on a grid of durations.
x_vals <- seq(
  1, max(filtered_yes$durata_chiusura, na.rm = TRUE),
  length.out = 1341
)
y_vals <- dgamma(x_vals, shape = shape, rate = rate)
# The curve gets its own data frame so that it does not have to match the
# number of rows in `filtered_yes`; only the visible 0-200 h range is kept.
gamma_curve <- tibble(x = x_vals, y = y_vals) %>%
  filter(x <= 200)

# Density histogram with the fitted gamma curve.
# NOTE(review): the histogram density is normalised over the 0-200 h window,
# while the fit uses every duration.
ggplot(filtered_yes, aes(x = durata_chiusura)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "lightblue", color = "black"
  ) +
  labs(
    title = "Istogramma delle ore di chiusura della Pipeline con distribuzione Gamma",
    x = "Ore",
    y = "Density"
  ) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, 200)) +
  geom_line(
    data = gamma_curve, aes(x = x, y = y),
    inherit.aes = FALSE, color = "red", linewidth = 1
  )
# Exponential fit of the closure duration (default maximum likelihood).
fit <- fitdist(filtered_yes$durata_chiusura, "exp")
# Estimated rate (lambda).
fit$estimate

x_vals <- seq(
  0, max(filtered_yes$durata_chiusura, na.rm = TRUE),
  length.out = 1341
)
y_vals <- dexp(x_vals, rate = fit$estimate["rate"])
# Separate data frame for the curve: a layer that inherits `filtered_yes`
# would require exactly nrow(filtered_yes) points.
exp_curve <- tibble(x = x_vals, y = y_vals) %>%
  filter(x <= 200)
durata_media <- mean(filtered_yes$durata_chiusura, na.rm = TRUE)

# Density histogram with the mean (red) and the fitted exponential (blue).
ggplot(filtered_yes, aes(x = durata_chiusura)) +
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "lightblue", color = "black"
  ) +
  labs(
    title = "Istogramma delle ore di chiusura della Pipeline",
    x = "Ore",
    y = "Eventi"
  ) +
  theme_minimal() +
  scale_x_continuous(limits = c(0, 200)) +
  geom_vline(
    xintercept = durata_media,
    color = "red", linetype = 1, linewidth = 2
  ) +
  geom_line(
    data = exp_curve, aes(x = x, y = y),
    inherit.aes = FALSE, color = "blue", linewidth = 1
  )

# Kolmogorov-Smirnov test against the fitted exponential, run on the full
# sample: dropping duplicated durations would change the empirical
# distribution being tested. Ties only trigger a warning.
# NOTE(review): the rate is estimated from the same data, so the reported
# p-value is only indicative.
ks_test <- ks.test(
  filtered_yes$durata_chiusura, "pexp",
  rate = fit$estimate["rate"]
)
print(ks_test)

# Negative-binomial fit. It is a count distribution, so the durations are
# rounded to whole hours first; fractional values have zero likelihood.
# NOTE(review): confirm that whole-hour resolution is acceptable here.
fit_binomiale_negativa <- fitdist(
  round(filtered_yes$durata_chiusura), "nbinom"
)
# Estimated parameters (size and mu).
print(fit_binomiale_negativa$estimate)

# Longest closure in hours; pmax() on a single vector just returns the vector.
max(filtered_yes$durata_chiusura, na.rm = TRUE)
# Environmental remediation cost of every record, one column of points per
# liquid type.
# NOTE(review): the title and axis labels are still generic placeholders.
ggplot(
  data,
  aes(x = `Liquid Type`, y = `Environmental Remediation Costs`)
) +
  geom_point(color = "blue", size = 2) +
  labs(
    title = "Scatter Plot of X vs Y",
    x = "X-axis (Independent Variable)",
    y = "Y-axis (Dependent Variable)"
  ) +
  theme_minimal()
# RECOVERY EFFICIENCY
# Missing intentional releases count as 0; records without a state are dropped.
data_noNA <- data %>%
  mutate(
    `Intentional Release (Barrels)` =
      coalesce(`Intentional Release (Barrels)`, 0)
  ) %>%
  filter(!is.na(`Accident State`))

# Mean recovery efficiency (recovered / total released) per level of
# `group_col` (a column name given as a string). Ratios that are not finite
# (a zero or missing total release) are left out of the mean, so a single
# division by zero cannot turn a group mean into Inf.
recovery_efficiency_by <- function(df, group_col) {
  df %>%
    mutate(
      .efficiency = `Liquid Recovery (Barrels)` /
        (`Unintentional Release (Barrels)` + `Intentional Release (Barrels)`)
    ) %>%
    group_by(across(all_of(group_col))) %>%
    summarize(mean_percentage = mean(.efficiency[is.finite(.efficiency)]))
}

# Bar chart of the mean efficiency, bars ordered by increasing efficiency.
plot_recovery_efficiency <- function(eff_df, group_col, title, x_label,
                                     rotate_labels = TRUE) {
  p <- ggplot(
    eff_df,
    aes(x = reorder(.data[[group_col]], mean_percentage), y = mean_percentage)
  ) +
    geom_col() +
    labs(title = title, x = x_label, y = "EFFICIENZA MEDIA") +
    theme_minimal()
  if (rotate_labels) {
    p <- p + theme(axis.text.x = element_text(angle = 45, hjust = 1))
  }
  p
}

# `ds_efficiency` used to be computed once, grouped by `Pipeline Type`, so the
# plots by state, liquid type and cause category referenced columns that did
# not exist. It is now recomputed for each grouping before its plot.
ds_efficiency <- recovery_efficiency_by(data_noNA, "Accident State")
plot_recovery_efficiency(
  ds_efficiency, "Accident State",
  "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI STATO", "STATO",
  rotate_labels = FALSE
)

ds_efficiency <- recovery_efficiency_by(data_noNA, "Liquid Type")
plot_recovery_efficiency(
  ds_efficiency, "Liquid Type",
  "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI TIPO DI LIQUIDO",
  "TIPO DI LIQUIDO"
)

ds_efficiency <- recovery_efficiency_by(data_noNA, "Cause Category")
plot_recovery_efficiency(
  ds_efficiency, "Cause Category",
  "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI CATEGORIA DI CAUSA",
  "CATEGORIA DI CAUSA"
)

ds_efficiency <- recovery_efficiency_by(data_noNA, "Pipeline Type")
plot_recovery_efficiency(
  ds_efficiency, "Pipeline Type",
  "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI TIPO DI PIPELINE",
  "PIPELINE TYPE"
)
#---------------------- TOTAL RELEASE --------------------------
# Box plots of the total release (new column: unintentional + intentional)
# per cause category, coloured by the mean recovery efficiency of the category.
data$`Intentional Release (Barrels)`[
  is.na(data$`Intentional Release (Barrels)`)
] <- 0

# Mean recovery efficiency per category. Ratios that are not finite (zero or
# missing total release) are excluded so that they cannot produce an Inf mean.
ds_efficiency <- data %>%
  mutate(
    .efficiency = `Liquid Recovery (Barrels)` /
      (`Unintentional Release (Barrels)` + `Intentional Release (Barrels)`)
  ) %>%
  group_by(`Cause Category`) %>%
  summarize(mean_percentage = mean(.efficiency[is.finite(.efficiency)]))

# Records with at least one barrel released unintentionally, with the total
# release and the efficiency of their category attached.
ds_UnintBarrels_10 <- data %>%
  filter(`Unintentional Release (Barrels)` >= 1) %>%
  mutate(
    `Total Release (Barrels)` =
      `Unintentional Release (Barrels)` + `Intentional Release (Barrels)`
  ) %>%
  left_join(ds_efficiency, by = "Cause Category")

# coord_cartesian() zooms to 0-30 barrels without removing larger releases
# from the box statistics, which ylim() would do.
p1 <- ggplot(
  ds_UnintBarrels_10,
  aes(
    x = `Cause Category`,
    y = `Total Release (Barrels)`,
    fill = mean_percentage
  )
) +
  geom_boxplot() +
  labs(
    title = "Distribuzione Total Release (Barrels) per Categoria",
    x = "Cause Category",
    y = "Barrels",
    fill = "Recovery Efficiency"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_cartesian(ylim = c(0, 30)) +
  scale_fill_gradientn(colors = c("red", "yellow", "green"))
p1
# (Web-page boilerplate captured with the paste; not part of the script.)
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement