Advertisement
federico_carosio

Untitled

Dec 4th, 2024
1,546
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 25.47 KB | None | 0 0
  1. rm(list = ls())
  2. while (!is.null(dev.list())) dev.off()
  3. library(moments)
  4. library(tidyverse)
  5. library(ggplot2)
  6. library(MASS)
  7. library(patchwork)
  8. library(hrbrthemes)
  9. library(gridExtra)
  10. library(plotly)
  11. library(highcharter)
  12. library(orca)
  13. library(reticulate)
  14. library(grid)
  15. library(igraph)
  16. library(ggraph)
  17. library(png)
  18. library(writexl)
  19. library(fitdistrplus)
  20.  
  21.  
  22. data <- read_delim('database.csv', delim = ";")
  23.  
  24.  
  25.  
  26. ggplot(data, aes(x=`Cause Category`)) +
  27.   geom_histogram(aes(y = ..density..), bins = 7, color = "black", fill = "grey")
  28.  
  29. unique(data$`Cause Category`)
  30. table(data$`Cause Category`)
  31. table <- table(data$`Liquid Type`)
  32. table(data$`Liquid Type`)
  33.  
  34. frequenze <- sort(table(data$`Cause Subcategory`))
  35. dataset <- as.data.frame(frequenze)
  36. colnames(dataset) <- c("Subcategory", "Frequenza")
  37.  
  38. ggplot(dataset, aes(x = Subcategory, y = Frequenza)) +
  39.   geom_bar(stat = "identity") +
  40.   labs(title = "Frequenza degli eventi per sottocategoria",
  41.        x = "Sottocategoria",
  42.        y = "Numero di eventi") +
  43.   theme_minimal() +
  44.   geom_rect(aes(xmin = 30.5, xmax = 38.5, ymin = -10, ymax = 370), color = "red", fill = NA) +  
  45.   theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none")
  46.  
  47.  
  48. frequenze <- table(data$`Cause Subcategory`)
  49. dataset <- as.data.frame(frequenze)
  50. colnames(dataset) <- c("Categoria", "Frequenza")
  51. dataset <- dataset[order(-dataset$Frequenza), ]  # Ordina decrescente
  52.  
  53. # Mantieni solo le prime 8 categorie, raggruppa le altre in "Altre"
  54. dataset$Categoria <- ifelse(rank(-dataset$Frequenza) <= 8,
  55.                             as.character(dataset$Categoria),
  56.                             "ALTRE")
  57. # Raggruppa "Altre" e ricalcola le frequenze
  58. dataset <- dataset %>%
  59.   group_by(Categoria) %>%
  60.   summarise(Frequenza = sum(Frequenza))
  61.  
  62. # Calcola le percentuali
  63. dataset$Percentuale <- (dataset$Frequenza / sum(dataset$Frequenza)) * 100
  64.  
  65. # Crea il grafico a torta
  66. library(ggplot2)
  67. p100 <- ggplot(dataset, aes(x = "", y = Percentuale, fill = Categoria)) +
  68.   geom_bar(stat = "identity", width = 1) +  # Grafico a barre
  69.   coord_polar(theta = "y") +                # Converti in grafico a torta
  70.   labs(title = "Distribuzione percentuale delle cause",
  71.        x = NULL, y = NULL) +
  72.   theme_void() +                            # Rimuovi assi e sfondo
  73.   theme(legend.title = element_blank())
  74. p100
  75.  
  76. n <- sum(table)
  77. table <- table / n * 100
  78.  
  79.  
  80. unique(data$`Cause Subcategory`)
  81. sort(table(data$`Cause Subcategory`))
  82.  
  83. result <- data %>%
  84.   group_by(`Cause Category`) %>%
  85.   summarize(sottocategorie = paste(unique(`Cause Subcategory`), collapse = ' , '))
  86.  
  87. result <- data %>%
  88.   group_by(`Cause Category`) %>%
  89.   summarize(sottocategorie = list(unique(`Liquid Type`)))
  90.  
  91. Liquid_sub <- data %>%
  92.   group_by(`Cause Subcategory`, `Liquid Type`) %>%
  93.   summarize(count = n()) %>%
  94.   ungroup()
  95.  
  96.  
  97. Liquid_sub_perc <- data %>%
  98.   group_by(`Liquid Type`, `Cause Subcategory`) %>%
  99.   summarize(count = n()) %>%
  100.   ungroup() %>%
  101.   group_by(`Liquid Type`) %>%
  102.   mutate(percentage = (count / sum(count)) * 100) %>%
  103.   ungroup()
  104.  
  105. sottocategorie_da_mantenere <- c("INTERNAL", "PIPELINE/EQUIPMENT OVERPRESSURED","PUMP OR PUMP-RELATED EQUIPMENT", "OVERFILL/OVERFLOW OF TANK/VESSEL/SUMP", "TEMPERATURE")
  106.  
  107. # solo con alcune sottocategorie
  108. Liquid_sub_perc <- data %>%
  109.   group_by(`Liquid Type`, `Cause Subcategory`) %>%
  110.   summarize(count = n()) %>%
  111.   ungroup() %>%
  112.   filter(`Cause Subcategory` %in% sottocategorie_da_mantenere) %>%  # Filtra solo le sottocategorie specificate
  113.   group_by(`Liquid Type`) %>%
  114.   mutate(percentage = (count / sum(count)) * 100) %>%
  115.   ungroup()
  116.  
  117.  
  118. data3 <- data %>%
  119.   filter((data2$`Unintentional Release (Barrels)` +
  120.             data2$`Intentional Release (Barrels)` -
  121.             data2$`Liquid Recovery (Barrels)`) ==
  122.            data2$`Net Loss (Barrels)`)
  123. data <- data3
  124.  
  125. percentuali <- data %>%
  126.   group_by(`Cause Category`) %>%
  127.   summarise(conteggio = n()) %>%
  128.   mutate(percentuale = conteggio / sum(conteggio) * 100)
  129.  
  130. p101 <- ggplot(percentuali, aes(x = "", y = percentuale, fill = `Cause Category`)) +
  131.   geom_bar(stat = "identity", width = 1) + # Grafico a barre
  132.   coord_polar(theta = "y") +               # Trasformazione in grafico a torta
  133.   labs(title = "Percentuali delle cause") +
  134.   theme_void()
  135.  
  136. percentuali <- data %>%
  137.   group_by(`Liquid Type`) %>%
  138.   summarise(conteggio = n()) %>%
  139.   mutate(percentuale = conteggio / sum(conteggio) * 100)
  140.  
  141. p101 <- ggplot(percentuali, aes(x = "", y = percentuale, fill = `Liquid Type`)) +
  142.   geom_bar(stat = "identity", width = 1) + # Grafico a barre
  143.   coord_polar(theta = "y") +               # Trasformazione in grafico a torta
  144.   labs(title = "Percentuali delle cause") +
  145.   theme_void()
  146. p101
  147.  
  148. p101 <- p101 + theme(legend.position = "none")
  149. #--------------------- CATEGORIE VS NET LOSS BARRELS ----------------------------
  150.  
  151. Net_loss <- data %>%
  152.   group_by(`Cause Category`) %>%
  153.   summarize(
  154.     total_net_loss = sum(`Net Loss (Barrels)`, na.rm = TRUE),
  155.     total_cases = n()  # Conta il numero totale di casi
  156.   ) %>%
  157.   mutate(normalized_net_loss = total_net_loss / total_cases)
  158.  
  159. p1 <- ggplot(Net_loss, aes(x = `Cause Category`, y = normalized_net_loss, fill = `Cause Category`)) +
  160.   geom_bar(stat = "identity") +
  161.   labs(
  162.     title = paste("Net Loss (Barrels) MEDIA"),
  163.     x = "Cause Category",
  164.     y = "Total Net Loss",
  165.     fill = "Cause Category"
  166.   ) +
  167.   theme_minimal() +
  168.   theme(axis.text.x = element_blank())
  169. p1
  170.           # element_text(angle = 45, hjust = 1, size = 12))
  171. filter <- data %>% filter(`Cause Category` == "CORROSION")
  172.  
  173. #VIOLIN PLOT
  174. p1 <- ggplot(data, aes(x = `Cause Category`, y = `Net Loss (Barrels)`, fill = `Cause Category`)) +
  175.   geom_violin(trim = FALSE) +  # Mostra la distribuzione completa senza troncamenti
  176.   labs(
  177.     title = "Distribuzione Net Loss (Barrels) per Categoria (Violin Plot)",
  178.     x = "Cause Category",
  179.     y = "Net Loss (Barrels)",
  180.     fill = "Cause Category"
  181.   ) +
  182.   theme_minimal() +
  183.   theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Ruota le etichette per maggiore leggibilità
  184. p1
  185.  
  186. ds_UnintBarrels_10 <- data %>%
  187.   filter(`Unintentional Release (Barrels)` >= 1)
  188.  
  189. ds_NetLoss <- data %>%
  190.   filter(`Net Loss (Barrels)` >= 1)
  191.  
  192.  
  193. ds_NetLoss <- data %>%
  194.   filter(`Net Loss (Barrels)` <= 1)
  195. #BOXPLOT
  196. p1 <- ggplot(ds_UnintBarrels_10, aes(x = `Cause Category`, y = `Unintentional Release (Barrels)`, fill = `Cause Category`)) +
  197.   geom_boxplot() +
  198.   labs(
  199.     title = "Distribuzione Net Loss (Barrels) per Categoria",
  200.     x = "Cause Category",
  201.     y = "Net Loss (Barrels)",
  202.     fill = "Cause Category"
  203.   ) +
  204.   theme_minimal() +
  205.   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  206.   ylim(0, 30)
  207. p1
  208.      
  209. Net_loss1 <- data %>%
  210.   group_by(`Cause Category`) %>%
  211.   summarize(
  212.     total_net_loss = sum(`Net Loss (Barrels)`, na.rm = TRUE),
  213.     total_cases = n()  # Conta il numero totale di casi
  214.   )
  215.  
  216. p2 <- ggplot(Net_loss1, aes(x = `Cause Category`, y = total_net_loss, fill = `Cause Category`)) +
  217.   geom_bar(stat = "identity") +
  218.   labs(
  219.     title = paste("Net Loss (Barrels) TOTALI"),
  220.     x = "Cause Category",
  221.     y = "Total Net Loss",
  222.     fill = "Cause Category"
  223.   ) +
  224.   theme_minimal() +
  225.   theme(axis.text.x = element_blank())
  226.  
  227. p2 | p1
  228. p2 | p101
  229. p1
  230.  
  231. #-------------------- CATEGORIE VS UNINTENTIONAL RELEASE ------------
  232.  
  233. Net_loss <- data %>%
  234.   group_by(`Cause Category`) %>%
  235.   summarize(
  236.     total_net_loss = sum(`Unintentional Release (Barrels)`, na.rm = TRUE),
  237.     total_cases = n()  # Conta il numero totale di casi
  238.   ) %>%
  239.   mutate(normalized_net_loss = total_net_loss / total_cases)
  240.  
  241. p3 <- ggplot(Net_loss, aes(x = `Cause Category`, y = normalized_net_loss, fill = `Cause Category`)) +
  242.   geom_bar(stat = "identity") +
  243.   labs(
  244.     title = paste("Unintentional Release (Barrels) NORMALIZZATI"),
  245.     x = "Cause Category",
  246.     y = "Total Net Loss",
  247.     fill = "Cause Category"
  248.   ) +
  249.   theme_minimal() +
  250.   theme(axis.text.x = element_blank(), legend.position = "none")
  251. # element_text(angle = 45, hjust = 1, size = 12))
  252.  
  253. Net_loss1 <- data %>%
  254.   group_by(`Cause Category`) %>%
  255.   summarize(
  256.     total_net_loss = sum(`Unintentional Release (Barrels)`, na.rm = TRUE),
  257.     total_cases = n()  # Conta il numero totale di casi
  258.   )
  259.  
  260. p4 <- ggplot(Net_loss1, aes(x = `Cause Category`, y = total_net_loss, fill = `Cause Category`)) +
  261.   geom_bar(stat = "identity") +
  262.   labs(
  263.     title = paste("Unintentional Release (Barrels) TOTALI"),
  264.     x = "Cause Category",
  265.     y = "Total Net Loss",
  266.     fill = "Cause Category"
  267.   ) +
  268.   theme_minimal() +
  269.   theme(axis.text.x = element_blank())
  270.  
  271. p4 | p3
  272.  
  273.  
  274. #-------------------- CATEGORIE VS INTENTIONAL RELEASE ------------
  275.  
  276. Net_loss1 <- data %>%
  277.   group_by(`Cause Category`) %>%
  278.   summarize(
  279.     total_net_loss = mean(`Intentional Release (Barrels)`, na.rm = TRUE),
  280.     total_cases = n()  # Conta il numero totale di casi
  281.   )
  282.  
  283. p5 <- ggplot(Net_loss1, aes(x = `Cause Category`, y = total_net_loss, fill = `Cause Category`)) +
  284.   geom_bar(stat = "identity") +
  285.   labs(
  286.     title = paste("Intentional Release (Barrels) MEDIA"),
  287.     x = "Cause Category",
  288.     y = "Total Net Loss",
  289.     fill = "Cause Category"
  290.   ) +
  291.   theme_minimal() +
  292.   theme(axis.text.x = element_blank(), legend.position = "none")
  293. # , legend.position = "none")
  294. # element_text(angle = 45, hjust = 1, size = 12))
  295.  
  296. p5
  297. Net_loss1 <- data %>%
  298.   group_by(`Cause Category`) %>%
  299.   summarize(
  300.     total_net_loss = sum(`Unintentional Release (Barrels)`, na.rm = TRUE),
  301.     total_cases = n()  # Conta il numero totale di casi
  302.   )
  303.  
  304. p6 <- ggplot(Net_loss1, aes(x = `Cause Category`, y = total_net_loss, fill = `Cause Category`)) +
  305.   geom_bar(stat = "identity") +
  306.   labs(
  307.     title = paste("Intentional Release (Barrels) TOTALI"),
  308.     x = "Cause Category",
  309.     y = "Total Net Loss",
  310.     fill = "Cause Category"
  311.   ) +
  312.   theme_minimal() +
  313.   theme(axis.text.x = element_blank())
  314.  
  315.  
  316. p6|p5
  317.  
  318. #-------------------- CATEGORIE VS Liquid recovery RELEASE ------------
  319.  
  320. Net_loss <- data %>%
  321.   group_by(`Cause Category`) %>%
  322.   summarize(
  323.     total_net_loss = sum(`Liquid Recovery (Barrels)`, na.rm = TRUE),
  324.     total_cases = n()  # Conta il numero totale di casi
  325.   ) %>%
  326.   mutate(normalized_net_loss = total_net_loss / total_cases)
  327.  
  328. p7 <- ggplot(Net_loss, aes(x = `Cause Category`, y = normalized_net_loss, fill = `Cause Category`)) +
  329.   geom_bar(stat = "identity") +
  330.   labs(
  331.     title = paste("Liquid Recovery (Barrels) MEDIA"),
  332.     x = "Cause Category",
  333.     y = "Total Net Loss",
  334.     fill = "Cause Category"
  335.   ) +
  336.   theme_minimal() +
  337.   theme(axis.text.x = element_blank(), legend.position = "none")
  338. # element_text(angle = 45, hjust = 1, size = 12))
  339.  
  340. Net_loss1 <- data %>%
  341.   group_by(`Cause Category`) %>%
  342.   summarize(
  343.     total_net_loss = sum(`Liquid Recovery (Barrels)`, na.rm = TRUE),
  344.     total_cases = n()  # Conta il numero totale di casi
  345.   )
  346.  
  347. p8 <- ggplot(Net_loss1, aes(x = `Cause Category`, y = total_net_loss, fill = `Cause Category`)) +
  348.   geom_bar(stat = "identity") +
  349.   labs(
  350.     title = paste("Liquid recovery (Barrels) TOTALI"),
  351.     x = "Cause Category",
  352.     y = "Total Net Loss",
  353.     fill = "Cause Category"
  354.   ) +
  355.   theme_minimal() +
  356.   theme(axis.text.x = element_blank())
  357.  
  358. p8 | p7
  359. p1 + p3 + p5 + p7
  360. p2 + p4 + p6 + p8
  361.  
  362. #--------------------------
  363. Net_loss <- data %>%
  364.   group_by(`Liquid Type`) %>%
  365.   summarize(
  366.     mean_costs = mean(`Environmental Remediation Costs`, na.rm = TRUE))
  367.  
  368. p7 <- ggplot(Net_loss, aes(x = `Liquid Type`, y = mean_costs, fill = `Liquid Type`)) +
  369.   geom_bar(stat = "identity") +
  370.   labs(
  371.     title = paste("Environmental Remediation Costs (MEDIA)"),
  372.     x = "LIquid Type",
  373.     y = "Total Net Loss",
  374.     fill = "Cause Category"
  375.   ) +
  376.   theme_minimal() +
  377.   theme(axis.text.x = element_blank())
  378. p7
  379. data2 <- data
  380. data2$`Intentional Release (Barrels)` <- ifelse(is.na(data2$`Intentional Release (Barrels)`), 0, data2$`Intentional Release (Barrels)`)
  381.  
  382. controllo <- character(length(data2$`Report Number`))
  383.  
  384. # Usa un ciclo for corretto per iterare attraverso gli indici
  385. for (i in 1:length(data2$`Report Number`)) {
  386.   if ((data2$`Unintentional Release (Barrels)`[i] +
  387.        data2$`Intentional Release (Barrels)`[i] -
  388.        data2$`Liquid Recovery (Barrels)`[i]) ==
  389.       data2$`Net Loss (Barrels)`[i]) {
  390.     controllo[i] <- "OK"
  391.   } else {
  392.     controllo[i] <- "NO"
  393.   }
  394. }
  395.  
  396.  
  397.  
  398. # Grafico a barre impilato con percentuali
  399. ggplot(Liquid_sub_perc, aes(x = `Cause Subcategory`, y = percentage, fill = `Liquid Type`)) +
  400.   geom_bar(stat = "identity") +
  401.   labs(
  402.     title = "Percentuale di Liquid Type per Cause Subcategory",
  403.     x = "Cause Subcategory",
  404.     y = "Percentuale",
  405.     fill = "Liquid Type"
  406.   ) +
  407.   theme_minimal() +
  408.   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  409.   scale_y_continuous(labels = scales::percent_format(scale = 1))
  410.  
  411. ggplot(Liquid_sub_perc, aes(x = `Liquid Type`, y = percentage, fill = `Cause Subcategory`)) +
  412.   geom_bar(stat = "identity") +
  413.   labs(
  414.     title = "Percentuale di Cause Subcategory per Liquid Type",
  415.     x = "Liquid Type",
  416.     y = "Percentuale",
  417.     fill = "Cause Subcategory"
  418.   ) +
  419.   theme_minimal() +
  420.   theme(axis.text.y = element_text(angle = 0, hjust = 1)) +
  421.   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Inclinazione del testo
  422.   scale_y_continuous(labels = scales::percent_format(scale = 1))
  423.  
  424.  
  425. write_xlsx(result, "result.xlsx")
  426.  
  427. library(dplyr)
  428. library(ggplot2)
  429.  
  430.  
  431. # Filtra per una singola sottocategoria
  432. selected_category <- "INTERNAL"  # Sostituisci con la sottocategoria che vuoi
  433. filtered_data <- data %>%
  434.   filter(`Cause Subcategory` == selected_category)
  435.  
  436. # Grafico a Barre Impilato
  437. ggplot(filtered_data, aes(x = `Liquid Type`, y = count, fill = `Liquid Type`)) +
  438.   geom_bar(stat = "identity") +
  439.   labs(
  440.     title = paste("Conteggio di Liquid Type per la sottocategoria:", selected_category),
  441.     x = "Liquid Type",
  442.     y = "Occorrenze",
  443.     fill = "Liquid Type"
  444.   ) +
  445.   theme_minimal()
  446.  
  447. # Grafico a Barre Affiancato
  448. ggplot(filtered_data, aes(x = `Liquid Type`, y = count, fill = `Liquid Type`)) +
  449.   geom_bar(stat = "identity", position = "dodge") +
  450.   labs(
  451.     title = paste("Conteggio di Liquid Type per la sottocategoria:", selected_category),
  452.     x = "Liquid Type",
  453.     y = "Occorrenze",
  454.     fill = "Liquid Type"
  455.   ) +
  456.   theme_minimal()
  457.  
  458. # Grafico a barre affiancato
  459. ggplot(Liquid_sub, aes(x = `Cause Subcategory`, y = count, fill = `Liquid Type`)) +
  460.   geom_bar(stat = "identity", position = "dodge") +
  461.   labs(
  462.     title = "Conteggio di Liquid Type per Cause Subcategory",
  463.     x = "Cause Subcategory",
  464.     y = "Occorrenze",
  465.     fill = "Liquid Type"
  466.   ) +
  467.   theme_minimal() +
  468.   theme(axis.text.x = element_text(angle = 45, hjust = 1))
  469.  
  470.  
  471.  
  472.  
  473. #-------------------- DATE --------------------------
  474.  
  475. data <- read_delim('database.csv', delim = ";")
  476.  
  477. table(data$`Pipeline Shutdown`)
  478.  
  479. df_si <- data[!is.na(data$`Pipeline Shutdown`) & data$`Pipeline Shutdown` == "YES", ]
  480. # Conta il numero di occorrenze per ogni categoria
  481. conteggio_categoria <- df_si %>% count(`Cause Category`)
  482.  
  483. # Calcola la percentuale per ogni categoria
  484. conteggio_categoria <- conteggio_categoria %>%
  485.   mutate(percentuale = n / sum(n) * 100)
  486.  
  487. # Visualizza le percentuali
  488. p102 <- ggplot(conteggio_categoria, aes(x = "", y = percentuale, fill = `Cause Category`)) +
  489.   geom_bar(stat = "identity", width = 1) +
  490.   coord_polar(theta = "y") +
  491.   theme_void() +
  492.   labs(title = "Percentuale di Cause con chiusura pipeline") +
  493.   theme(legend.title = element_blank(), legend.position = "none")
  494.  
  495. p101 | p102
  496.  
  497. Net_loss1 <- data %>%
  498.   group_by(`Pipeline Shutdown`) %>%
  499.   summarize(
  500.     total_net_loss = sum(`Net Loss (Barrels)`, na.rm = TRUE),
  501.     total_cases = n()  # Conta il numero totale di casi
  502.   )
  503.  
  504. ggplot(Net_loss1, aes(x = `Pipeline Shutdown`, y = total_net_loss, fill = `Pipeline Shutdown`)) +
  505.   geom_bar(stat = "identity") +
  506.   labs(
  507.     title = paste("Net Loss (Barrels)"),
  508.     x = "Cause Category",
  509.     y = "Total Net Loss",
  510.     fill = "Cause Category"
  511.   ) +
  512.   theme_minimal() +
  513.   theme(axis.text.x = element_blank())
  514.  
  515. #GRAFICO A BARRE NET LOSS --> SI NO NA
  516. Net_loss1 <- data %>%
  517.   group_by(`Pipeline Shutdown`) %>%
  518.   summarize(
  519.     total_net_loss = mean(`Net Loss (Barrels)`, na.rm = TRUE),
  520.     total_cases = n()  # Conta il numero totale di casi
  521.   )
  522.  
  523. ggplot(Net_loss1, aes(x = `Pipeline Shutdown`, y = total_net_loss, fill = `Pipeline Shutdown`)) +
  524.   geom_bar(stat = "identity") +
  525.   labs(
  526.     title = paste("Net Loss (Barrels) MEDIA"),
  527.     x = "Cause Category",
  528.     y = "Total Net Loss",
  529.     fill = "Pipeline Shutdown"
  530.   ) +
  531.   theme_minimal() +
  532.   theme(axis.text.x = element_blank())
  533.  
  534.  
  535.  
  536. # Crea il grafico a barre
  537. ggplot(percentages, aes(x = Response, y = percentage, fill = Response)) +
  538.   geom_bar(stat = "identity") +
  539.   labs(
  540.     title = "Percentuale di Occorrenze di 'SI', 'NO' e 'NA'",
  541.     x = "Risposta",
  542.     y = "Percentuale (%)",
  543.     fill = "Risposta"
  544.   ) +
  545.   theme_minimal()
  546.  
  547. data_clean <- data %>% filter(!is.na(`Net Loss (Barrels)`))
  548.  
  549. # Crea il boxplot
  550. ggplot(data_clean, aes(x = `Pipeline Shutdown`, y = `Net Loss (Barrels)`, fill = `Pipeline Shutdown`)) +
  551.   geom_boxplot() +
  552.   labs(
  553.     title = "Net Loss (Barrels) by Pipeline Shutdown",
  554.     x = "Pipeline Shutdown",
  555.     y = "Net Loss (Barrels)",
  556.     fill = "Pipeline Shutdown"
  557.   ) +
  558.   scale_y_continuous(limits = c(0, 10)) +
  559.   theme_minimal()
  560.  
  561.  
  562. #CHIUSURA DEFINITIVA
  563. data$`Accident Date/Time` <- mdy_hm(data$`Accident Date/Time`)
  564. data$`Shutdown Date/Time` <- mdy_hm(data$`Shutdown Date/Time`)
  565. data$`Restart Date/Time` <- mdy_hm(data$`Restart Date/Time`)
  566.  
  567. filtered_data_yes <- data %>%
  568.   filter(`Pipeline Shutdown` == "YES")
  569.  
  570. filtered_data <- data %>%
  571.   filter(`Pipeline Shutdown` == "YES" & is.na(`Restart Date/Time`) & !is.na(`Shutdown Date/Time`))
  572. table(data$`Pipeline Shutdown`)
  573. mean(filtered_data$`Net Loss (Barrels)`)
  574. table(filtered_data$`Cause Category`)
  575.  
  576. filtered_yes <- data %>% filter(`Pipeline Shutdown` == "YES"  & !is.na(`Shutdown Date/Time`) & !is.na(`Restart Date/Time`))
  577. mean(filtered_yes$`Net Loss (Barrels)`)
  578. median(filtered_yes$`Net Loss (Barrels)`)
  579. median(filtered_data$`Net Loss (Barrels)`)
  580.  
  581. sd(filtered_yes$`Net Loss (Barrels)`)
  582. sd(filtered_data$`Net Loss (Barrels)`)
  583.  
  584. hist(filtered_data$`Net Loss (Barrels)`)
  585. hist(filtered_yes$`Net Loss (Barrels)`)
  586.  
  587. filtered_yes$chiusura <- (filtered_yes$`Shutdown Date/Time` - filtered_yes$`Accident Date/Time`) / 3600
  588. mean(filtered_yes$chiusura)
  589. tail(filtered_yes$chiusura)
  590. filtered_yes$chiusura <- as.numeric(filtered_yes$chiusura)
  591.  
  592. ggplot(filtered_yes, aes(x = `chiusura`)) +
  593.   geom_histogram(bins = 30, fill = "blue", color = "black") +  # bins = 30 specifies the number of bins
  594.   labs(title = "Histogram of Data", x = "Values", y = "Frequency") +
  595.   theme_minimal()
  596.  
  597. filtered_yes$durata_chiusura <- as.numeric(filtered_yes$`Restart Date/Time` - filtered_yes$`Shutdown Date/Time`) / 3600
  598. mean(filtered_yes$durata_chiusura, na.rm = TRUE)
  599.  
  600. ggplot(filtered_yes, aes(x = `durata_chiusura`)) +
  601.   geom_histogram(bins = 30, fill = "lightblue", color = "black") +  # bins = 30 specifies the number of bins
  602.   labs(title = "Istogramma delle ore di chiusura della Pipeline", x = "Ore", y = "Eventi") +
  603.   theme_minimal()+
  604.   scale_x_continuous(limits = c(0, 200)) +
  605.   geom_vline(aes(xintercept = mean(filtered_yes$durata_chiusura, na.rm = TRUE)),color = 'red', linetype = 1, size = 2)
  606.  
  607. filtered_yes <- filtered_yes %>%
  608.   filter(is.finite(durata_chiusura))
  609.  
  610.  
  611. # Verifica la classe della colonna e i valori
  612. summary(filtered_yes$durata_chiusura)  # Riepilogo dei dati
  613. any(is.na(filtered_yes$durata_chiusura))  # Verifica NA
  614. any(!is.finite(filtered_yes$durata_chiusura))  # Verifica NaN e Inf
  615.  
  616.  
  617.  
  618.  
  619. install.packages("fitdistrplus")
  620. library(fitdistrplus)
  621. library(dplyr)
  622.  
  623.  
  624. filtered_yes <- filtered_yes %>%
  625.   filter(chiusura >= 0)
  626. chiusura <- filtered_yes$chiusura
  627. filtered_yes$durata_chiusura <- as.numeric(filtered_yes$durata_chiusura)
  628.  
  629. fit_gamma <- fitdist(filtered_yes$durata_chiusura, "gamma",
  630.                      start = list(shape = shape_init, scale = scale_init),
  631.                      method = "mme",  # Prova con un metodo di ottimizzazione diverso
  632.                      control = list(maxit = 1000))
  633. fit_gamma$estimate
  634.  
  635. shape <- as.numeric(fit_gamma$estimate[1])
  636. scale <- as.numeric(fit_gamma$estimate[2])
  637.  
  638. # Creare una sequenza di valori per il grafico
  639. x_vals <- seq(1, max(filtered_yes$durata_chiusura), length.out = 1341)
  640. y_vals <- dgamma(x_vals, shape = shape, scale = scale)
  641.  
  642. # Creare il grafico
  643. ggplot(filtered_yes, aes(x = `durata_chiusura`)) +
  644.   geom_histogram(bins = 30, fill = "lightblue", color = "black", aes(y = ..density..)) +
  645.   labs(title = "Istogramma delle ore di chiusura della Pipeline con distribuzione Gamma",
  646.        x = "Ore", y = "Density") +
  647.   theme_minimal() +
  648.   scale_x_continuous(limits = c(0, 200)) +
  649.   geom_line(aes(x = x_vals, y = y_vals), color = "red", size = 1)
  650.  
  651. fit <- fitdist(filtered_yes$durata_chiusura, "exp")
  652.  
  653. # Visualizzare i parametri stimati (lambda)
  654. fit$estimate
  655.  
  656. x_vals <- seq(0, max(filtered_yes$durata_chiusura, na.rm = TRUE), length.out = 1341)
  657. y_vals <- dexp(x_vals, rate = fit$estimate["rate"])
  658.  
  659. # Creare il grafico
  660. ggplot(filtered_yes, aes(x = `durata_chiusura`)) +
  661.   geom_histogram(bins = 30, fill = "lightblue", color = "black", aes(y = ..density..)) +
  662.   labs(title = "Istogramma delle ore di chiusura della Pipeline", x = "Ore", y = "Eventi") +
  663.   theme_minimal() +
  664.   scale_x_continuous(limits = c(0, 200)) +
  665.   geom_vline(aes(xintercept = mean(filtered_yes$durata_chiusura, na.rm = TRUE)), color = 'red', linetype = 1, size = 2) +  
  666.   geom_line(aes(x = x_vals, y = y_vals), color = "blue", size = 1)  # Aggiungi la curva gamma
  667.  
  668. filtered_yes_no_duplicates <- unique(filtered_yes$durata_chiusura)
  669.  
  670. ks_test <- ks.test(filtered_yes_no_duplicates, "pexp", rate = fit$estimate["rate"])
  671. print(ks_test)
  672.  
  673. # Visualizzare i risultati del test
  674. print(ks_test)
  675.  
  676.  
  677. fit_binomiale_negativa <- fitdist(filtered_yes$durata_chiusura, "nbinom")
  678.  
  679. # Visualizzare i parametri stimati (size e prob)
  680. print(fit_binomiale_negativa$estimate)
  681.  
  682.  
  683.  
  684.  
  685.  
  686.  
  687.  
  688. pmax(filtered_yes$durata_chiusura, na.rm=TRUE)
  689.  
  690. ggplot(data, aes(x = `Liquid Type`, y = `Environmental Remediation Costs`)) +
  691.   geom_point(color = "blue", size = 2) +
  692.   labs(
  693.     title = "Scatter Plot of X vs Y",
  694.     x = "X-axis (Independent Variable)",
  695.     y = "Y-axis (Dependent Variable)"
  696.   ) +
  697.   theme_minimal()
  698.  
  699. #EFFICIENZA DEL RECUPERO
  700. data_noNA <- data %>%
  701.   mutate(`Intentional Release (Barrels)` = ifelse(is.na(`Intentional Release (Barrels)`), 0, `Intentional Release (Barrels)`)) %>%
  702.   filter(!is.na(`Accident State`))
  703.  
  704. ds_efficiency <- data_noNA %>%
  705.   group_by(`Pipeline Type`) %>%
  706.   summarize(
  707.     mean_percentage = mean(((`Liquid Recovery (Barrels)`) / (`Unintentional Release (Barrels)`+`Intentional Release (Barrels)`)), na.rm = TRUE)  # Calcolo percentuale e media
  708.   )
  709.  
  710.  
  711.  
  712. ggplot(ds_efficiency, aes(x = reorder(`Accident State`, mean_percentage), y = mean_percentage)) +
  713.   geom_bar(stat = "identity") +
  714.   labs(
  715.     title = "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI STATO",
  716.     x = "STATO",
  717.     y = "FREQUENZA"
  718.   ) +
  719.   theme_minimal()
  720.  
  721. ggplot(ds_efficiency, aes(x = reorder(`Liquid Type`, mean_percentage), y = mean_percentage)) +
  722.   geom_bar(stat = "identity") +
  723.   labs(
  724.     title = "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI TIPO DI LIQUIDO",
  725.     x = "TIPO DI LIQUIDO",
  726.     y = "FREQUENZA"
  727.   ) +
  728.   theme_minimal() +
  729.   theme(axis.text.x = element_text(angle = 45, hjust = 1))
  730.  
  731. ggplot(ds_efficiency, aes(x = reorder(`Cause Category`, mean_percentage), y = mean_percentage)) +
  732.   geom_bar(stat = "identity") +
  733.   labs(
  734.     title = "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI STATO",
  735.     x = "STATO",
  736.     y = "FREQUENZA"
  737.   ) +
  738.   theme_minimal() +
  739.   theme(axis.text.x = element_text(angle = 45, hjust = 1))
  740.  
  741. ggplot(ds_efficiency, aes(x = reorder(`Pipeline Type`, mean_percentage), y = mean_percentage)) +
  742.   geom_bar(stat = "identity") +
  743.   labs(
  744.     title = "EFFICIENZA DI RECUPERO MEDIO DI BARILI PER OGNI STATO",
  745.     x = "PIPELINE TYPE",
  746.     y = "FREQUENZA"
  747.   ) +
  748.   theme_minimal() +
  749.   theme(axis.text.x = element_text(angle = 45, hjust = 1))
  750.  
  751.  
  752. #---------------------- TOTAL RELEASE --------------------------
  753.  
  754. # grafico dei box plot delle distribuzioni dei total release (nuova colonna creata come somma tra Unintentional
  755. # Intentional), pesata poi sull'efficienza di recupero per ogni categoria
  756.  
  757. data$`Intentional Release (Barrels)`[is.na(data$`Intentional Release (Barrels)`)] <- 0
  758.  
  759. ds_efficiency <- data %>%
  760.   group_by(`Cause Category`) %>%
  761.   summarize(
  762.     mean_percentage = mean(((`Liquid Recovery (Barrels)`) / (`Unintentional Release (Barrels)`+`Intentional Release (Barrels)`)), na.rm = TRUE)  # Calcolo percentuale e media
  763.   )
  764.  
  765.  
  766. ds_UnintBarrels_10 <- data %>%
  767.   filter(`Unintentional Release (Barrels)` >= 1) %>%
  768.   mutate(`Total Release (Barrels)` = `Unintentional Release (Barrels)` + `Intentional Release (Barrels)`)
  769.  
  770. ds_UnintBarrels_10 <- ds_UnintBarrels_10 %>%
  771.   left_join(ds_efficiency, by = "Cause Category")
  772.  
  773. p1 <- ggplot(ds_UnintBarrels_10, aes(x = `Cause Category`, y = `Total Release (Barrels)`, fill = mean_percentage)) +
  774.   geom_boxplot() +
  775.   labs(
  776.     title = "Distribuzione Total Release (Barrels) per Categoria",
  777.     x = "Cause Category",
  778.     y = "Barrels",
  779.     fill = "Recovery Efficiency"
  780.   ) +
  781.   theme_minimal() +
  782.   theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  783.   ylim(0, 30)+
  784.   scale_fill_gradientn(colors = c("red", "yellow", "green"))
  785. p1
  786.  
  787.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement