# Untitled

library(readxl)
library(dplyr)
library(ggplot2)
library(lubridate)
library(scales)

# Location of the Excel workbook to analyse
file_path <- "test.xlsx"

# Read the worksheet names and report them as one comma-separated line
sheets <- excel_sheets(path = file_path)
print(
  paste("Аркуші у файлі:", paste0(sheets, collapse = ", "))
)

# Load the first worksheet into the working data frame
df <- read_excel(path = file_path, sheet = sheets[1])

# Preview the first six rows
head(df, n = 6)

# Step 1: coerce the three timestamp columns to POSIXct.
# No tz is passed, so character input is interpreted in the session's time
# zone; strings that do not match the format become NA.
# Step 2: derive the durations, expressed in minutes:
#   response_time = start_time  - request_time
#   handling_time = finish_time - start_time
#   total_time    = finish_time - request_time
df <- df %>%
  mutate(
    request_time = as.POSIXct(request_time, format = "%Y-%m-%d %H:%M:%S"),
    start_time = as.POSIXct(start_time, format = "%Y-%m-%d %H:%M:%S"),
    finish_time = as.POSIXct(finish_time, format = "%Y-%m-%d %H:%M:%S")
  ) %>%
  mutate(
    response_time = as.numeric(
      difftime(start_time, request_time, units = "mins")
    ),
    handling_time = as.numeric(
      difftime(finish_time, start_time, units = "mins")
    ),
    total_time = as.numeric(
      difftime(finish_time, request_time, units = "mins")
    )
  )

# Drop requests with anomalous response times (more than 5000 minutes).
# filter() keeps only rows where the condition is TRUE, so rows whose
# response_time is NA are removed as well.
df <- filter(df, response_time <= 5000)

# Overall averages of the three duration metrics (NA values ignored)
summary_stats <- summarise(
  df,
  avg_response_time = mean(response_time, na.rm = TRUE),
  avg_handling_time = mean(handling_time, na.rm = TRUE),
  avg_total_time = mean(total_time, na.rm = TRUE)
)

# Report the one-row summary table
print("📌 Загальні середні метрики (у хвилинах):")
print(summary_stats)

# Share of requests whose response time exceeds 15 and 45 minutes.
# The mean of a logical vector is the proportion of TRUE values.
exceed_15 <- mean(df[["response_time"]] > 15, na.rm = TRUE)
exceed_45 <- mean(df[["response_time"]] > 45, na.rm = TRUE)

# Print both shares formatted as percentages
cat(
  "\n🔴 Частка запитів з response_time > 15 хв:",
  percent(exceed_15),
  "\n"
)
cat(
  "🔴 Частка запитів з response_time > 45 хв:",
  percent(exceed_45),
  "\n"
)

# Per-team comparison: request count and average of each duration metric
team_stats <- summarise(
  group_by(df, team),
  count = n(),
  avg_response = mean(response_time, na.rm = TRUE),
  avg_handling = mean(handling_time, na.rm = TRUE),
  avg_total = mean(total_time, na.rm = TRUE)
)

# Report the per-team table
print("📊 Порівняння команд:")
print(team_stats)

# Slowest agents: request count and average response time per moderator
moderator_stats <- df %>%
  group_by(moderator) %>%
  summarise(count = n(), avg_response = mean(response_time, na.rm = TRUE))

# Keep only moderators with more than 30 requests and rank them from the
# highest average response time to the lowest
moderator_stats <- moderator_stats %>%
  filter(count > 30) %>%
  arrange(desc(avg_response))

# Report the ten slowest moderators
print("🚨 Топ агентів за response_time:")
print(head(moderator_stats, n = 10))

# Histogram of response times in 60-minute bins.
# Bins are anchored at 0 (boundary = 0) and the visible range is set with
# coord_cartesian() rather than scale limits: scale limits discard every bar
# whose edges fall outside the limits, and with the default bin alignment the
# first bin spans -30 to 30 minutes, so the bar counting the fastest
# responses was silently removed from the plot.
ggplot(df, aes(x = response_time)) +
  geom_histogram(
    binwidth = 60,
    boundary = 0,
    fill = "steelblue",
    color = "white"
  ) +
  coord_cartesian(xlim = c(0, 5000)) +
  labs(title = "Розподіл часу відповіді", x = "Хвилини", y = "Кількість запитів")
# Save the response-time histogram as a PNG file
ggsave("~/Desktop/response_time_histogram.png", plot = last_plot(), width = 8, height = 6)

# Bar chart of the average response time per team, one coloured bar per team
ggplot(
  data = team_stats,
  mapping = aes(x = team, y = avg_response, fill = team)
) +
  geom_col() +
  theme_minimal() +
  labs(title = "Середній час відповіді по командах", x = "Команда", y = "Хвилини")
# Save the per-team chart as a PNG file (ggsave() defaults to the last plot)
ggsave(filename = "~/Desktop/average_response_time_by_team.png", width = 8, height = 6)

# Horizontal bar chart of the ten moderators with the highest average
# response time; moderators are ordered by descending average
ggplot(
  data = head(moderator_stats, n = 10),
  mapping = aes(
    x = reorder(as.factor(moderator), -avg_response),
    y = avg_response
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(title = "Топ-10 агентів з найбільшим часом відповіді", x = "ID Агента", y = "Середній час (хв)")


# Save the top-10 agents chart as a PNG file (ggsave() defaults to the last plot)
ggsave(filename = "~/Desktop/top_10_agents.png", width = 8, height = 6)