Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- library(dplyr)
# Synthetic data ----
# Fixed seed so every run reproduces the same random draws. The draws below
# are made in a fixed order (user, amount, status, region, then the users'
# regions); reordering them would change the generated values.
set.seed(123)

# 1000 transactions spread over users 1..50.
transactions <- data.frame(transaction_id = seq_len(1000))
transactions$user_id <- sample(seq_len(50), size = 1000, replace = TRUE)
# Amounts are uniform on [1, 200], rounded to two decimals.
transactions$amount <- round(runif(n = 1000, min = 1, max = 200), digits = 2)
# Roughly 80% of transactions succeed, 20% fail.
transactions$status <- sample(
  c("success", "fail"),
  size = 1000,
  replace = TRUE,
  prob = c(0.8, 0.2)
)
# NOTE: this per-transaction region is drawn independently of the region
# assigned to each user in `users` below.
transactions$region_id <- sample(seq_len(5), size = 1000, replace = TRUE)

# 50 users, each assigned one of regions 1..5 at random.
users <- data.frame(user_id = seq_len(50))
users$region_id <- sample(seq_len(5), size = 50, replace = TRUE)
# Main analysis ----
# Build `result`: users whose total successful spend is strictly above the
# average total spend of their region, ordered by region and then by spend
# (highest first). Intermediate tables live inside local() so that only
# `result` is created at top level.
result <- local({
  # Keep only successful transactions.
  successful <- transactions %>%
    filter(status == "success")

  # One row per user with the sum of their successful amounts. summarise()
  # keeps only user_id and total_spend, so the region used below is the one
  # joined in from `users`.
  spend_by_user <- successful %>%
    group_by(user_id) %>%
    summarise(total_spend = sum(amount))

  # Attach each user's region and the mean total spend within that region.
  with_region_avg <- spend_by_user %>%
    left_join(users, by = "user_id") %>%
    group_by(region_id) %>%
    mutate(avg_region_spend = mean(total_spend)) %>%
    ungroup()

  # Keep the above-average users and sort for display.
  with_region_avg %>%
    filter(total_spend > avg_region_spend) %>%
    arrange(region_id, desc(total_spend))
})

print(result)
library(ggplot2)

# Plot ----
# One red point per above-average user (total spend on the y axis, region on
# the x axis), with each region's average spend drawn as a horizontal bar.
spend_plot <- ggplot(result, aes(x = factor(region_id), y = total_spend)) +
  # avg_region_spend is constant within a region, so a boxplot of it collapses
  # to a zero-height box. A zero-height error bar draws that reference level
  # directly instead.
  geom_errorbar(
    aes(ymin = avg_region_spend, ymax = avg_region_spend),
    color = "steelblue",
    width = 0.6
  ) +
  geom_point(color = "red", size = 3) +
  labs(
    title = "Користувачі з витратами вище середнього по регіонах",
    x = "Регіон",
    y = "Сумарні витрати (TC)"
  ) +
  theme_minimal()

# Print explicitly (as is done for `result`) so the plot is also rendered when
# the script is run through source(), which does not auto-print values.
print(spend_plot)
Advertisement
Add Comment
Please sign in to add a comment.
Advertisement