Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# The file visits.csv records the dates of client visits.
# Fields:
#   id       - unique client identifier
#   start_dt - visit date (the first visit coincides with the client's registration date)
# The file purchases.csv contains information about the purchases made by clients.
# Fields:
#   buy_ts  - date and time of the purchase
#   id      - unique client identifier
#   revenue - revenue in conventional units (y.e.)
# Task: compute LTV on day 30 after the client's arrival and plot the LTV dynamics by day.
# Start with a small cohort (50 users), then increase the number of users.
# (1) Take a cohort of users and, for each user, compute the revenue dynamics by day since registration.
- import csv
# Number of days in every month, as {year: {month_number: days}}.
# Only the two years listed here are covered; 2016 is the leap year.
calendar = {
    year: dict(enumerate(month_lengths, start=1))
    for year, month_lengths in (
        (2016, (31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)),
        (2017, (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)),
    )
}
def count_end_date(start_date, calendar_, days=30):
    """Return the date that falls ``days`` days after ``start_date``.

    Args:
        start_date: Sequence whose first three items are ``year, month, day``
            of a real calendar date.
        calendar_: Days-per-month table. Kept only so existing callers keep
            working; it is not consulted because the standard library already
            knows month lengths, leap years and year rollover for any year.
        days: Size of the window in days (30 by default).

    Returns:
        A new ``[year, month, day]`` list.

    Raises:
        ValueError: If ``start_date`` does not describe a real calendar date.
    """
    # Local import keeps this function usable on its own.
    import datetime

    start = datetime.date(*start_date[:3])
    end = start + datetime.timedelta(days=days)
    return [end.year, end.month, end.day]
def check_period(date, start_date, end_date, calendar_):
    """Tell whether ``date`` lies between ``start_date`` and ``end_date``.

    All three dates are sequences whose first three items are
    ``year, month, day``. Both ends of the interval are inclusive.

    Args:
        date: The date being tested.
        start_date: First day of the interval.
        end_date: Last day of the interval.
        calendar_: Days-per-month table. Kept for call compatibility only;
            the comparison does not need it.

    Returns:
        ``True`` when ``start_date <= date <= end_date``, otherwise ``False``.
    """
    # [year, month, day] lists compare lexicographically, which is exactly
    # chronological order, so the year and intermediate months are honoured.
    current = list(date[:3])
    return list(start_date[:3]) <= current <= list(end_date[:3])
# Per-client lookup tables keyed by client id; both start out empty.
# from_date holds the first day of a client's window, to_date the last one.
from_date, to_date = dict(), dict()
def recycle(line):
    """Turn one row of purchases.csv into ``[id, date, income]``.

    Two row shapes are accepted:

    * a one-element list holding the whole comma-separated record, e.g.
      ``['11.01.2017 10:52:11,2636761148,11']`` or, with a quoted
      decimal-comma amount, ``['11.01.2017 10:52:11,2636761148,"11,5"']``;
    * a list that is already split into fields, where the first item is the
      timestamp, the second-to-last the client id and the last the amount.

    Args:
        line: A row as produced by ``csv.reader``.

    Returns:
        ``[client_id, [year, month, day], income]`` with ``income`` as a
        float, or ``None`` for a blank row or a row whose amount is empty.

    Raises:
        ValueError: If the row has fewer than three fields, or the date or
            amount cannot be converted to numbers.
    """
    if len(line) == 1 and isinstance(line[0], str):
        # The record arrived as a single string. Let the csv module split it
        # so that a quoted amount such as "11,5" stays in one field.
        fields = next(csv.reader([line[0]]), [])
    else:
        fields = list(line)

    if not fields:
        return None
    if len(fields) < 3:
        raise ValueError("expected date, id and income, got %r" % (line,))

    raw_date, client_id, raw_income = fields[0], fields[-2], fields[-1]

    # Amounts may use a decimal comma; float() needs a dot.
    raw_income = raw_income.strip().replace(',', '.')
    if not raw_income:
        return None
    income = float(raw_income)

    # The timestamp looks like "DD.MM.YYYY HH:MM:SS"; only the date is kept.
    day, month, year = (int(part) for part in raw_date.split()[0].split('.')[:3])
    return [client_id, [year, month, day], income]
# Revenue per client for each of the first 30 days after registration:
# {client_id: [revenue_day_0, ..., revenue_day_29]}.
dynamic_income_per_user = {}

# newline='' hands line-ending handling to the csv module. csv.reader ignores
# a `lineterminator` argument, so none is passed.
with open("visits.csv", 'r', encoding='utf-8', newline='') as visits_file:
    for row_number, row in enumerate(csv.reader(visits_file, delimiter=',')):
        if not row:
            continue  # blank line in the file
        client_id, raw_date = row[0], row[1]
        try:
            visit_date = [int(part) for part in raw_date.split('-')]  # [year, month, day]
        except ValueError:
            if row_number == 0:
                continue  # first row is a header such as "id,start_dt"
            raise
        # The earliest visit is the registration date, so keep the minimum
        # even when the rows are not in chronological order.
        if client_id not in from_date or visit_date < from_date[client_id]:
            from_date[client_id] = visit_date
            to_date[client_id] = count_end_date(visit_date, calendar)
        if client_id not in dynamic_income_per_user:
            dynamic_income_per_user[client_id] = [0] * 30
def num_of_day_from_start(start_date, date, calendar_):
    """Return how many days ``date`` is after ``start_date``.

    Args:
        start_date: Sequence whose first three items are ``year, month, day``
            of the reference date.
        date: Sequence of the same shape for the date being measured.
        calendar_: Days-per-month table. Kept for call compatibility only;
            the standard library does the date arithmetic for any year.

    Returns:
        The signed number of days: 0 when both dates are equal, negative
        when ``date`` precedes ``start_date``.

    Raises:
        ValueError: If either argument is not a real calendar date.
    """
    # Local import keeps this function usable on its own.
    import datetime

    start = datetime.date(*start_date[:3])
    current = datetime.date(*date[:3])
    return (current - start).days
# newline='' hands line-ending handling to the csv module. csv.reader ignores
# a `lineterminator` argument, so none is passed.
with open("purchases.csv", 'r', encoding='utf-8', newline='') as purchases_file:
    for row_number, row in enumerate(csv.reader(purchases_file, delimiter=';')):
        if not row:
            continue  # blank line in the file
        try:
            purchase = recycle(row)  # [id, date, income], or None if unusable
        except ValueError:
            if row_number == 0:
                continue  # first row is a header such as "buy_ts,id,revenue"
            raise
        if purchase is None:
            continue
        client_id, purchase_date, income = purchase

        # A purchase by a client with no recorded visit has no window to
        # be placed in, so it is skipped instead of raising KeyError.
        if client_id not in from_date:
            continue
        start_date = from_date[client_id]
        end_date = to_date[client_id]

        if not check_period(purchase_date, start_date, end_date, calendar):
            continue
        day_from_start = num_of_day_from_start(start_date, purchase_date, calendar)
        if day_from_start not in range(0, 30):
            continue
        print(day_from_start)
        if client_id in dynamic_income_per_user:
            dynamic_income_per_user[client_id][day_from_start] += income

# The day-by-day revenue of every client is now available.
print(dynamic_income_per_user)
# Idea for tomorrow: write the data with DictWriter (days 1 to 30 since registration),
# read it back with DictReader, then compute the sums and so on.
# OR is it possible to do without that?
# By the way, why does the code work under the debugger but not when run normally?
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement