Untitled | gubichas | Mar 9th, 2025 | Python
from datetime import datetime, date, timedelta
import logging
import numpy as np
from typing import Dict, List, Tuple

from champ_api_working import ChampApiV4
from common import AnalyticsDb
from ym_api_working import YmApi

logger = logging.getLogger('pubs_main')
logger.setLevel(logging.INFO)


class PubsWorking:
    def __init__(self, champ: ChampApiV4, ym: YmApi, db: AnalyticsDb):
        self.champ = champ
        self.ym = ym
        self.db = db

    # ... (the remaining methods are unchanged; only the exclusive_pubs handling is refactored here) ...

    def work_with_exclusive_pubs(self, pub_d1: datetime, pub_d2: datetime, sc_d1: date, sc_d2: date) -> None:
        """
        Downloads and processes exclusive publications data for a given period.

        Args:
            pub_d1 (datetime): Start of the publication period.
            pub_d2 (datetime): End of the publication period.
            sc_d1 (date): Start of the scan period.
            sc_d2 (date): End of the scan period.
        """
        logger.info(
            f"Starting exclusive pubs processing. Publication period: {pub_d1} - {pub_d2}, "
            f"Scan period: {sc_d1} - {sc_d2}"
        )

        # 1. Fetch the IDs of exclusive publications for the period
        exclusive_pubs_ids = self.get_exclusive_pubs(pub_d1, pub_d2)
        if not exclusive_pubs_ids['news'] and not exclusive_pubs_ids['article']:
            logger.warning("No exclusive publications found for the given period.")
            return

        # 2. Delete stale data for the scan period
        self._delete_old_exclusive_data(sc_d1, sc_d2)

        # 3. Collect fresh traffic statistics
        exclusive_data = self._collect_exclusive_stats(
            sc_d1, sc_d2, exclusive_pubs_ids['news'], exclusive_pubs_ids['article']
        )

        # 4. Save the new data
        self._save_exclusive_data(exclusive_data, sc_d1, sc_d2)

        logger.info("Finished processing exclusive publications.")

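    # NOTE: get_exclusive_pubs() lives in the omitted part of this class.
    # Judging from the usage above (an inference, the method is not shown in
    # this paste), it is expected to return a mapping keyed by publication
    # type:
    #   {'news': {pub_id: internal_id, ...}, 'article': {pub_id: internal_id, ...}}
    # where pub_id is the public identifier used in Yandex Metrika stats and
    # internal_id is the analytics DB key.
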
    def _delete_old_exclusive_data(self, scan_start: date, scan_end: date) -> None:
        """Deletes old exclusive publications data for the specified scan period."""
        logger.info(f"Deleting old exclusive publications data from {scan_start} to {scan_end}")
        self.db.del_exclusive_pubs_data(scan_start, scan_end)

    def _collect_exclusive_stats(
        self, scan_start: date, scan_end: date, news_ids: Dict[int, int], article_ids: Dict[int, int]
    ) -> List[dict]:
        """
        Collects traffic statistics for exclusive publications.

        Args:
            scan_start (date): Start of the scan period.
            scan_end (date): End of the scan period.
            news_ids (Dict[int, int]): Mapping of news pub IDs to internal IDs.
            article_ids (Dict[int, int]): Mapping of article pub IDs to internal IDs.

        Returns:
            List[dict]: List of dictionaries with stats for exclusive publications.
        """
        exclusive_rows = []
        news_keys = list(news_ids.keys())
        article_keys = list(article_ids.keys())

        for traffic_id, traffic_name, traffic_filter in self.db.traffics:
            logger.info(f"Fetching Yandex Metrika stats for traffic: {traffic_name}")
            stats = self.get_pub_stats(scan_start, scan_end, news_keys, article_keys, traffic_name, traffic_filter)

            for day, date_stats in stats.items():
                logger.info(
                    f"{day}: Retrieved {len(date_stats['news'])} news and {len(date_stats['article'])} articles"
                )
                exclusive_rows.extend(
                    self._process_day_stats(day, date_stats, news_ids, article_ids, traffic_id)
                )

        return exclusive_rows

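    # NOTE: get_pub_stats() is also among the omitted methods. Inferring from
    # how _process_day_stats consumes its output below (an assumption, not
    # confirmed by this paste), it returns per-day stats shaped roughly like
    #   {date(2025, 3, 1): {'news':    {pub_id: {'pv': 123}},
    #                       'article': {pub_id: {'pv': 45}}}}
    # i.e. day -> pub_type -> pub_id -> metrics dict with at least a 'pv'
    # (pageviews) key.
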
    def _process_day_stats(
        self, day: date, date_stats: Dict[str, dict], news_ids: Dict[int, int],
        article_ids: Dict[int, int], traffic_id: int
    ) -> List[dict]:
        """
        Processes daily stats for news and articles into a list of rows.

        Args:
            day (date): Date of the stats.
            date_stats (Dict[str, dict]): Stats for news and articles.
            news_ids (Dict[int, int]): Mapping of news pub IDs to internal IDs.
            article_ids (Dict[int, int]): Mapping of article pub IDs to internal IDs.
            traffic_id (int): Traffic ID for the stats.

        Returns:
            List[dict]: Processed rows for the day.
        """
        rows = []
        for pub_type, pub_stats in date_stats.items():
            id_map = news_ids if pub_type == 'news' else article_ids
            for pub_id, stats in pub_stats.items():
                if pub_id not in id_map:
                    logger.debug(f"Skipping unknown pub ID {pub_id} from Yandex Metrika stats.")
                    continue
                internal_id = id_map[pub_id]
                rows.append({
                    'internal_id': internal_id,
                    'date': day,
                    'traffic_id': traffic_id,
                    'pageviews': stats['pv'],
                    'article_number': str(pub_id)
                })
        return rows

    def _save_exclusive_data(self, data: List[dict], scan_start: date, scan_end: date) -> None:
        """Saves exclusive publications data to the database."""
        logger.info(f"Saving {len(data)} exclusive publication records for period {scan_start} to {scan_end}")
        if data:
            self.db.new_exclusive_pubs_data_batch(data)
        else:
            logger.warning("No exclusive publication data to save.")
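

# Minimal usage sketch. This is an assumption for illustration only: the
# constructor signatures of ChampApiV4, YmApi and AnalyticsDb are not shown
# in this paste, so the no-arg construction below is hypothetical.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    champ = ChampApiV4()  # hypothetical construction
    ym = YmApi()          # hypothetical construction
    db = AnalyticsDb()    # hypothetical construction

    pubs = PubsWorking(champ, ym, db)

    # Process publications from the last 30 days, rescanning the last 7 days.
    now = datetime.now()
    pubs.work_with_exclusive_pubs(
        pub_d1=now - timedelta(days=30),
        pub_d2=now,
        sc_d1=date.today() - timedelta(days=7),
        sc_d2=date.today(),
    )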