Advertisement
YaBoiSwayZ

SEC EDGAR CIK Retriever and URL Generator

May 26th, 2024
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.31 KB | Source Code | 0 0
  1. import aiohttp
  2. import asyncio
  3. import json
  4. import logging
  5. import sys
  6. from datetime import datetime, timedelta
  7. from cachetools import TTLCache
  8. from typing import Optional
  9. import os
  10. import structlog
  11. from ratelimit import limits, sleep_and_retry
  12.  
  13. LOG_FILENAME = os.getenv('LOG_FILENAME', 'sec_cik_retriever.log')
  14. CACHE_FILENAME = os.getenv('CACHE_FILENAME', 'cik_cache.json')
  15. CIK_LOOKUP_URL = 'https://www.sec.gov/cgi-bin/browse-edgar?CIK={ticker}&find=Search&owner=exclude&action=getcompany'
  16. SEC_URL_TEMPLATE = 'https://www.sec.gov/edgar/browse/?CIK={cik}&owner=exclude'
  17. CACHE_VALIDITY_DAYS = int(os.getenv('CACHE_VALIDITY_DAYS', '30'))
  18. ONE_MINUTE = 60
  19.  
# Configure structlog so every event is rendered as a JSON string and then
# handed to a standard-library logger.
# NOTE(review): LOG_FILENAME is not referenced by this configuration — confirm
# whether a file handler is meant to be attached to the stdlib logger.
structlog.configure(
    processors=[structlog.processors.JSONRenderer()],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)
# Module-wide logger used by the lookup code below.
logger = structlog.get_logger()

# In-memory ticker -> CIK cache: at most 1000 entries, each kept for
# CACHE_VALIDITY_DAYS converted to seconds.
cache = TTLCache(maxsize=1000, ttl=CACHE_VALIDITY_DAYS * 24 * 3600)
  30.  
  31. class SessionManager:
  32.     def __init__(self):
  33.         self.session = None
  34.  
  35.     async def __aenter__(self):
  36.         if self.session is None:
  37.             self.session = aiohttp.ClientSession()
  38.         return self.session
  39.  
  40.     async def __aexit__(self, exc_type, exc, tb):
  41.         if self.session:
  42.             await self.session.close()
  43.  
  44. session_manager = SessionManager()
  45.  
  46. @sleep_and_retry
  47. @limits(calls=15, period=ONE_MINUTE)
  48. async def get_cik(ticker: str) -> Optional[str]:
  49.     async with session_manager as session:
  50.         try:
  51.             async with session.get(CIK_LOOKUP_URL.format(ticker=ticker), timeout=10) as response:
  52.                 response.raise_for_status()
  53.                 text = await response.text()
  54.                 cik_number = parse_cik_from_response(text)
  55.                 cache[ticker] = cik_number
  56.                 return cik_number
  57.         except aiohttp.ClientError as e:
  58.             logger.error('Client error', ticker=ticker, error=str(e))
  59.         except asyncio.TimeoutError as e:
  60.             logger.error('Timeout error', ticker=ticker, error=str(e))
  61.         except Exception as e:
  62.             logger.error('Unexpected error', ticker=ticker, error=str(e))
  63.         return None
  64.  
  65. def parse_cik_from_response(text: str) -> str:
  66.     cik_search_text = 'Central Index Key: '
  67.     start_index = text.find(cik_search_text) + len(cik_search_text)
  68.     end_index = start_index + 10
  69.     return text[start_index:end_index].strip()
  70.  
  71. def generate_sec_url(cik: str) -> str:
  72.     return SEC_URL_TEMPLATE.format(cik=cik)
  73.  
  74. async def save_cache_to_disk():
  75.     with open(CACHE_FILENAME, 'w') as file:
  76.         json.dump(dict(cache), file)
  77.  
  78. async def schedule_cache_save(interval: int):
  79.     while True:
  80.         await asyncio.sleep(interval)
  81.         await save_cache_to_disk()
  82.  
  83. async def main():
  84.     asyncio.create_task(schedule_cache_save(60 * 60))  # Save cache every hour
  85.     tickers = sys.argv[1:] if len(sys.argv) > 1 else [input('Enter ticker symbol: ').strip()]
  86.     for ticker in tickers:
  87.         cik_number = await get_cik(ticker)
  88.         if cik_number:
  89.             sec_url = generate_sec_url(cik_number)
  90.             print(f'SEC EDGAR URL for {ticker}: {sec_url}')
  91.         else:
  92.             print(f'Failed to generate SEC EDGAR URL for {ticker}')
  93.  
  94. if __name__ == '__main__':
  95.     asyncio.run(main())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement