Advertisement
cymplecy

RegEx find BlueSky Handle

Nov 21st, 2024
615
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.82 KB | Source Code | 0 0
  1. import re
  2. from typing import List, Dict
  3.  
  4. def parse_mentions(text: str) -> List[Dict]:
  5.     spans = []
  6.     # regex based on: https://atproto.com/specs/handle#handle-identifier-syntax
  7.     mention_regex = rb"[$|\W](@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)"
  8.     text_bytes = text.encode("UTF-8")
  9.     for m in re.finditer(mention_regex, text_bytes):
  10.         spans.append({
  11.             "start": m.start(1),
  12.             "end": m.end(1),
  13.             "handle": m.group(1)[1:].decode("UTF-8")
  14.         })
  15.     return spans
  16.  
  17.  
  18. # Parse facets from text and resolve the handles to DIDs
  19. def parse_facets(text: str) -> List[Dict]:
  20.     facets = []
  21.     for m in parse_mentions(text):
  22.         print (m)
  23.  
  24. parse_facets("@cheerlights.bsky.social test test test @cymplecy.bsky.social")  
  25.  
Tags: regex
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement