Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
- from typing import List, Dict
def parse_mentions(text: str) -> List[Dict]:
    """Locate every ``@handle`` mention in *text*.

    The search runs over the UTF-8 encoding of *text*, so the reported
    offsets are byte offsets into that encoding, not character indices.

    Args:
        text: The text to scan.

    Returns:
        One dict per mention, in order of appearance, with keys:
        ``"start"`` / ``"end"`` - byte offsets of the mention, including
        the leading ``@`` (end is exclusive); ``"handle"`` - the handle
        without the leading ``@``.
    """
    # regex based on: https://atproto.com/specs/handle#handle-identifier-syntax
    # A mention must sit at the very start of the text or directly after a
    # non-word byte. This has to be the alternation (?:^|\W): a character
    # class such as [$|\W] always consumes one byte and therefore can never
    # match a mention at offset 0. The group is non-capturing so the mention
    # itself stays group 1.
    mention_regex = rb"(?:^|\W)(@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)"
    text_bytes = text.encode("UTF-8")
    return [
        {
            "start": m.start(1),
            "end": m.end(1),
            # Drop the leading "@" before decoding back to str.
            "handle": m.group(1)[1:].decode("UTF-8"),
        }
        for m in re.finditer(mention_regex, text_bytes)
    ]
# Intended to parse facets from text and resolve the handles to DIDs;
# at present it only prints the mentions it finds.
def parse_facets(text: str) -> List[Dict]:
    """Print each mention that ``parse_mentions`` finds in *text*.

    NOTE(review): this is still a debugging stub. The ``facets`` list is
    never filled, no handle is resolved to a DID, and the function falls
    off the end (returning ``None``) despite the ``List[Dict]`` annotation.
    TODO: build and return the facets once handle resolution is added.
    """
    facets = []  # placeholder accumulator; nothing is appended yet
    for mention in parse_mentions(text):
        print(mention)
# Demo run: one mention at the very start of the text and one mid-sentence.
sample_text = "@cheerlights.bsky.social test test test @cymplecy.bsky.social"
parse_facets(sample_text)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement