Advertisement
rgruber

second level domain (SLD) regexp

Jan 4th, 2019
360
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. reUrlOptHTTP: /^((https?|ftp):\/\/)?(((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&\'\(\)\*\+,;=]|:)*@)?(((\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5]))|((([a-z]|\d|_|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|\d|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.)+(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])*([a-z]|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])))\.?)(:\d*)?)(\/((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&\'\(\)\*\+,;=]|:|@)+(\/(([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&\'\(\)\*\+,;=]|:|@)*)*)?)?(\?((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&\'\(\)\*\+,;=]|:|@)|[\uE000-\uF8FF]|\/|\?)*)?(\#((([a-z]|\d|-|\.|_|~|[\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF])|(%[\da-f]{2})|[!\$&\'\(\)\*\+,;=]|:|@)|\/|\?)*)?$/i,
  2.  
  3.  
  4.         list: {
  5.             ac: "com|gov|mil|net|org",
  6.             ae: "ac|co|gov|mil|name|net|org|pro|sch",
  7.             af: "com|edu|gov|net|org",
  8.             al: "com|edu|gov|mil|net|org",
  9.             ao: "co|ed|gv|it|og|pb",
  10.             ar: "com|edu|gob|gov|int|mil|net|org|tur",
  11.             at: "ac|co|gv|or",
  12.             au: "asn|com|csiro|edu|gov|id|net|org",
  13.             ba: "co|com|edu|gov|mil|net|org|rs|unbi|unmo|unsa|untz|unze",
  14.             bb: "biz|co|com|edu|gov|info|net|org|store|tv",
  15.             bh: "biz|cc|com|edu|gov|info|net|org",
  16.             bn: "com|edu|gov|net|org",
  17.             bo: "com|edu|gob|gov|int|mil|net|org|tv",
  18.             br: "adm|adv|agr|am|arq|art|ato|b|bio|blog|bmd|cim|cng|cnt|com|coop|ecn|edu|eng|esp|etc|eti|far|flog|fm|fnd|fot|fst|g12|ggf|gov|imb|ind|inf|jor|jus|lel|mat|med|mil|mus|net|nom|not|ntr|odo|org|ppg|pro|psc|psi|qsl|rec|slg|srv|tmp|trd|tur|tv|vet|vlog|wiki|zlg",
  19.             bs: "com|edu|gov|net|org",
  20.             bz: "du|et|om|ov|rg",
  21.             ca: "ab|bc|mb|nb|nf|nl|ns|nt|nu|on|pe|qc|sk|yk",
  22.             ck: "biz|co|edu|gen|gov|info|net|org",
  23.             cn: "ac|ah|bj|com|cq|edu|fj|gd|gov|gs|gx|gz|ha|hb|he|hi|hl|hn|jl|js|jx|ln|mil|net|nm|nx|org|qh|sc|sd|sh|sn|sx|tj|tw|xj|xz|yn|zj",
  24.             co: "com|edu|gov|mil|net|nom|org",
  25.             cr: "ac|c|co|ed|fi|go|or|sa",
  26.             cy: "ac|biz|com|ekloges|gov|ltd|name|net|org|parliament|press|pro|tm",
  27.             do: "art|com|edu|gob|gov|mil|net|org|sld|web",
  28.             dz: "art|asso|com|edu|gov|net|org|pol",
  29.             ec: "com|edu|fin|gov|info|med|mil|net|org|pro",
  30.             eg: "com|edu|eun|gov|mil|name|net|org|sci",
  31.             er: "com|edu|gov|ind|mil|net|org|rochest|w",
  32.             es: "com|edu|gob|nom|org",
  33.             et: "biz|com|edu|gov|info|name|net|org",
  34.             fj: "ac|biz|com|info|mil|name|net|org|pro",
  35.             fk: "ac|co|gov|net|nom|org",
  36.             fr: "asso|com|f|gouv|nom|prd|presse|tm",
  37.             gg: "co|net|org",
  38.             gh: "com|edu|gov|mil|org",
  39.             gn: "ac|com|gov|net|org",
  40.             gr: "com|edu|gov|mil|net|org",
  41.             gt: "com|edu|gob|ind|mil|net|org",
  42.             gu: "com|edu|gov|net|org",
  43.             hk: "com|edu|gov|idv|net|org",
  44.             id: "ac|co|go|mil|net|or|sch|web",
  45.             il: "ac|co|gov|idf|k12|muni|net|org",
  46.             in: "ac|co|edu|ernet|firm|gen|gov|i|ind|mil|net|nic|org|res",
  47.             iq: "com|edu|gov|i|mil|net|org",
  48.             ir: "ac|co|dnssec|gov|i|id|net|org|sch",
  49.             it: "edu|gov",
  50.             je: "co|net|org",
  51.             jo: "com|edu|gov|mil|name|net|org|sch",
  52.             jp: "ac|ad|co|ed|go|gr|lg|ne|or",
  53.             ke: "ac|co|go|info|me|mobi|ne|or|sc",
  54.             kh: "com|edu|gov|mil|net|org|per",
  55.             ki: "biz|com|de|edu|gov|info|mob|net|org|tel",
  56.             km: "asso|com|coop|edu|gouv|k|medecin|mil|nom|notaires|pharmaciens|presse|tm|veterinaire",
  57.             kn: "edu|gov|net|org",
  58.             kr: "ac|busan|chungbuk|chungnam|co|daegu|daejeon|es|gangwon|go|gwangju|gyeongbuk|gyeonggi|gyeongnam|hs|incheon|jeju|jeonbuk|jeonnam|k|kg|mil|ms|ne|or|pe|re|sc|seoul|ulsan",
  59.             kw: "com|edu|gov|net|org",
  60.             ky: "com|edu|gov|net|org",
  61.             kz: "com|edu|gov|mil|net|org",
  62.             lb: "com|edu|gov|net|org",
  63.             lk: "assn|com|edu|gov|grp|hotel|int|ltd|net|ngo|org|sch|soc|web",
  64.             lr: "com|edu|gov|net|org",
  65.             lv: "asn|com|conf|edu|gov|id|mil|net|org",
  66.             ly: "com|edu|gov|id|med|net|org|plc|sch",
  67.             ma: "ac|co|gov|m|net|org|press",
  68.             mc: "asso|tm",
  69.             me: "ac|co|edu|gov|its|net|org|priv",
  70.             mg: "com|edu|gov|mil|nom|org|prd|tm",
  71.             mk: "com|edu|gov|inf|name|net|org|pro",
  72.             ml: "com|edu|gov|net|org|presse",
  73.             mn: "edu|gov|org",
  74.             mo: "com|edu|gov|net|org",
  75.             mt: "com|edu|gov|net|org",
  76.             mv: "aero|biz|com|coop|edu|gov|info|int|mil|museum|name|net|org|pro",
  77.             mw: "ac|co|com|coop|edu|gov|int|museum|net|org",
  78.             mx: "com|edu|gob|net|org",
  79.             my: "com|edu|gov|mil|name|net|org|sch",
  80.             nf: "arts|com|firm|info|net|other|per|rec|store|web",
  81.             ng: "biz|com|edu|gov|mil|mobi|name|net|org|sch",
  82.             ni: "ac|co|com|edu|gob|mil|net|nom|org",
  83.             np: "com|edu|gov|mil|net|org",
  84.             nr: "biz|com|edu|gov|info|net|org",
  85.             om: "ac|biz|co|com|edu|gov|med|mil|museum|net|org|pro|sch",
  86.             pe: "com|edu|gob|mil|net|nom|org|sld",
  87.             ph: "com|edu|gov|i|mil|net|ngo|org",
  88.             pk: "biz|com|edu|fam|gob|gok|gon|gop|gos|gov|net|org|web",
  89.             pl: "art|bialystok|biz|com|edu|gda|gdansk|gorzow|gov|info|katowice|krakow|lodz|lublin|mil|net|ngo|olsztyn|org|poznan|pwr|radom|slupsk|szczecin|torun|warszawa|waw|wroc|wroclaw|zgora",
  90.             pr: "ac|biz|com|edu|est|gov|info|isla|name|net|org|pro|prof",
  91.             ps: "com|edu|gov|net|org|plo|sec",
  92.             pw: "belau|co|ed|go|ne|or",
  93.             ro: "arts|com|firm|info|nom|nt|org|rec|store|tm|www",
  94.             rs: "ac|co|edu|gov|in|org",
  95.             sb: "com|edu|gov|net|org",
  96.             sc: "com|edu|gov|net|org",
  97.             sh: "co|com|edu|gov|net|nom|org",
  98.             sl: "com|edu|gov|net|org",
  99.             st: "co|com|consulado|edu|embaixada|gov|mil|net|org|principe|saotome|store",
  100.             sv: "com|edu|gob|org|red",
  101.             sz: "ac|co|org",
  102.             tr: "av|bbs|bel|biz|com|dr|edu|gen|gov|info|k12|name|net|org|pol|tel|tsk|tv|web",
  103.             tt: "aero|biz|cat|co|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel",
  104.             tw: "club|com|ebiz|edu|game|gov|idv|mil|net|org",
  105.             mu: "ac|co|com|gov|net|or|org",
  106.             mz: "ac|co|edu|gov|org",
  107.             na: "co|com",
  108.             nz: "ac|co|cri|geek|gen|govt|health|iwi|maori|mil|net|org|parliament|school",
  109.             pa: "abo|ac|com|edu|gob|ing|med|net|nom|org|sld",
  110.             pt: "com|edu|gov|int|net|nome|org|publ",
  111.             py: "com|edu|gov|mil|net|org",
  112.             qa: "com|edu|gov|mil|net|org",
  113.             re: "asso|com|nom",
  114.             ru: "ac|adygeya|altai|amur|arkhangelsk|astrakhan|bashkiria|belgorod|bir|bryansk|buryatia|cbg|chel|chelyabinsk|chita|chukotka|chuvashia|com|dagestan|e-burg|edu|gov|grozny|int|irkutsk|ivanovo|izhevsk|jar|joshkar-ola|kalmykia|kaluga|kamchatka|karelia|kazan|kchr|kemerovo|khabarovsk|khakassia|khv|kirov|koenig|komi|kostroma|kranoyarsk|kuban|kurgan|kursk|lipetsk|magadan|mari|mari-el|marine|mil|mordovia|mosreg|msk|murmansk|nalchik|net|nnov|nov|novosibirsk|nsk|omsk|orenburg|org|oryol|penza|perm|pp|pskov|ptz|rnd|ryazan|sakhalin|samara|saratov|simbirsk|smolensk|spb|stavropol|stv|surgut|tambov|tatarstan|tom|tomsk|tsaritsyn|tsk|tula|tuva|tver|tyumen|udm|udmurtia|ulan-ude|vladikavkaz|vladimir|vladivostok|volgograd|vologda|voronezh|vrn|vyatka|yakutia|yamal|yekaterinburg|yuzhno-sakhalinsk",
  115.             rw: "ac|co|com|edu|gouv|gov|int|mil|net",
  116.             sa: "com|edu|gov|med|net|org|pub|sch",
  117.             sd: "com|edu|gov|info|med|net|org|tv",
  118.             se: "a|ac|b|bd|c|d|e|f|g|h|i|k|l|m|n|o|org|p|parti|pp|press|r|s|t|tm|u|w|x|y|z",
  119.             sg: "com|edu|gov|idn|net|org|per",
  120.             sn: "art|com|edu|gouv|org|perso|univ",
  121.             sy: "com|edu|gov|mil|net|news|org",
  122.             th: "ac|co|go|in|mi|net|or",
  123.             tj: "ac|biz|co|com|edu|go|gov|info|int|mil|name|net|nic|org|test|web",
  124.             tn: "agrinet|com|defense|edunet|ens|fin|gov|ind|info|intl|mincom|nat|net|org|perso|rnrt|rns|rnu|tourism",
  125.             tz: "ac|co|go|ne|or",
  126.             ua: "biz|cherkassy|chernigov|chernovtsy|ck|cn|co|com|crimea|cv|dn|dnepropetrovsk|donetsk|dp|edu|gov|if|in|ivano-frankivsk|kh|kharkov|kherson|khmelnitskiy|kiev|kirovograd|km|kr|ks|kv|lg|lugansk|lutsk|lviv|me|mk|net|nikolaev|od|odessa|org|pl|poltava|pp|rovno|rv|sebastopol|sumy|te|ternopil|uzhgorod|vinnica|vn|zaporizhzhe|zhitomir|zp|zt",
  127.             ug: "ac|co|go|ne|or|org|sc",
  128.             uk: "ac|bl|british-library|co|cym|gov|govt|icnet|jet|lea|ltd|me|mil|mod|national-library-scotland|nel|net|nhs|nic|nls|org|orgn|parliament|plc|police|sch|scot|soc",
  129.             us: "dni|fed|isa|kids|nsn",
  130.             uy: "com|edu|gub|mil|net|org",
  131.             ve: "co|com|edu|gob|info|mil|net|org|web",
  132.             vi: "co|com|k12|net|org",
  133.             vn: "ac|biz|com|edu|gov|health|info|int|name|net|org|pro",
  134.             ye: "co|com|gov|ltd|me|net|org|plc",
  135.             yu: "ac|co|edu|gov|org",
  136.             za: "ac|agric|alt|bourse|city|co|cybernet|db|edu|gov|grondar|iaccess|imt|inca|landesign|law|mil|net|ngo|nis|nom|olivetti|org|pix|school|tm|web",
  137.             zm: "ac|co|com|edu|gov|net|org|sch"
  138.         }
  // Build one alternation of "(<sld labels>).<tld>" groups from o.list and
  // compile it into two case-insensitive matchers on `o`.
  // NOTE(review): `o`, `r`, `t` and `e` are defined outside this excerpt.
  // `r` is invoked as r.call(o.list, key) and used as a filter — presumably
  // Object.prototype.hasOwnProperty; `t` is presumably initialised to "".
  // Confirm both against the surrounding code.
  for (e in o.list) {
    if (r.call(o.list, e)) {
      // The dot between the SLD group and the TLD is escaped ("\\." in the
      // string, "\." in the regex). Unescaped it matched ANY character, so
      // inputs such as "coXuk" were accepted as "co.uk".
      t += "|((" + o.list[e] + ")\\." + e + ")";
    }
  }
  // t.substr(1) drops the leading "|" produced by the first iteration.
  // has_expression: the input ends with ".<sld>.<tld>".
  o.has_expression = RegExp("\\.(" + t.substr(1) + ")$", "i");
  // is_expression: the whole input is exactly "<sld>.<tld>".
  o.is_expression = RegExp("^(" + t.substr(1) + ")$", "i");
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement