doc_gonzo

rss

Apr 8th, 2020
343
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
TCL 28.41 KB | None | 0 0
  1. # -*- tab-width: 4; indent-tabs-mode: t; -*-
  2. # rss-synd.tcl -- 0.5.1
  3. #
  4. #   Highly configurable asynchronous RSS & Atom feed reader for Eggdrops
  5. #     written in TCL. Supports multiple feeds, gzip compressed feeds,
  6. #     automatically messaging channels with updates at set intervals,
  7. #     custom private/channel triggers and more.
  8. #
  9. # Copyright (c) 2011 Andrew Scott, HM2K
  10. #
  11. # Name: RSS & Atom Syndication Script for Eggdrop
  12. # Author: Andrew Scott <andrew.scott@wizzer-it.com>
  13. # Author: HM2K <irc@hm2k.org>
  14. # License: See LICENSE file
  15. # Link: http://code.google.com/p/rss-synd/
  16. # Tags: rss, atom, syndication
  17. # Updated: 05-Jan-2011
  18. #
  19. ###Usage
  20. # See README file
  21. #
  22. ###Revisions
  23. # See HISTORY file
  24.  
  25. #
  26. # Include Settings
  27. #
  28. if {[catch {source scripts/rss-synd-settings.tcl} err]} {
  29.   putlog "Error: Could not load 'rss-synd-settings.tcl file.'";
  30. }
  31.  
  32. proc ::rss-synd::init {args} {
  33.     variable rss
  34.     variable default
  35.     variable version
  36.     variable packages
  37.  
  38.     set version(number) 0.5.1
  39.     set version(date)   "2012-02-27"
  40.  
  41.     package require http
  42.     set packages(base64) [catch {package require base64}]; # http auth
  43.     set packages(tls) [catch {package require tls}]; # https
  44.     set packages(trf) [catch {package require Trf}]; # gzip compression
  45.  
  46.     foreach feed [array names rss] {
  47.         array set tmp $default
  48.         array set tmp $rss($feed)
  49.  
  50.         set required [list "announce-output" "trigger-output" "max-depth" "update-interval" "timeout" "channels" "output" "user-agent" "url" "database" "trigger-type" "announce-type"]
  51.         foreach {key value} [array get tmp] {
  52.             if {[set ptr [lsearch -exact $required $key]] >= 0} {
  53.                 set required [lreplace $required $ptr $ptr]
  54.             }
  55.         }
  56.  
  57.         if {[llength $required] == 0} {
  58.             regsub -nocase -all -- {@@feedid@@} $tmp(trigger) $feed tmp(trigger)
  59.  
  60.             set ulist [regexp -nocase -inline -- {(http(?:s?))://(?:(.[^:]+:.[^@]+)?)(?:@?)(.*)} $tmp(url)]
  61.  
  62.             if {[llength $ulist] == 0} {
  63.                 putlog "\002RSS Error\002: Unable to parse URL, Invalid format for feed \"$feed\"."
  64.                 unset rss($feed)
  65.                 continue
  66.             }
  67.  
  68.             set tmp(url) "[lindex $ulist 1]://[lindex $ulist 3]"
  69.  
  70.             if {[lindex $ulist 1] == "https"} {
  71.                 if {$packages(tls) != 0} {
  72.                     putlog "\002RSS Error\002: Unable to find tls package required for https, unloaded feed \"$feed\"."
  73.                     unset rss($feed)
  74.                     continue
  75.                 }
  76.  
  77.                 ::http::register https 443 ::tls::socket
  78.             }
  79.  
  80.             if {(![info exists tmp(url-auth)]) || ($tmp(url-auth) == "")} {
  81.                 set tmp(url-auth) ""
  82.  
  83.                 if {[lindex $ulist 2] != ""} {
  84.                     if {$packages(base64) != 0} {
  85.                         putlog "\002RSS Error\002: Unable to find base64 package required for http authentication, unloaded feed \"$feed\"."
  86.                         unset rss($feed)
  87.                         continue
  88.                     }
  89.  
  90.                     set tmp(url-auth) [::base64::encode [lindex $ulist 2]]
  91.                 }
  92.             }
  93.  
  94.             if {[regexp {^[0123]{1}:[0123]{1}$} $tmp(trigger-type)] != 1} {
  95.                 putlog "\002RSS Error\002: Invalid 'trigger-type' syntax for feed \"$feed\"."
  96.                 unset rss($feed)
  97.                 continue
  98.             }
  99.  
  100.             set tmp(trigger-type) [split $tmp(trigger-type) ":"]
  101.  
  102.             if {([info exists tmp(charset)]) && ([lsearch -exact [encoding names] [string tolower $tmp(charset)]] < 0)} {
  103.                 putlog "\002RSS Error\002: Unable to load feed \"$feed\", unknown encoding \"$tmp(charset)\"."
  104.                 unset rss($feed)
  105.                 continue
  106.             }
  107.            
  108.             if {([info exists tmp(feedencoding)]) && ([lsearch -exact [encoding names] [string tolower $tmp(feedencoding)]] < 0)} {
  109.                 putlog "\002RSS Error\002: Unable to load feed \"$feed\", unknown feedencoding \"$tmp(feedencoding)\"."
  110.                 unset rss($feed)
  111.                 continue
  112.             }
  113.  
  114.             set tmp(updated) 0
  115.             if {([file exists $tmp(database)]) && ([set mtime [file mtime $tmp(database)]] < [unixtime])} {
  116.                 set tmp(updated) [file mtime $tmp(database)]
  117.             }
  118.  
  119.             set rss($feed) [array get tmp]
  120.         } else {
  121.             putlog "\002RSS Error\002: Unable to load feed \"$feed\", missing one or more required settings. \"[join $required ", "]\""
  122.             unset rss($feed)
  123.         }
  124.  
  125.         unset tmp
  126.     }
  127.  
  128.     bind evnt -|- prerehash [namespace current]::deinit
  129.     bind time -|- {* * * * *} [namespace current]::feed_get
  130.     bind pubm -|- {* *} [namespace current]::trigger
  131.     bind msgm -|- {*} [namespace current]::trigger
  132.  
  133.     putlog "\002RSS Syndication Script v$version(number)\002 ($version(date)): Loaded."
  134. }
  135.  
  136. proc ::rss-synd::deinit {args} {
  137.     catch {unbind evnt -|- prerehash [namespace current]::deinit}
  138.     catch {unbind time -|- {* * * * *} [namespace current]::feed_get}
  139.     catch {unbind pubm -|- {* *} [namespace current]::trigger}
  140.     catch {unbind msgm -|- {*} [namespace current]::trigger}
  141.  
  142.     foreach child [namespace children] {
  143.         catch {[set child]::deinit}
  144.     }
  145.  
  146.     namespace delete [namespace current]
  147. }
  148.  
  149. #
  150. # Trigger Function
  151. ##
  152.  
  153. proc ::rss-synd::trigger {nick user handle args} {
  154.     variable rss
  155.     variable default
  156.  
  157.     set i 0
  158.     set chan ""
  159.     if {[llength $args] == 2} {
  160.         set chan [lindex $args 0]
  161.         incr i
  162.     }
  163.     set text [lindex $args $i]
  164.  
  165.     array set tmp $default
  166.  
  167.     if {[info exists tmp(trigger)]} {
  168.         regsub -all -- {@@(.*?)@@} $tmp(trigger) "" tmp_trigger
  169.         set tmp_trigger [string trimright $tmp_trigger]
  170.  
  171.         if {[string equal -nocase $text $tmp_trigger]} {
  172.             set list_feeds [list]
  173.         }
  174.     }
  175.  
  176.     unset -nocomplain tmp tmp_trigger
  177.  
  178.     foreach name [array names rss] {
  179.         array set feed $rss($name)
  180.  
  181.         if {(![info exists list_feeds]) && \
  182.             ([string equal -nocase $text $feed(trigger)])} {
  183.             if {(![[namespace current]::check_channel $feed(channels) $chan]) && \
  184.                 ([string length $chan] != 0)} {
  185.                 continue
  186.             }
  187.  
  188.             set feed(nick) $nick
  189.  
  190.             if {$chan != ""} {
  191.                 set feed(type) [lindex $feed(trigger-type) 0]
  192.                 set feed(channels) $chan
  193.             } else {
  194.                 set feed(type) [lindex $feed(trigger-type) 1]
  195.                 set feed(channels) ""
  196.             }
  197.  
  198.             if {[catch {set data [[namespace current]::feed_read]} error] == 0} {
  199.                 if {![[namespace current]::feed_info $data]} {
  200.                     putlog "\002RSS Error\002: Invalid feed database file format ($feed(database))!"
  201.                     return
  202.                 }
  203.  
  204.                 if {$feed(trigger-output) > 0} {
  205.                     set feed(announce-output) $feed(trigger-output)
  206.  
  207.                     [namespace current]::feed_output $data
  208.                 }
  209.             } else {
  210.                 putlog "\002RSS Warning\002: $error."
  211.             }
  212.         } elseif {[info exists list_feeds]} {
  213.             if {$chan != ""} {
  214.                 # triggered from a channel
  215.                 if {[[namespace current]::check_channel $feed(channels) $chan]} {
  216.                     lappend list_feeds $feed(trigger)
  217.                 }
  218.             } else {
  219.                 # triggered from a privmsg
  220.                 foreach tmp_chan $feed(channels) {
  221.                     if {([catch {botonchan $tmp_chan}] == 0) && \
  222.                         ([onchan $nick $tmp_chan])} {
  223.                         lappend list_feeds $feed(trigger)
  224.                         continue
  225.                     }
  226.                 }
  227.             }
  228.         }
  229.     }
  230.  
  231.     if {[info exists list_feeds]} {
  232.         if {[llength $list_feeds] == 0} {
  233.             lappend list_feeds "None"
  234.         }
  235.  
  236.         lappend list_msgs "Available feeds: [join $list_feeds ", "]."
  237.  
  238.         if {$chan != ""} {
  239.             set list_type [lindex $feed(trigger-type) 0]
  240.             set list_targets $chan
  241.         } else {
  242.             set list_type [lindex $feed(trigger-type) 1]
  243.             set list_targets ""
  244.         }
  245.  
  246.         [namespace current]::feed_msg $list_type $list_msgs list_targets $nick
  247.     }
  248. }
  249.  
  250. #
  251. # Feed Retrieving Functions
  252. ##
  253.  
  254. proc ::rss-synd::feed_get {args} {
  255.     variable rss
  256.  
  257.     set i 0
  258.     foreach name [array names rss] {
  259.         if {$i == 3} { break }
  260.  
  261.         array set feed $rss($name)
  262.  
  263.         if {$feed(updated) <= [expr { [unixtime] - ($feed(update-interval) * 60) }]} {
  264.             ::http::config -useragent $feed(user-agent)
  265.  
  266.             set feed(type) $feed(announce-type)
  267.             set feed(headers) [list]
  268.  
  269.             if {$feed(url-auth) != ""} {
  270.                 lappend feed(headers) "Authorization" "Basic $feed(url-auth)"
  271.             }
  272.  
  273.             if {([info exists feed(enable-gzip)]) && ($feed(enable-gzip) == 1)} {
  274.                 lappend feed(headers) "Accept-Encoding" "gzip"
  275.             }
  276.  
  277.             catch {::http::geturl "$feed(url)" -command "[namespace current]::feed_callback {[array get feed] depth 0}" -timeout $feed(timeout) -headers $feed(headers)} debug
  278.  
  279.             set feed(updated) [unixtime]
  280.             set rss($name) [array get feed]
  281.             incr i
  282.         }
  283.  
  284.         unset feed
  285.     }
  286. }
  287.  
  288. proc ::rss-synd::feed_callback {feedlist args} {
  289.     set token [lindex $args end]
  290.     array set feed $feedlist
  291.  
  292.     upvar 0 $token state
  293.  
  294.     if {[set status $state(status)] != "ok"} {
  295.         if {$status == "error"} { set status $state(error) }
  296.         putlog "\002RSS HTTP Error\002: $state(url) (State: $status)"
  297.         ::http::cleanup $token
  298.         return 1
  299.     }
  300.  
  301.     array set meta $state(meta)
  302.  
  303.     if {([::http::ncode $token] == 302) || ([::http::ncode $token] == 301)} {
  304.         set feed(depth) [expr {$feed(depth) + 1 }]
  305.  
  306.         if {$feed(depth) < $feed(max-depth)} {
  307.             catch {::http::geturl "$meta(Location)" -command "[namespace current]::feed_callback {$feedlist}" -timeout $feed(timeout) -headers $feed(headers)}
  308.         } else {
  309.             putlog "\002RSS HTTP Error\002: $state(url) (State: timeout, max refer limit reached)"
  310.         }
  311.  
  312.         ::http::cleanup $token
  313.         return 1
  314.     } elseif {[::http::ncode $token] != 200} {
  315.         putlog "\002RSS HTTP Error\002: $state(url) ($state(http))"
  316.         ::http::cleanup $token
  317.         return 1
  318.     }
  319.  
  320.     set data [::http::data $token]
  321.    
  322.     if {[info exists feed(feedencoding)]} {
  323.         set data [encoding convertfrom [string tolower $feed(feedencoding)] $data]
  324.     }
  325.  
  326.     if {[info exists feed(charset)]} {
  327.         if {[string tolower $feed(charset)] == "utf-8" && [is_utf8_patched]} {
  328.             #do nothing, already utf-8
  329.         } else {
  330.             set data [encoding convertto [string tolower $feed(charset)] $data]
  331.         }
  332.     }
  333.  
  334.     if {([info exists meta(Content-Encoding)]) && \
  335.         ([string equal $meta(Content-Encoding) "gzip"])} {
  336.         if {[catch {[namespace current]::feed_gzip $data} data] != 0} {
  337.             putlog "\002RSS Error\002: Unable to decompress \"$state(url)\": $data"
  338.             ::http::cleanup $token
  339.             return 1
  340.         }
  341.     }
  342.  
  343.     if {[catch {[namespace current]::xml_list_create $data} data] != 0} {
  344.         putlog "\002RSS Error\002: Unable to parse feed properly, parser returned error. \"$state(url)\""
  345.         ::http::cleanup $token
  346.         return 1
  347.     }
  348.  
  349.     if {[string length $data] == 0} {
  350.         putlog "\002RSS Error\002: Unable to parse feed properly, no data returned. \"$state(url)\""
  351.         ::http::cleanup $token
  352.         return 1
  353.     }
  354.  
  355.     set odata ""
  356.     if {[catch {set odata [[namespace current]::feed_read]} error] != 0} {
  357.         putlog "\002RSS Warning\002: $error."
  358.     }
  359.  
  360.     if {![[namespace current]::feed_info $data]} {
  361.         putlog "\002RSS Error\002: Invalid feed format ($state(url))!"
  362.         ::http::cleanup $token
  363.         return 1
  364.     }
  365.  
  366.     ::http::cleanup $token
  367.  
  368.     if {[catch {[namespace current]::feed_write $data} error] != 0} {
  369.         putlog "\002RSS Database Error\002: $error."
  370.         return 1
  371.     }
  372.  
  373.     if {$feed(announce-output) > 0} {
  374.         [namespace current]::feed_output $data $odata
  375.     }
  376. }
  377.  
  378. proc ::rss-synd::feed_info {data {target "feed"}} {
  379.     upvar 1 $target feed
  380.     set length [[namespace current]::xml_get_info $data [list -1 "*"]]
  381.  
  382.     for {set i 0} {$i < $length} {incr i} {
  383.         set type [[namespace current]::xml_get_info $data [list $i "*"] "name"]
  384.  
  385.         # tag-name: the name of the element that contains each article and its data
  386.         # tag-list: the position in the xml structure where all 'tag-name' reside
  387.         switch [string tolower $type] {
  388.             rss {
  389.                 # RSS v0.9x & x2.0
  390.                 set feed(tag-list) [list 0 "channel"]
  391.                 set feed(tag-name) "item"
  392.                 break
  393.             }
  394.             rdf:rdf {
  395.                 # RSS v1.0
  396.                 set feed(tag-list) [list]
  397.                 set feed(tag-name) "item"
  398.                 break
  399.             }
  400.             feed {
  401.                 # ATOM
  402.                 set feed(tag-list) [list]
  403.                 set feed(tag-name) "entry"
  404.                 break
  405.             }
  406.         }
  407.     }
  408.  
  409.     if {![info exists feed(tag-list)]} {
  410.         return 0
  411.     }
  412.  
  413.     set feed(tag-feed) [list 0 $type]
  414.  
  415.     return 1
  416. }
  417.  
  418. # decompress gzip formatted data
  419. proc ::rss-synd::feed_gzip {cdata} {
  420.     return $cdata
  421.     }
  422. #   variable packages
  423. #
  424. #   if {(![info exists packages(trf)]) || \
  425. #       ($packages(trf) != 0)} {
  426. #       error "Trf package not found."
  427. #   }
  428. #
  429. #   # remove the 10 byte gzip header and 8 byte footer
  430. #   set cdata [string range $cdata 10 [expr { [string length $cdata] - 9 } ]]
  431. #
  432. #   # decompress the raw data
  433. #   if {[catch {zip -mode decompress -nowrap 1 $cdata} data] != 0} {
  434. #       error $data
  435. #   }
  436. #
  437. #   return $data
  438. #}
  439.  
  440. proc ::rss-synd::feed_read { } {
  441.     upvar 1 feed feed
  442.  
  443.     if {[catch {open $feed(database) "r"} fp] != 0} {
  444.         error $fp
  445.     }
  446.  
  447.     set data [read -nonewline $fp]
  448.  
  449.     close $fp
  450.  
  451.     return $data
  452. }
  453.  
  454. proc ::rss-synd::feed_write {data} {
  455.     upvar 1 feed feed
  456.  
  457.     if {[catch {open $feed(database) "w+"} fp] != 0} {
  458.         error $fp
  459.     }
  460.  
  461.     set data [string map { "\n" "" "\r" "" } $data]
  462.  
  463.     puts -nonewline $fp $data
  464.  
  465.     close $fp
  466. }
  467.  
  468. #
  469. # XML Functions
  470. ##
  471.  
  472. proc ::rss-synd::xml_list_create {xml_data} {
  473.     set xml_list [list]
  474.     set ns_current [namespace current]
  475.  
  476.     set ptr 0
  477.     while {[set tag_start [${ns_current}::xml_get_position $xml_data $ptr]] != ""} {
  478.         set tag_start_first [lindex $tag_start 0]
  479.         set tag_start_last [lindex $tag_start 1]
  480.  
  481.         set tag_string [string range $xml_data $tag_start_first $tag_start_last]
  482.  
  483.         # move the pointer to the next character after the current tag
  484.         set last_ptr $ptr
  485.         set ptr [expr { $tag_start_last + 2 }]
  486.  
  487.         array set tag [list]
  488.         # match 'special' tags that dont close
  489.         if {[regexp -nocase -- {^!(\[CDATA|--|DOCTYPE)} $tag_string]} {
  490.             set tag_data $tag_string
  491.  
  492.             regexp -nocase -- {^!\[CDATA\[(.*?)\]\]$} $tag_string -> tag_data
  493.             regexp -nocase -- {^!--(.*?)--$} $tag_string -> tag_data
  494.  
  495.             if {[info exists tag_data]} {
  496.                 set tag(data) [${ns_current}::xml_escape $tag_data]
  497.             }
  498.         } else {
  499.             # we should only ever encounter opening tags, if we hit a closing one somethings wrong
  500.             if {[string match {[/]*} $tag_string]} {
  501.                 putlog "\002RSS Malformed Feed\002: Tag not open: \"<$tag_string>\" ($tag_start_first => $tag_start_last)"
  502.                 continue
  503.             }
  504.  
  505.             # split up the tag name and attributes
  506.             regexp -- {(.[^ \/\n\r]*)(?: |\n|\r\n|\r|)(.*?)$} $tag_string -> tag_name tag_args
  507.             set tag(name) [${ns_current}::xml_escape $tag_name]
  508.  
  509.             # split up all of the tags attributes
  510.             set tag(attrib) [list]
  511.             if {[string length $tag_args] > 0} {
  512.                 set values [regexp -inline -all -- {(?:\s*|)(.[^=]*)=["'](.[^"']*)["']} $tag_args]
  513.  
  514.                 foreach {r_match r_tag r_value} $values {
  515.                     lappend tag(attrib) [${ns_current}::xml_escape $r_tag] [${ns_current}::xml_escape $r_value]
  516.                 }
  517.             }
  518.  
  519.             # find the end tag of non-self-closing tags
  520.             if {(![regexp {(\?|!|/)(\s*)$} $tag_args]) || \
  521.                 (![string match "\?*" $tag_string])} {
  522.                 set tmp_num 1
  523.                 set tag_success 0
  524.                 set tag_end_last $ptr
  525.  
  526.                 # find the correct closing tag if there are nested elements
  527.                 #  with the same name
  528.                 while {$tmp_num > 0} {
  529.                     # search for a possible closing tag
  530.                     set tag_success [regexp -indices -start $tag_end_last -- "</$tag_name>" $xml_data tag_end]
  531.  
  532.                     set last_tag_end_last $tag_end_last
  533.  
  534.                     set tag_end_first [lindex $tag_end 0]
  535.                     set tag_end_last [lindex $tag_end 1]
  536.  
  537.                     # check to see if there are any NEW opening tags within the
  538.                     #  previous closing tag and the new closing one
  539.                     incr tmp_num [regexp -all -- "<$tag_name\(\[\\s\\t\\n\\r\]+\(\[^/>\]*\)?\)?>" [string range $xml_data $last_tag_end_last $tag_end_last]]
  540.  
  541.                     incr tmp_num -1
  542.                 }
  543.  
  544.                 if {$tag_success == 0} {
  545.                     putlog "\002RSS Malformed Feed\002: Tag not closed: \"<$tag_name>\""
  546.                     return
  547.                 }
  548.  
  549.                 # set the pointer to after the last closing tag
  550.                 set ptr [expr { $tag_end_last + 1 }]
  551.  
  552.                 # remember tag_start*'s character index doesnt include the tag start and end characters
  553.                 set xml_sub_data [string range $xml_data [expr { $tag_start_last + 2 }] [expr { $tag_end_first - 1 }]]
  554.  
  555.                 # recurse the data within the currently open tag
  556.                 set result [${ns_current}::xml_list_create $xml_sub_data]
  557.  
  558.                 # set the list data returned from the recursion we just performed
  559.                 if {[llength $result] > 0} {
  560.                     set tag(children) $result
  561.  
  562.                 # set the current data we have because we're already at the end of a branch
  563.                 #  (ie: the recursion didnt return any data)
  564.                 } else {
  565.                     set tag(data) [${ns_current}::xml_escape $xml_sub_data]
  566.                 }
  567.             }
  568.         }
  569.  
  570.         # insert any plain data that appears before the current element
  571.         if {$last_ptr != [expr { $tag_start_first - 1 }]} {
  572.             lappend xml_list [list "data" [${ns_current}::xml_escape [string range $xml_data $last_ptr [expr { $tag_start_first - 2 }]]]]
  573.         }
  574.  
  575.         # inset tag data
  576.         lappend xml_list [array get tag]
  577.  
  578.         unset tag
  579.     }
  580.  
  581.     # if there is still plain data left add it
  582.     if {$ptr < [string length $xml_data]} {
  583.         lappend xml_list [list "data" [${ns_current}::xml_escape [string range $xml_data $ptr end]]]
  584.     }
  585.  
  586.     return $xml_list
  587. }
  588.  
  589. # simple escape function
  590. proc ::rss-synd::xml_escape {string} {
  591.     regsub -all -- {([\{\}])} $string {\\\1} string
  592.  
  593.     return $string
  594. }
  595.  
  596. # this function is to replace:
  597. #  regexp -indices -start $ptr {<(!\[CDATA\[.+?\]\]|!--.+?--|!DOCTYPE.+?|.+?)>} $xml_data -> tag_start
  598. # which doesnt work correctly with tcl's re_syntax
  599. proc ::rss-synd::xml_get_position {xml_data ptr} {
  600.     set tag_start [list -1 -1]
  601.  
  602.     regexp -indices -start $ptr {<(.+?)>} $xml_data -> tmp(tag)
  603.     regexp -indices -start $ptr {<(!--.*?--)>} $xml_data -> tmp(comment)
  604.     regexp -indices -start $ptr {<(!DOCTYPE.+?)>} $xml_data -> tmp(doctype)
  605.     regexp -indices -start $ptr {<(!\[CDATA\[.+?\]\])>} $xml_data -> tmp(cdata)
  606.  
  607.     # 'tag' regexp should be compared last
  608.     foreach name [lsort [array names tmp]] {
  609.         set tmp_s [split $tmp($name)]
  610.         if {( ([lindex $tmp_s 0] < [lindex $tag_start 0]) && \
  611.               ([lindex $tmp_s 0] > -1) ) || \
  612.             ([lindex $tag_start 0] == -1)} {
  613.             set tag_start $tmp($name)
  614.         }
  615.     }
  616.  
  617.     if {([lindex $tag_start 0] == -1) || \
  618.         ([lindex $tag_start 1] == -1)}  {
  619.         set tag_start ""
  620.     }
  621.  
  622.     return $tag_start
  623. }
  624.  
  625. # recursivly flatten all data without tags or attributes
  626. proc ::rss-synd::xml_list_flatten {xml_list {level 0}} {
  627.     set xml_string ""
  628.  
  629.     foreach e_list $xml_list {
  630.         if {[catch {array set e_array $e_list}] != 0} {
  631.             return $xml_list
  632.         }
  633.  
  634.         if {[info exists e_array(children)]} {
  635.             append xml_string [[namespace current]::xml_list_flatten $e_array(children) [expr { $level + 1 }]]
  636.         } elseif {[info exists e_array(data)]} {
  637.             append xml_string $e_array(data)
  638.         }
  639.  
  640.         unset e_array
  641.     }
  642.  
  643.     return $xml_string
  644. }
  645.  
  646. # returns information on a data structure when given a path.
  647. #  paths can be specified using: [struct number] [struct name] <...>
  648. proc ::rss-synd::xml_get_info {xml_list path {element "data"}} {
  649.     set i 0
  650.  
  651.     foreach {t_data} $xml_list {
  652.         array set t_array $t_data
  653.  
  654.         # if the name doesnt exist set it so we can still reference the data
  655.         #  using the 'stuct name' *
  656.         if {![info exists t_array(name)]} {
  657.             set t_array(name) ""
  658.         }
  659.  
  660.         if {[string match -nocase [lindex $path 1] $t_array(name)]} {
  661.  
  662.             if {$i == [lindex $path 0]} {
  663.                 set result ""
  664.  
  665.                 if {([llength $path] == 2) && \
  666.                     ([info exists t_array($element)])} {
  667.                     set result $t_array($element)
  668.                 } elseif {[info exists t_array(children)]} {
  669.                     # shift the first path reference of the front of the path and recurse
  670.                     set result [[namespace current]::xml_get_info $t_array(children) [lreplace $path 0 1] $element]
  671.                 }
  672.  
  673.                 return $result
  674.             }
  675.  
  676.             incr i
  677.         }
  678.  
  679.         unset t_array
  680.     }
  681.  
  682.     if {[lindex $path 0] == -1} {
  683.         return $i
  684.     }
  685. }
  686.  
  687. # converts 'args' into a list in the same order
  688. proc ::rss-synd::xml_join_tags {args} {
  689.     set list [list]
  690.  
  691.     foreach tag $args {
  692.         foreach item $tag {
  693.             if {[string length $item] > 0} {
  694.                 lappend list $item
  695.             }
  696.         }
  697.     }
  698.  
  699.     return $list
  700. }
  701.  
  702. #
  703. # Output Feed Functions
  704. ##
  705.  
  706. proc ::rss-synd::feed_output {data {odata ""}} {
  707.     upvar 1 feed feed
  708.     set msgs [list]
  709.  
  710.     set path [[namespace current]::xml_join_tags $feed(tag-feed) $feed(tag-list) -1 $feed(tag-name)]
  711.     set count [[namespace current]::xml_get_info $data $path]
  712.  
  713.     for {set i 0} {($i < $count) && ($i < $feed(announce-output))} {incr i} {
  714.         set tmpp [[namespace current]::xml_join_tags $feed(tag-feed) $feed(tag-list) $i $feed(tag-name)]
  715.         set tmpd [[namespace current]::xml_get_info $data $tmpp "children"]
  716.  
  717.         if {[[namespace current]::feed_compare $odata $tmpd]} {
  718.             break
  719.         }
  720.  
  721.         set tmp_msg [[namespace current]::cookie_parse $data $i]
  722.         if {(![info exists feed(output-order)]) || \
  723.             ($feed(output-order) == 0)} {
  724.             set msgs [linsert $msgs 0 $tmp_msg]
  725.         } else {
  726.             lappend msgs $tmp_msg
  727.         }
  728.     }
  729.  
  730.     set nick [expr {[info exists feed(nick)] ? $feed(nick) : ""}]
  731.  
  732.     [namespace current]::feed_msg $feed(type) $msgs $feed(channels) $nick
  733. }
  734.  
  735. proc ::rss-synd::feed_msg {type msgs targets {nick ""}} {
  736.     # check if our target is a nick
  737.     if {(($nick != "") && \
  738.          ($targets == "")) || \
  739.         ([regexp -- {[23]} $type])} {
  740.         set targets $nick
  741.     }
  742.  
  743.     foreach msg $msgs {
  744.         foreach chan $targets {
  745.             if {([catch {botonchan $chan}] == 0) || \
  746.                 ([regexp -- {^[#&]} $chan] == 0)} {
  747.                 foreach line [split $msg "\n"] {
  748.                     if {($type == 1) || ($type == 3)} {
  749.                         putserv "NOTICE $chan :$line"
  750.                     } else {
  751.                         putserv "PRIVMSG $chan :$line"
  752.                     }
  753.                 }
  754.             }
  755.         }
  756.     }
  757. }
  758.  
  759. proc ::rss-synd::feed_compare {odata data} {
  760.     if {$odata == ""} {
  761.         return 0
  762.     }
  763.  
  764.     upvar 1 feed feed
  765.     array set ofeed [list]
  766.     [namespace current]::feed_info $odata "ofeed"
  767.  
  768.     if {[array size ofeed] == 0} {
  769.         putlog "\002RSS Error\002: Invalid feed format ($feed(database))!"
  770.         return 0
  771.     }
  772.  
  773.     if {[string equal -nocase [lindex $feed(tag-feed) 1] "feed"]} {
  774.         set cmp_items [list {0 "id"} "children" "" 3 {0 "link"} "attrib" "href" 2 {0 "title"} "children" "" 1]
  775.     } else {
  776.         set cmp_items [list {0 "guid"} "children" "" 3 {0 "link"} "children" "" 2 {0 "title"} "children" "" 1]
  777.     }
  778.  
  779.     set path [[namespace current]::xml_join_tags $ofeed(tag-feed) $ofeed(tag-list) -1 $ofeed(tag-name)]
  780.     set count [[namespace current]::xml_get_info $odata $path]
  781.  
  782.     for {set i 0} {$i < $count} {incr i} {
  783.         # extract the current article from the database
  784.         set tmpp [[namespace current]::xml_join_tags $ofeed(tag-feed) $ofeed(tag-list) $i $ofeed(tag-name)]
  785.         set tmpd [[namespace current]::xml_get_info $odata $tmpp "children"]
  786.  
  787.         set w 0; # weight value
  788.         set m 0; # item tag matches
  789.         foreach {cmp_path cmp_element cmp_attrib cmp_weight} $cmp_items {
  790.             # try and extract the tag info from the current article
  791.             set oresult [[namespace current]::xml_get_info $tmpd $cmp_path $cmp_element]
  792.             if {$cmp_element == "attrib"} {
  793.                 array set tmp $oresult
  794.                 catch {set oresult $tmp($cmp_attrib)}
  795.                 unset tmp
  796.             }
  797.  
  798.             # if the tag doesnt exist in the article ignore it
  799.             if {$oresult == ""} { continue }
  800.  
  801.             incr m
  802.  
  803.             # extract the tag info from the current article
  804.             set result [[namespace current]::xml_get_info $data $cmp_path $cmp_element]
  805.             if {$cmp_element == "attrib"} {
  806.                 array set tmp $result
  807.                 catch {set result $tmp($cmp_attrib)}
  808.                 unset tmp
  809.             }
  810.  
  811.             if {[string equal -nocase $oresult $result]} {
  812.                 set w [expr { $w + $cmp_weight }]
  813.             }
  814.         }
  815.  
  816.         # value of 100 or more means its a match
  817.         if {($m > 0) && \
  818.             ([expr { round(double($w) / double($m) * 100) }] >= 100)} {
  819.             return 1
  820.         }
  821.     }
  822.  
  823.     return 0
  824. }
  825.  
  826. #
  827. # Cookie Parsing Functions
  828. ##
  829.  
  830. proc ::rss-synd::cookie_parse {data current} {
  831.     upvar 1 feed feed
  832.     set output $feed(output)
  833.  
  834.     set eval 0
  835.     if {([info exists feed(evaluate-tcl)]) && ($feed(evaluate-tcl) == 1)} { set eval 1 }
  836.     set variable_index 0
  837.  
  838.     set matches [regexp -inline -nocase -all -- {@@(.*?)@@} $output]
  839.     foreach {match tmpc} $matches {
  840.         set tmpc [split $tmpc "!"]
  841.         set index 0
  842.         set cookie [list]
  843.         incr variable_index
  844.         foreach piece $tmpc {
  845.             set tmpp [regexp -nocase -inline -all -- {^(.*?)\((.*?)\)|(.*?)$} $piece]
  846.  
  847.             if {[lindex $tmpp 3] == ""} {
  848.                 lappend cookie [lindex $tmpp 2] [lindex $tmpp 1]
  849.             } else {
  850.                 lappend cookie 0 [lindex $tmpp 3]
  851.             }
  852.         }
  853.  
  854.         # replace tag-item's index with the current article
  855.         if {[string equal -nocase $feed(tag-name) [lindex $cookie 1]]} {
  856.             set cookie [[namespace current]::xml_join_tags $feed(tag-list) [lreplace $cookie $index $index $current]]
  857.         }
  858.  
  859.         set cookie [[namespace current]::xml_join_tags $feed(tag-feed) $cookie]
  860.  
  861.         if {[set tmp [[namespace current]::cookie_replace $cookie $data]] != ""} {
  862.             set tmp [[namespace current]::xml_list_flatten $tmp]
  863.  
  864.             regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $match {\\\1} match
  865.             set feed_data "[string map { "&" "\\\x26" } [[namespace current]::html_decode $eval $tmp]]"
  866.             if {$eval == 1} {
  867.                 # We are going to eval this string so we can't insert untrusted
  868.                 # text. Instead create variables and insert references to those
  869.                 # variables that will be expanded in the subst call below.
  870.                 set cookie_val($variable_index) $feed_data
  871.                 regsub -- $match $output "\$cookie_val($variable_index)" output
  872.             } else {
  873.                 regsub -- $match $output $feed_data output
  874.             }
  875.         }
  876.     }
  877.  
  878.     # remove empty cookies
  879.     if {(![info exists feed(remove-empty)]) || ($feed(remove-empty) == 1)} {
  880.         regsub -nocase -all -- "@@.*?@@" $output "" output
  881.     }
  882.  
  883.     # evaluate tcl code
  884.     if {$eval == 1} {
  885.         if {[catch {set output [subst $output]} error] != 0} {
  886.             putlog "\002RSS Eval Error\002: $error"
  887.         }
  888.     }
  889.  
  890.     return $output
  891. }
  892.  
  893. proc ::rss-synd::cookie_replace {cookie data} {
  894.     set element "children"
  895.  
  896.     set tags [list]
  897.     foreach {num section} $cookie {
  898.         if {[string equal "=" [string range $section 0 0]]} {
  899.             set attrib [string range $section 1 end]
  900.             set element "attrib"
  901.             break
  902.         } else {
  903.             lappend tags $num $section
  904.         }
  905.     }
  906.  
  907.     set return [[namespace current]::xml_get_info $data $tags $element]
  908.  
  909.     if {[string equal -nocase "attrib" $element]} {
  910.         array set tmp $return
  911.  
  912.         if {[catch {set return $tmp($attrib)}] != 0} {
  913.             return
  914.         }
  915.     }
  916.  
  917.     return $return
  918. }
  919.  
  920. #
  921. # Misc Functions
  922. ##
  923.  
  924. proc ::rss-synd::html_decode {eval data {loop 0}} {
  925.     if {![string match *&* $data]} {return $data}
  926.     array set chars {
  927.              nbsp   \x20 amp    \x26 quot   \x22 lt     \x3C
  928.              gt     \x3E iexcl  \xA1 cent   \xA2 pound  \xA3
  929.              curren \xA4 yen    \xA5 brvbar \xA6 brkbar \xA6
  930.              sect   \xA7 uml    \xA8 die    \xA8 copy   \xA9
  931.              ordf   \xAA laquo  \xAB not    \xAC shy    \xAD
  932.              reg    \xAE hibar  \xAF macr   \xAF deg    \xB0
  933.              plusmn \xB1 sup2   \xB2 sup3   \xB3 acute  \xB4
  934.              micro  \xB5 para   \xB6 middot \xB7 cedil  \xB8
  935.              sup1   \xB9 ordm   \xBA raquo  \xBB frac14 \xBC
  936.              frac12 \xBD frac34 \xBE iquest \xBF Agrave \xC0
  937.              Aacute \xC1 Acirc  \xC2 Atilde \xC3 Auml   \xC4
  938.              Aring  \xC5 AElig  \xC6 Ccedil \xC7 Egrave \xC8
  939.              Eacute \xC9 Ecirc  \xCA Euml   \xCB Igrave \xCC
  940.              Iacute \xCD Icirc  \xCE Iuml   \xCF ETH    \xD0
  941.              Dstrok \xD0 Ntilde \xD1 Ograve \xD2 Oacute \xD3
  942.              Ocirc  \xD4 Otilde \xD5 Ouml   \xD6 times  \xD7
  943.              Oslash \xD8 Ugrave \xD9 Uacute \xDA Ucirc  \xDB
  944.              Uuml   \xDC Yacute \xDD THORN  \xDE szlig  \xDF
  945.              agrave \xE0 aacute \xE1 acirc  \xE2 atilde \xE3
  946.              auml   \xE4 aring  \xE5 aelig  \xE6 ccedil \xE7
  947.              egrave \xE8 eacute \xE9 ecirc  \xEA euml   \xEB
  948.              igrave \xEC iacute \xED icirc  \xEE iuml   \xEF
  949.              eth    \xF0 ntilde \xF1 ograve \xF2 oacute \xF3
  950.              ocirc  \xF4 otilde \xF5 ouml   \xF6 divide \xF7
  951.              oslash \xF8 ugrave \xF9 uacute \xFA ucirc  \xFB
  952.              uuml   \xFC yacute \xFD thorn  \xFE yuml   \xFF
  953.              ensp   \x20 emsp   \x20 thinsp \x20 zwnj   \x20
  954.              zwj    \x20 lrm    \x20 rlm    \x20 euro   \x80
  955.              sbquo  \x82 bdquo  \x84 hellip \x85 dagger \x86
  956.              Dagger \x87 circ   \x88 permil \x89 Scaron \x8A
  957.              lsaquo \x8B OElig  \x8C oelig  \x8D lsquo  \x91
  958.              rsquo  \x92 ldquo  \x93 rdquo  \x94 ndash  \x96
  959.              mdash  \x97 tilde  \x98 scaron \x9A rsaquo \x9B
  960.              Yuml   \x9F apos   \x27
  961.             }
  962.  
  963.     regsub -all -- {<(.[^>]*)>} $data " " data
  964.  
  965.     if {$eval != 1} {
  966.         regsub -all -- {([\$\[\]\{\}\(\)\\])} $data {\\\1} data
  967.     } else {
  968.         regsub -all -- {([\$\[\]\{\}\(\)\\])} $data {\\\\\\\1} data
  969.     }
  970.  
  971.     regsub -all -- {&#(\d+);} $data {[subst -nocomm -novar [format \\\u%04x [scan \1 %d]]]} data
  972.     regsub -all -- {&#x(\w+);} $data {[format %c [scan \1 %x]]} data
  973.     regsub -all -- {&([0-9a-zA-Z#]*);} $data {[if {[catch {set tmp $chars(\1)} char] == 0} { set tmp }]} data
  974.     regsub -all -- {&([0-9a-zA-Z#]*);} $data {[if {[catch {set tmp [string tolower $chars(\1)]} char] == 0} { set tmp }]} data
  975.  
  976.     regsub -nocase -all -- "\\s{2,}" $data " " data
  977.  
  978.     set data [subst $data]
  979.     if {[incr loop] == 1} {
  980.         set data [[namespace current]::html_decode 0 $data $loop]
  981.     }
  982.  
  983.     return $data
  984. }
  985.  
  986. proc ::rss-synd::is_utf8_patched {} { catch {queuesize a} err1; catch {queuesize \u0754} err2; expr {[string bytelength $err2]!=[string bytelength $err1]} }
  987.  
  988. proc ::rss-synd::check_channel {chanlist chan} {
  989.     foreach match [split $chanlist] {
  990.         if {[string equal -nocase $match $chan]} {
  991.             return 1
  992.         }
  993.     }
  994.  
  995.     return 0
  996. }
  997.  
  998. proc ::rss-synd::urldecode {str} {
  999.     regsub -all -- {([\"\$\[\]\{\}\(\)\\])} $str {\\\1} str
  1000.  
  1001.     regsub -all -- {%([aAbBcCdDeEfF0-9][aAbBcCdDeEfF0-9]);?} $str {[format %c [scan \1 %x]]} str
  1002.  
  1003.     return [subst $str]
  1004. }
  1005.  
  1006. ::rss-synd::init
Add Comment
Please, Sign In to add comment