Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#' Remove `<script>` elements (tags and their contents) from HTML text.
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString` with every
#'   `<script ...>...</script>` span deleted.
cleanScript <- function(htmlString) {
  # The earlier pattern was wrapped in PHP-style "#...#is" delimiters, which
  # R treats as literal text, so it never matched. The inline (?is) flags give
  # the intended case-insensitive, dot-matches-newline behaviour under PCRE.
  gsub(
    "(?is)<script\\b[^>]*>.*?</script\\s*>",
    "",
    htmlString,
    perl = TRUE
  )
}
#' Remove `<style>` elements (tags and their contents) from HTML text.
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString` with every
#'   `<style ...>...</style>` span deleted.
cleanStyle <- function(htmlString) {
  # PHP-style "#...#is" delimiters are literal characters in an R regex, so
  # the earlier pattern never matched. Use inline (?is) with perl = TRUE for
  # case-insensitive matching that also spans line breaks.
  gsub(
    "(?is)<style\\b[^>]*>.*?</style\\s*>",
    "",
    htmlString,
    perl = TRUE
  )
}
#' Strip markup tags from HTML text, keeping the text between them.
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString` with every
#'   `<...>` span removed.
cleanTags <- function(htmlString) {
  # The lazy quantifier removes each tag individually instead of swallowing
  # everything between the first "<" and the last ">".
  gsub("<.*?>", "", htmlString)
}
#' Truncate each string after its first `>` and close it with `</div>`.
#'
#' Everything from the first `>` to the end of the string is replaced by
#' `></div>`. Strings that contain no `>` are returned unchanged.
#' NOTE(review): presumably meant to keep only an opening `<div ...>` tag and
#' discard its contents; confirm the intent against the caller.
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString`.
cleanSekrip <- function(htmlString) {
  gsub(">.*$", "></div>", htmlString)
}
#' Reduce HTML to plain text: drop scripts, styles, then all remaining tags.
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString` containing
#'   only the text that was outside `<script>`/`<style>` elements and tags.
cleanKabeh <- function(htmlString) {
  # The script/style patterns used to carry PHP-style "#...#is" delimiters,
  # which are literal in R and therefore never matched; script and style
  # bodies then leaked into the output. (?is) + perl = TRUE restores the
  # intended case-insensitive, multi-line matching.
  htmlString <- gsub(
    "(?is)<script\\b[^>]*>.*?</script\\s*>",
    "",
    htmlString,
    perl = TRUE
  )
  htmlString <- gsub(
    "(?is)<style\\b[^>]*>.*?</style\\s*>",
    "",
    htmlString,
    perl = TRUE
  )
  # Element contents must be removed before this step; afterwards only the
  # surrounding tags are left to strip.
  gsub("<.*?>", "", htmlString)
}
#' Reduce HTML to plain text: drop scripts, styles, then all remaining tags.
#'
#' Script elements may carry an XML namespace prefix (e.g. `<svg:script>`).
#'
#' @param htmlString Character vector of HTML fragments.
#' @return Character vector of the same length as `htmlString` containing
#'   only the text that was outside `<script>`/`<style>` elements and tags.
cleanKabeh <- function(htmlString) {
  # The script pattern was previously an unquoted regex (a syntax error) and
  # the style pattern used PHP-style "#...#is" delimiters that R treats as
  # literal text. Both are now proper PCRE strings.
  # The optional namespace prefix is limited to name characters so it cannot
  # run across unrelated markup up to some later ":".
  ns_prefix <- "(?:[\\w.-]+:)?"
  script_pattern <- paste0(
    "(?is)<", ns_prefix, "script\\b[^>]*>.*?</", ns_prefix, "script\\s*>"
  )
  style_pattern <- "(?is)<style\\b[^>]*>.*?</style\\s*>"

  without_script <- gsub(script_pattern, "", htmlString, perl = TRUE)
  without_style <- gsub(style_pattern, "", without_script, perl = TRUE)

  # Only bare tags remain at this point; strip them one at a time.
  gsub("<.*?>", "", without_style)
}
# Strip <style> and <script> elements from an HTML document by parsing it
# with the XML package instead of using regular expressions.
library(XML)

# `article` is not defined in this snippet; presumably a character string
# holding the HTML markup (hence asText = TRUE). Confirm against the caller.
doc <- htmlParse(article, asText = TRUE)

# Keep the two node sets in separate variables. Previously the //style result
# was overwritten by the //script result before removeNodes() ran, so <style>
# elements were never removed.
styleNodes <- getNodeSet(doc, "//style")
scriptNodes <- getNodeSet(doc, "//script")
removeNodes(styleNodes)
removeNodes(scriptNodes)

# Evaluate `doc` so the cleaned document is shown when run interactively.
doc
Add Comment
Please, Sign In to add comment