Advertisement
Revolucent

REBOL 3 CSV Parsing Library

Apr 3rd, 2015
3,232
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
REBOL 6.85 KB | None | 0 0
  1. ; Copyright (c) 2013 Gregory Higley
  2.  
  3. ; Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
  4. ; files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
  5. ; modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software
  6. ; is furnished to do so, subject to the following conditions:
  7.  
  8. ; The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  9.  
  10. ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  11. ; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  12. ; LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  13. ; CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  14.  
  15. rebol [
  16.     Title: "Revolucent CSV Library"
  17.     Author: "Gregory Higley"
  18.     Date: 2013-04-23
  19.     Name: net.revolucent.parse.csv
  20.     Version: 3.0.0
  21.     Type: module
  22.     Exports: [csv csv-object csv-block csv-fields read-csv-file]
  23.     Needs: [
  24.         2.101.0
  25.         net.revolucent.core.v3
  26.     ]  
  27.     License: MIT
  28.     History: [
  29.         2013-04-24 {Renamed parse-csv to READ-CSV in anticipation of WRITE-CSV. Fixed a bug that occurs when the escape-char and quote-char are the same.}
  30.     ]
  31. ]
  32.  
  csv-fields: closure [
      "Takes a block of transforms and returns a function that applies them to the CSV items in a row."
      transforms [block!]
  ][
      ; Evaluate the spec once, up front, so that it becomes a flat lookup
      ; list of alternating keys and transform values.
      transforms: reduce transforms
      func [name item /local handler] [
          handler: select transforms name
          ; When SELECT finds nothing HANDLER is none, which simply evaluates
          ; to itself; ITEM is then the last value and is returned unchanged.
          handler item
      ]
  ]
  43.  
  44. ; Here's an example of CSV-OBJECT, which is a higher-order function.
  45. ;
  46. ; First, let's look at a usage of CSV without using CSV-OBJECT.
  47. ;
  48. ; csv/by [
  49. ;   probe read-csv {"Music For The Masses",88}
  50. ; ] funct [items] [
  51. ;   object [
  52. ;       name: items/1
  53. ;       year: to integer! items/2
  54. ;   ]
  55. ; ]
  56. ;
  57. ; For each row of output, the function passed to the /BY refinement
  58. ; creates an object. This gets pretty tedious when there are many columns, so:
  59. ;
  60. ; csv/by [
  61. ;   probe read-csv {"Music For The Masses",88}
  62. ; ] csv-object [name year]
  63. ;
  64. ; CSV-OBJECT returns a dynamically created function that maps the first
  65. ; CSV field to 'NAME and the second one to 'YEAR. We can use integers
  66. ; before the names to specify which columns we want:
  67. ;
  68. ; csv-object [2 firstname lastname 14 vin date]
  69. ;
  70. ; In the case above, the second field will be mapped to 'FIRSTNAME, the third to 'LASTNAME,
  71. ; the fourteenth to 'VIN and the fifteenth to 'DATE. All other fields will be ignored.
  csv-object: closure [
      "Returns a function that creates an object by mapping values to names."
      names [block!] "E.g., [fname lname 7 age]"
      /by
          transform "Transform to be applied to each field" ; It's best to make this function using CSV-FIELDS
      /local
          name-map
          n
          name
          rules
  ][
      ; Without /BY every field value is stored exactly as it was parsed.
      default transform func [name item] [item]

      ; NAME-MAP is filled with alternating column-index / field-name pairs.
      name-map: copy []

      ; Grammar of NAMES: an integer moves the column counter, a word claims
      ; the current column and advances the counter by one.
      rules: [
          (n: 1)
          some [
              set n integer!
          |   set name word! (
                  append name-map reduce [n name]
                  n: n + 1
              )
          ]
          end
      ]
      if not parse names rules [do make error! "Invalid names specifier."]

      ; The returned function builds an object spec from one row of items.
      func [
          items [block!]
          /local spec
      ][
          spec: copy []
          foreach [column field] name-map [
              append spec reduce [to set-word! field  transform field  pick items column]
          ]
          object spec
      ]
  ]
  104.  
  csv-block: closure [
      "Returns a function that creates a block by choosing a subset of values from another block."
      indexes [block!] "E.g., [2 4 7]" ; Choose fields 2, 4, and 7 in that order
      /by
          transform "Transform to be applied to each field." ; It's best to make this function using CSV-FIELDS
  ][
      ; Without /BY the chosen values are passed through unchanged.
      default transform func [index item] [item]

      ; The returned function walks INDEXES in the order given, so the result
      ; may reorder (or repeat) fields of the incoming row.
      func [
          items [block!]
          /local picked
      ][
          picked: copy []
          foreach position indexes [
              insert tail picked  transform position  items/:position
          ]
          picked
      ]
  ]
  122.  
  123. ; CSV creates an environment in which CSV operations can be performed by, for
  124. ; instance, setting the quote character or the separator, etc. It then injects
  125. ; the read-csv function into the provided block. read-csv will parse
  126. ; a single row of CSV input according to the given settings. E.g.,
  127. ;
  128. ; csv/separator [
  129. ;   probe read-csv {a~b}
  130. ; ] #"~"
  131. ;
  132. ; The read-csv function is valid only within the given block. This will not work:
  133. ;
  134. ; csv [] probe read-csv {a,b}
  135. ;
  136. ; (Unless of course someone has defined a different read-csv outside of this module.)
  csv: funct [
      "Evaluates BODY with a READ-CSV function that parses one line of CSV text."
      body [block!] "Code to evaluate; the word READ-CSV is bound inside it"
      /separator
          separator-char [char!] "Field separator (default: comma)"
      /quote
          quote-char [char!] "Quote character (default: double quote)"
      /escape
          escape-char [char!] "Character that escapes a quote inside a quoted item (default: backslash)"
      /by
          transform [any-function!] "Applied to the block of items of every parsed line (default: IDENTITY)"
      /local
          chunk
          item
  ][
      default escape-char #"\"
      default quote-char #"^""
      default separator-char #","
      default transform :identity

      ; Spaces and tabs around a quoted item are skipped, unless one of them
      ; has been chosen as the separator, quote or escape character.
      replace whitespace-chars: copy " ^-" separator-char ""
      replace whitespace-chars quote-char ""
      replace whitespace-chars escape-char ""
      whitespace: charset whitespace-chars
      non-separator-chars: complement charset separator-char
      escaped-quote: rejoin [escape-char quote-char]

      items: copy []
      ; An unquoted item is everything up to the next separator, taken verbatim.
      non-quoted-item: [copy item any non-separator-chars (append items item)]
      either equal? escape-char quote-char [
          quoted-item: [
              any whitespace
              quote-char
              any [
                  ; Look ahead: if the next quote is followed (after optional
                  ; whitespace) by a separator it closes this item, so rewind
                  ; to where we were and leave the loop.
                  escape: thru quote-char any whitespace separator-char :escape break
              |   copy chunk to escaped-quote escaped-quote (repend item [chunk quote-char])
              ]
              copy chunk to quote-char (append item chunk)
              quote-char
              (append items item)
              any whitespace
          ]
      ][ ; quote-char and escape-char are not equal
          ; Scan the quoted item piece by piece. A search with TO for the
          ; next escaped quote would run to the end of the line and could
          ; swallow the closing quote and the separators of this item when
          ; the escaped quote belongs to a later one.
          plain-chars: complement charset reduce [quote-char escape-char]
          quoted-item: [
              any whitespace
              quote-char
              any [
                  escaped-quote (append item quote-char)
              |   copy chunk some plain-chars (append item chunk)
              |   escape-char (append item escape-char) ; a lone escape character is kept literally
              ]
              quote-char
              (append items item)
              any whitespace
          ]
      ]

      ; ITEM is reset before each attempt; if QUOTED-ITEM fails, the
      ; NON-QUOTED-ITEM alternative overwrites it with the raw text.
      item-rule: [[(item: copy "") quoted-item | non-quoted-item]]
      rules: [item-rule any [separator-char item-rule] end]

      ; Turn BODY into a one-argument function and call it with the line
      ; parser, which makes READ-CSV available only while BODY is evaluated.
      do func [read-csv] body func [
          line [string!]
      ][
          items: copy [] ; This variable's scope is CSV, not READ-CSV. It is used inside the PARSE rules.
          either parse line rules [transform items] [do make error! rejoin ["Invalid line: " line]]
      ]
  ]
  202.  
  read-csv-file: funct [
      "Parses FILE line by line with CSV and returns a block holding one (transformed) row per line."
      file [file!]
      /headers "First row is header row (it is skipped and not returned)"
      /sep
          sep-char [char!]
      /quote
          quote-char [char!]
      /escape
          escape-char [char!] "When omitted, CSV applies its own default (a backslash)"
      /by
          transform [any-function!] "Transform a row into e.g. an object"
  ][
      default transform :identity

      ; This block is evaluated by CSV, which supplies READ-CSV. The other
      ; words in it (HEADERS, FILE, HEADER, ITEMS) stay bound to this function.
      body: copy [
          header: headers
          items: copy []
          foreach line read/lines file [
              ; HEADER is only true for the first line when /HEADERS was given.
              unless header [
                  append/only items read-csv line
              ]
              header: false
          ]
          items
      ]

      ; APPLY fills CSV's parameters by position, so the refinement flags and
      ; their values are passed in the order of CSV's spec.
      apply :csv [body sep sep-char quote quote-char escape escape-char by :transform]
  ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement