devinteske

Parsing JSON with awk

Apr 18th, 2016
482
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Awk 7.57 KB | None | 0 0
  1. #!/usr/bin/awk -f
  2. BEGIN {
  3.     nkeys[depth = 0] = keynum = 0
  4.     building_array = building_hash = building_string = 0
  5.     if (!(valid_chars = ENVIRON["VALID_VARNAME_CHARS"])) {
  6.         for (c = 48; c < 58; c++)
  7.             valid_chars = valid_chars sprintf("%c", c)
  8.         for (c = 65; c < 91; c++)
  9.             valid_chars = valid_chars sprintf("%c", c)
  10.         for (c = 97; c < 123; c++)
  11.             valid_chars = valid_chars sprintf("%c", c)
  12.     }
  13.     if (filter_object) filter_object = "^" filter_object "$"
  14.     if (filter_value) filter_value = "^" filter_value "$"
  15. }
############################################### FUNCTIONS
  17. function dprint(msg) { if (debug) print ": DEBUG1", msg }
  18. function dprint2(msg) { if (debug >= 2) print ": DEBUG2", msg }
  19. function dprint3(msg) { if (debug >= 3) print ": DEBUG3", msg }
  20. function lprint(line) { print (local ? "local " : "") line }
  21. function trim_match()
  22. {
  23.     match_text = substr($0, RSTART, RLENGTH)
  24.     $0 = substr($0, RSTART + RLENGTH)
  25.     return match_text
  26. }
  27. function trim_keyword()
  28. {
  29.     keylen = length(keynum > 0 ? keynum : key[depth,nkeys[depth]])
  30.     keynum = 0
  31.     if (!keylen) return
  32.     x = keyword
  33.     keyword = substr(keyword, 0, length(keyword) - keylen - 1)
  34.         # NB: The "-1" is for keyword separator "_"
  35.     dprint2(sprintf("TRIM KEY ([%s] => [%s])", x, keyword))
  36. }
  37. function objsafe(name)
  38. {
  39.     gsub(("[^" valid_chars "]"), "_", name)
  40.     return name
  41. }
  42. function json_print(object, value)
  43. {
  44.     if (object !~ filter_object) return
  45.     if (value !~ filter_value) return
  46.     gsub(/'/, "&\\\\&&", value)
  47.     object = objsafe(object)
  48.     lprint(object "='" value "'")
  49. }
  50. function json_print_type(object, type)
  51. {
  52.     if (!keyword) return
  53.     if (object !~ filter_object) return
  54.     if (type)
  55.         lprint(object "_type=" type)
  56.     else if (building_array)
  57.         lprint(object "_type=array")
  58.     else if (building_hash)
  59.         lprint(object "_type=hash")
  60.     else
  61.         lprint(object "_type=scalar")
  62. }
  63. function json_print_keys(object, depth)
  64. {
  65.     keys = ""
  66.     for (k = 1; k <= nkeys[depth]; k++) {
  67.         if (printed[key[depth,k]]) continue
  68.         printed[key[depth,k]] = 1
  69.         keys = keys " " key[depth,k]
  70.     }
  71.     json_print(object, substr(keys, 2))
  72. }
  73. function json_unset_value(object)
  74. {
  75.     lprint(objsafe(object) "_value=")
  76. }
  77. function json_filtered_unset_value(object)
  78. {
  79.     if (object !~ filter_object) return
  80.     return json_unset_value(object)
  81. }
  82. function json_objname()
  83. {
  84.     objname = ""
  85.     if (building_array) {
  86.         objname = keyword "_" building_array++
  87.         dprint3(sprintf("RETURN ARRAY OBJNAME [%s]", objname))
  88.     } else if (building_hash) {
  89.         objname = keyword "_" building_hash++
  90.         dprint3(sprintf("RETURN HASH OBJNAME [%s]", objname))
  91.     } else if (building_string) {
  92.         objname = keyword
  93.         dprint3(sprintf("RETURN STRING OBJNAME [%s]", objname))
  94.     } else if (depth <= 1 && keyword) {
  95.         objname = keyword
  96.         dprint3(sprintf("RETURN ROOT OBJNAME [%s]", objname))
  97.     }
  98.  
  99.     if (objname) return objname
  100.  
  101.     # NB: If non-NULL current-depth key, increment for separator
  102.     if ((keylen = length(key[depth,nkeys[depth]]))) keylen++
  103.  
  104.     objname = substr(keyword, 0, length(keyword) - keylen) "_" nkeys[depth]
  105.     dprint3(sprintf("RETURN OBJNAME [%s]", objname))
  106.     return objname
  107. }
############################################### MAIN LOOP
  109. { while ($0) { # Loop until done processing everything on this line
  110.     if (building_string) {
  111.         while (match($0, /^[^"]*\\"/)) value = value trim_match()
  112.         if (!match($0, /^[^"]*"/)) { # No ending quote
  113.             value = value $0
  114.             next # Continue reading on next line
  115.         }
  116.         objname = json_objname()
  117.         building_string = 0
  118.         value = value substr($0, RSTART, RLENGTH - 1)
  119.         trim_match()
  120.         sub(/^[[:space:]]*,[[:space:]]*/, "")
  121.         json_print(objname, value)
  122.         json_print_type(objname)
  123.         trim_keyword()
  124.         if (depth <= 1) json_print_type(keyword "_" nk)
  125.     }
  126.     ################################### OPENING PATTERNS
  127.     else if (match($0, /^[[:space:]]*{[[:space:]]*/)) {
  128.         building_hash = (depth > 0)
  129.         if (depth > 1) {
  130.             x = keyword
  131.             nk = nkeys[depth]
  132.             keyword = keyword "_" nk
  133.             keynum = nk
  134.             dprint2(sprintf("APPEND HASH ([%s] => [%s])",
  135.                 x, keyword))
  136.         }
  137.         nkeys[++depth] = 0
  138.         trim_match()
  139.     } else if (keyword && match($0, /^[[:space:]]*\[/)) {
  140.         building_array = 1
  141.         trim_match()
  142.     }
  143.     ################################### OBJECTS
  144.     else if (match($0, /^[[:space:]]*"[^"]+"[[:space:]]*:[[:space:]]*/)) {
  145.         nkeys[depth]++
  146.         nk = nkeys[depth]
  147.         key[depth,nk] = trim_match()
  148.         sub(/^[[:space:]]*"/, "", key[depth,nk])
  149.         sub(/"[[:space:]]*:[[:space:]]*$/, "", key[depth,nk])
  150.         kp = key[depth,nk]
  151.         dprint(sprintf("START OBJECT key[%u,%u]=\"%s\"",
  152.             depth, nk, kp))
  153.         if (keyword && !building_hash)
  154.             json_print(keyword "_" nk, objsafe(kp))
  155.         if (!building_hash) {
  156.             x = keyword
  157.             keyword = keyword (keyword ? "_" : "") kp
  158.             dprint2(sprintf("APPEND KEY ([%s] => [%s])",
  159.                 x, keyword))
  160.         }
  161.     }
  162.     ################################### PROPERTIES
  163.     else if (keyword && match($0, /^[[:space:]]*"/)) {
  164.         value = ""
  165.         trim_match()
  166.         while (match($0, /^[^"]*\\"/)) value = value trim_match()
  167.         if (!match($0, /^[^"]*"/)) {
  168.             building_string = 1
  169.             value = $0
  170.             next
  171.         }
  172.         value = value substr($0, RSTART, RLENGTH - 1)
  173.         trim_match()
  174.         sub(/^[[:space:]]*,[[:space:]]*/, "")
  175.  
  176.         object = json_objname()
  177.         nk = nkeys[depth]
  178.         if (depth <= 1) {
  179.             json_print(object, value)
  180.             if (!building_array) json_print_type(object)
  181.         } else {
  182.             json_print(object, key[depth,nk])
  183.             json_print(object "_value", value)
  184.         }
  185.         if (building_hash) {
  186.             if (!value) json_filtered_unset_value(object)
  187.         } else if (!building_array) {
  188.             trim_keyword()
  189.             json_print_type(keyword "_" nk)
  190.         }
  191.     }
  192.     else if (keyword && match($0, \
  193.         /^[[:space:]]*[^[:space:],}\]]+[[:space:]]*/ \
  194.     )) {
  195.         value = trim_match()
  196.         sub(/^[[:space:]]*/, "", value)
  197.         sub(/[[:space:]]*$/, "", value)
  198.         sub(/^[[:space:]]*,[[:space:]]*/, "")
  199.         json_print(keyword, value)
  200.         json_print_type(keyword)
  201.         trim_keyword()
  202.         if (!building_array) json_print_type(keyword "_" nkeys[depth])
  203.     }
  204.     ################################### CLOSING PATTERNS
  205.     else if (match($0, /^[[:space:]]*\][[:space:]]*/)) {
  206.         json_print(keyword "_len", --building_array)
  207.         json_print_type(keyword, "array")
  208.         nk = nkeys[depth]
  209.         items = building_array
  210.         building_array = 0
  211.         trim_keyword()
  212.         trim_match()
  213.         sub(/^[[:space:]]*,[[:space:]]*/, "")
  214.         json_print_type(keyword "_" nk, "array")
  215.         dprint(sprintf("END ARRAY key[%u,%u]=\"%s\"" \
  216.             " holding %u object%s", depth, nk,
  217.             key[depth,nk], items, items != 1 ? "s" : ""))
  218.     }
  219.     else if (match($0, /^[[:space:]]*}[[:space:]]*/)) {
  220.         nk = nkeys[depth]
  221.         if (keyword && depth >= 1) {
  222.             hashname = key[depth-1,nkeys[depth-1]]
  223.             json_print(keyword, hashname)
  224.             json_print(keyword "_len", nk)
  225.             json_print_type(keyword, "hash")
  226.         }
  227.         building_hash = 0
  228.         depth-- # NB: Done prior to calling trim_keyword()
  229.         trim_keyword()
  230.         trim_match()
  231.         sub(/^[[:space:]]*,[[:space:]]*/, "")
  232.         building_hash = (depth > 1)
  233.         if (depth == 1) {
  234.             objname = keyword "_" nkeys[depth]
  235.             json_print_type(objname, "hash")
  236.         }
  237.         if (depth < 0) {
  238.             json_print_keys(keyword, depth+1)
  239.             dprint(sprintf("END HASH key[%u,%u]=\"%s\"" \
  240.                 " holding %u object%s", depth+2,
  241.                 nkeys[depth+2],
  242.                 key[depth+2,nkeys[depth+2]],
  243.                 nkeys[depth+3],
  244.                 nkeys[depth+3] != 1 ? "s" : ""))
  245.         } else if (kp = key[depth,nkeys[depth]]) {
  246.             dprint(sprintf("END HASH key[%u,%u]=\"%s\"" \
  247.                 " holding %u object%s", depth,
  248.                 nkeys[depth], kp, nkeys[depth+1],
  249.                 nkeys[depth+1] != 1 ? "s" : ""))
  250.         }
  251.     }
  252. } }
############################################### END
  254. END {
  255.     json_print_keys(keyword "_keys", 1)
  256.     dprint(sprintf("END HASH holding %u object%s", nkeys[1],
  257.         nkeys[1] != 1 ? "s" : ""))
  258. }
Add Comment
Please, Sign In to add comment