Advertisement
rplantiko

Using the XML DOM parser in ABAP

Jan 24th, 2012
1,618
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
ABAP 6.98 KB | None | 0 0
  1. *&---------------------------------------------------------------------*
  2. *& Report  ZZ_DOM_PARSE_XHTML
  3. *&---------------------------------------------------------------------*
  4.  
  5. * From time to time, I receive requests on how to use the XML DOM parser.
  6. * This report should explain once and for all how this is done.
  7.  
  8. * It scans an XHTML document for paragraphs (<p> elements), using the
  9. * method if_ixml_document->get_elements_by_tag_name_ns( ).
  10.  
  11. * Of course, this is only an example. For other questions, other actions
  12. * on the DOM might be more appropriate, for example traversing the tree
  13. * with methods like get_children( )
  14.  
  15. * For collections of elements, I prefer to work with the table type
  16. * DCXMLELEMS, a standard table of IF_IXML_ELEMENT objects
  17. * In ABAP, it's easier and more readable to loop over a table than having
  18. * to iterate over an interface
  19.  
  20. * Observe that the parser mode "Namespace aware" has no influence at all
  21. * on the result
  22.  
  " Demo report: searches a test XHTML document for elements with a given
  " name and namespace and writes the hits to the list.
  report zz_dom_parse_xhtml.

  " iXML main factory and its stream factory; both are set in form INIT.
  data go_xml            type ref to if_ixml.
  data go_stream_factory type ref to if_ixml_stream_factory.

  " Selection screen: element name, namespace URI, and a switch that turns
  " on the parser's namespace-aware mode.
  parameters p_elnam type text30 default 'p'.
  parameters p_ns    type text128 default 'http://www.w3.org/1999/xhtml'.
  parameters p_aware as checkbox default space.

  " Create the iXML factories before the selection screen is shown.
  initialization.
    perform init.

  " Run the actual demo.
  start-of-selection.
    perform start.
  37.  
  38. * ---
  form start.
    " Main flow of the report: build the test document, look up the
    " requested elements and write them to the list.

    data: lv_wanted_name type string,
          lv_wanted_ns   type string,
          lo_document    type ref to if_ixml_document,
          lt_hits        type dcxmlelems.

    " SEARCH_ELEMENTS declares STRING parameters, so the fixed-length
    " selection-screen values are copied into string variables first.
    lv_wanted_name = p_elnam.
    lv_wanted_ns   = p_ns.

    try.
        perform build_test_xhtml
          using p_aware
          changing lo_document.

        perform search_elements
          using lo_document lv_wanted_name lv_wanted_ns
          changing lt_hits.

        perform write_result using lt_hits.

      catch cx_ixml_parse_error.
        " Nothing left to do: the parse errors were already written
        " to the list by DO_PARSER_ERRORS.
    endtry.

  endform.                    "start
  62.  
  63. * ---
  form search_elements using io_xhtml type ref to if_ixml_document
                             iv_elnam type string
                             iv_ns    type string
                       changing et_elements type dcxmlelems.
    " Looks up all elements named IV_ELNAM in namespace IV_NS within
    " IO_XHTML and appends them to ET_ELEMENTS.

    data lo_matches type ref to if_ixml_node_collection.

    lo_matches = io_xhtml->get_elements_by_tag_name_ns( uri  = iv_ns
                                                        name = iv_elnam ).

    " Convert the node collection into an internal table of elements.
    perform element_table_from_collection using lo_matches
                                          changing et_elements.

  endform.                    "search_elements
  81.  
  82. * ---
  form element_table_from_collection
      using io_elements type ref to if_ixml_node_collection
      changing et_elements type dcxmlelems.
    " Appends every node of IO_ELEMENTS that offers the element interface
    " to ET_ELEMENTS. Existing rows of ET_ELEMENTS are kept.

    data: lo_cursor  type ref to if_ixml_node_iterator,
          lo_current type ref to if_ixml_node,
          lo_as_elem type ref to if_ixml_element.

    lo_cursor  = io_elements->create_iterator( ).
    lo_current = lo_cursor->get_next( ).

    " The iterator signals the end of the collection with an unbound node.
    while lo_current is bound.

      " Nodes without the element interface yield an unbound reference
      " and are skipped.
      lo_as_elem ?= lo_current->query_interface( ixml_iid_element ).
      if lo_as_elem is bound.
        append lo_as_elem to et_elements.
      endif.

      lo_current = lo_cursor->get_next( ).

    endwhile.

  endform.                    "element_table_from_collection
  110.  
  111. * ---
  form build_test_xhtml
    using iv_ns_aware type flag
    changing eo_xhtml type ref to if_ixml_document
    raising cx_ixml_parse_error.
    " Fetches the built-in test XHTML text and parses it into the DOM
    " document EO_XHTML. IV_NS_AWARE is handed on to the parse step.

    data lv_markup type string.

    perform get_test_html_as_string changing lv_markup.

    perform parse
      using lv_markup iv_ns_aware
      changing eo_xhtml.

  endform.                    "build_test_xhtml
  125.  
  126. * ---
  form parse using iv_xml type string
                   iv_ns_aware type flag
             changing eo_doc type ref to if_ixml_document
             raising cx_ixml_parse_error.
    " Parses the XML text IV_XML into a new DOM document EO_DOC.
    " With IV_NS_AWARE = 'X' the parser is switched to namespace-aware mode.
    " If the parser reports errors, DO_PARSER_ERRORS lists them and raises
    " CX_IXML_PARSE_ERROR.

    data: lo_input  type ref to if_ixml_istream,
          lo_reader type ref to if_ixml_parser.

    " Wrap the text in an input stream and create the target document.
    lo_input = go_stream_factory->create_istream_string( iv_xml ).
    eo_doc   = go_xml->create_document( ).

    lo_reader = go_xml->create_parser( stream_factory = go_stream_factory
                                       istream        = lo_input
                                       document       = eo_doc ).

    if iv_ns_aware = 'X'.
      lo_reader->set_namespace_mode( if_ixml_parser=>co_namespace_aware ).
    endif.

    " The return value of PARSE is not used; errors are detected via
    " the parser's error count below.
    lo_reader->parse( ).

    if lo_reader->num_errors( ) <= 0.
      return.
    endif.

    perform do_parser_errors using lo_reader.

  endform.                    "parse
  153.  
  154. * ---
  form do_parser_errors
         using io_parser type ref to if_ixml_parser
         raising cx_ixml_parse_error.
    " Writes every error reported by IO_PARSER to the list in the form
    " "line(column) reason" and then raises CX_IXML_PARSE_ERROR carrying
    " the reason, line and column of the last error that was listed.
    " Does nothing if the parser reports no errors.

    data: lo_error  type ref to if_ixml_parse_error,
          lv_maxnum type i,
          lv_num    type i,
          lv_text   type string,
          lv_line   type i,
          lv_column type i.

    " List all parser errors; LV_NUM starts at its initial value 0.
    lv_maxnum = io_parser->num_errors( ).
    while lv_num < lv_maxnum.
      lo_error = io_parser->get_error( lv_num ).
      add 1 to lv_num.

      " Defensive: skip a missing error object instead of dumping on a
      " null reference.
      if lo_error is not bound.
        continue.
      endif.

      lv_text   = lo_error->get_reason( ).
      lv_line   = lo_error->get_line( ).
      lv_column = lo_error->get_column( ).
      write: / lv_line left-justified no-gap, '(' no-gap, lv_column left-justified no-gap, ')', lv_text.
    endwhile.

    " If any errors occurred, signal this to the caller.
    if lv_maxnum > 0.
      raise exception type cx_ixml_parse_error
        exporting
          reason = lv_text
          line   = lv_line
          column = lv_column.
    endif.

  endform.                    "do_parser_errors
  190.  
  191. *---
  form init.
    " Creates the global iXML factory GO_XML and, from it, the global
    " stream factory GO_STREAM_FACTORY. Form PARSE uses both.
    go_xml            = cl_ixml=>create( ).
    go_stream_factory = go_xml->create_stream_factory( ).
  endform.                    "init
  196.  
  197. * ---
  form get_test_html_as_string changing ev_xhtml type string.
    " Returns the built-in test XHTML document in EV_XHTML.
    " The literals are joined with CR/LF, so the document consists of
    " several lines.
    concatenate
      `<?xml version="1.0" encoding="ISO-8859-1" ?>`
      " Deliberately left out of the test document:
      " `<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "xhtml1-transitional.dtd">`
      `<html xmlns="http://www.w3.org/1999/xhtml">`
      `<head>`
      `<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />`
      `<title>Wenn HTML zu XHTML wird</title>`
      `</head>`
      `<body>`
      ``
      `<h1><a name="start" id="start">Wenn HTML zu XHTML wird</a></h1>`
      ``
      `<p id="erster" >Erster Paragraph.</p>`
      `<p id="zweiter">Zweiter Paragraph.</p>`
      `<p id="dritter">Na, wievielter Paragraph wohl?</p>`
      `</body>`
      `</html>`
      into ev_xhtml
      separated by cl_abap_char_utilities=>cr_lf. " For line output in error messages
  endform.                    "get_test_html_as_string
  220.  
  221. * ---
  form write_result using it_elements type dcxmlelems.
    " Writes one list line per element in IT_ELEMENTS: the value of its
    " "id" attribute, a colon at position 15, and the element content.
    " Writes a "nothing found" message if the table is empty.

    data: lo_hit     type ref to if_ixml_element,
          lv_hit_id  type string,
          lv_content type string.

    " Guard clause: report an empty result and leave.
    if it_elements is initial.
      write: / 'Keine Elemente mit diesem Namen und Namespace gefunden'.
      return.
    endif.

    loop at it_elements into lo_hit.
      lv_hit_id  = lo_hit->get_attribute( 'id' ).
      lv_content = lo_hit->get_content_as_string( ).
      write: / lv_hit_id, at 15 ':', lv_content.
    endloop.

  endform.                    "write_result
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement