Advertisement
here2share

# python_plus_plus.py

Nov 8th, 2019
425
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.22 KB | None | 0 0
  1. # python_plus_plus.py
# translates Python++ (.ppp) source into plain Python and runs it (over 99.99% copied)
  3.  
  4. import os.path
  5. import re
  6. import string
  7. import subprocess
  8. import sys
  9.  
  10. # valid identifier regex (for Python 2.x, Python 3+ adds unicode
  11. # which is way more complicated probably)
  12. # [A-Za-z_]\w*
  13.  
  14.  
# Import line that main() writes at the top of every generated .py file,
# ahead of the translated source, so the emitted ppp_lib.* references resolve.
ppp_lib_imports = "import ppp_lib\n"
  16.  
  17. def strip_comments(s):
  18.   """
  19.  Simple regex to get rid of all comments.
  20.  """
  21.   return re.sub(r"^[\t ]*\#.*$", '', s, flags=re.MULTILINE)
  22.  
  23.  
  24. def increment(str):
  25.   """
  26.  Replace all increment and decrement operators with calls to our library
  27.  functions to handle increment and decrements.
  28.  """
  29.   dec_replaced = re.sub(
  30.     r"([A-Za-z_]\w*)\-\-",
  31.     r"ppp_lib.incdec.PostDecrement('\1', locals(), globals())",
  32.     str)
  33.   inc_replaced = re.sub(
  34.     r"([A-Za-z_]\w*)\+\+",
  35.     r"ppp_lib.incdec.PostIncrement('\1', locals(), globals())",
  36.     dec_replaced)
  37.   return inc_replaced
  38.  
  39.  
  40. def function_map(code_body, mod_func):
  41.   """
  42.  Given the entire code body, it identifies all instances of functions and
  43.  processes them using the mod_func function.
  44.  1. Identify all definitions of functions (not nested, not class methods).
  45.  2. Call the mod_func function with the header/body of the function.
  46.  3. Insert in the result.
  47.  """
  48.   # Given the limited scope of functions that we're handling, to find
  49.   # functions, we just need to find instances of the string 'def '
  50.  
  51.   # code_lines[function_starts[i] : function_ends[i]] should give us a
  52.   # function definition
  53.   function_starts = []
  54.   function_ends = []
  55.   code_lines = code_body.split("\n")
  56.   line = 0
  57.   while line < len(code_lines):
  58.     # If we haven't found the end for the function yet, look for the end.
  59.     if len(function_starts) > len(function_ends) and len(code_lines[line]) > 0 \
  60.        and code_lines[line][0] != ' ':
  61.       function_ends.append(line)
  62.  
  63.     # Look for a function start.
  64.     if code_lines[line].startswith('def '):
  65.       function_starts.append(line)
  66.  
  67.     line += 1
  68.  
  69.   if len(function_starts) > len(function_ends):
  70.     function_ends.append(line)
  71.  
  72.   # Iterate over the identified function, modify the functions and replace
  73.   # them in the body.
  74.   for start,end in zip(function_starts, function_ends):
  75.     func_def = '\n'.join(code_lines[start:end])
  76.     mod_func_def = mod_func(func_def)
  77.     code_body = code_body.replace(func_def, mod_func_def)
  78.  
  79.   return code_body
  80.  
  81. def mutable_args_func(func_def):
  82.   """
  83.  Handle mutable defaulted parameters in the function definition.
  84.  """
  85.   # Search for the arguments in the function definition.
  86.   header_pattern = r"def[\t ]*[A-Za-z_]\w*\((?P<args>.*)\):\n"
  87.   header = re.search(header_pattern, func_def)
  88.   arg_string = header.group('args')
  89.   args = [s.strip() for s in arg_string.split(',') if len(s.strip()) > 0]
  90.  
  91.   # Iterate over the arguments and modify as needed.
  92.   new_args = []
  93.   for arg in args:
  94.     # If this is not a keyword argument, continue without changes.
  95.     arg_parts = [s.strip() for s in arg.split('=')]
  96.     if len(arg_parts) == 2:
  97.       arg_name = arg_parts[0]
  98.       arg_default_val = arg_parts[1]
  99.  
  100.       # Identify if the default value belongs to the set of mutable objects that
  101.       # we're handling.
  102.       mutable_arg_pattern = r"\[.*\]|\{.*\}"
  103.       match = re.match(mutable_arg_pattern, arg_default_val)
  104.  
  105.       # If this is an immutable default value, continue as is.
  106.       if match:
  107.         # Else use our ppp_lib mutable type to construct a None-like object.
  108.         arg_val = re.sub(
  109.           r"\[.*\]|\{.*\}",
  110.           'ppp_lib.mutableargs.PPP_Sentinel_Obj(\'{0}\')'.format(arg_name),
  111.           arg_default_val)
  112.         new_args.append('{0}={1}'.format(arg_name, arg_val))
  113.       else:
  114.         new_args.append('{0}={1}'.format(arg_name, arg_default_val))
  115.     else:
  116.       new_args.append(arg_parts[0])
  117.  
  118.   # Construct the modified header.
  119.   modified_header = header.group(0).replace(arg_string, ', '.join(new_args))
  120.  
  121.   # Using our "None"-type we add checks to the body to set the parameters to their
  122.   # default values if an overriden value is not provided.
  123.   func_body = re.sub(header_pattern, '', func_def)
  124.   arg_checks = []
  125.  
  126.   # Find the indent amount used in the function definition.
  127.   first_indented_line = [e for e in func_body.split("\n") if len(e) > 0 and e[0] == ' '][0]
  128.   indent = re.match(r"^([\t ]*)", first_indented_line).group(1)
  129.   for arg in args:
  130.     # If this is a keyword argument with a mutable default value, we add
  131.     # a check to the body for it.
  132.     arg_parts = [s.strip() for s in arg.split('=')]
  133.     if len(arg_parts) == 2:
  134.       arg_name = arg_parts[0]
  135.       arg_default_val = arg_parts[1]
  136.  
  137.       # Check if it's mutable.
  138.       mutable_arg_pattern = r"\[.*\]|\{.*\}"
  139.       match = re.match(mutable_arg_pattern, arg_default_val)
  140.       if match:
  141.         # If the type of the defaulted value is our "None"-type, then we set
  142.         # it's value to be that of it's original defaulted value.
  143.         arg_check =  "{0}if (type({1}) is ppp_lib.mutableargs.PPP_Sentinel_Obj):\n"
  144.         arg_check += "{0}    {1} = {2}"
  145.         arg_check = arg_check.format(indent, arg_name, arg_default_val)
  146.         arg_checks.append(arg_check)
  147.  
  148.   # Add the new checks to the function body, prepend the header and return the
  149.   # modified function.
  150.   func_body = '\n'.join(arg_checks) + '\n' + func_body
  151.   return modified_header + func_body
  152.  
  153.  
  154. def deep_copy(str):
  155.   """
  156.  Simple (albeit not very robust) regex to replace all deep copies with
  157.  list comprehensions.
  158.  """
  159.   mult_pattern = r"\][\t ]*\*[\t ]*([a-zA-Z0-9_-]+)"
  160.   return re.sub(mult_pattern, r" for _ in range(\1)]", str)
  161.  
  162. def tail_call(func_def):
  163.   """
  164.  Given a function definition, identify all tail self-recursive calls and
  165.  handle them accordingly.
  166.  """
  167.   # Identify the indent amount
  168.   first_indented_line = [e for e in func_def.split("\n") if len(e) > 0 and e[0] == ' '][0]
  169.   indent = re.match(r"^([\t ]*)", first_indented_line).group(1)
  170.  
  171.   # Identify the function name
  172.   header_pattern = r"def[\t ]*(?P<name>[A-Za-z_]\w*)\(.*\):\n"
  173.   header = re.search(header_pattern, func_def)
  174.   func_name = header.group("name")
  175.  
  176.   # Find (for now only single line) recursive calls. Store their line number iff
  177.   # it appears to be a tail recursive call.
  178.   code_lines = func_def.split("\n")
  179.   line = 0
  180.   tail_calls = []
  181.   while line < len(code_lines):
  182.     stripped_line = code_lines[line].strip()    
  183.     line += 1
  184.  
  185.     # Continue if it doesn't start with a return
  186.     if not (stripped_line.startswith("return %s" % func_name) or
  187.             stripped_line.startswith(func_name)):
  188.       continue
  189.  
  190.     # Continue if the line ends in something other than a close paren.
  191.     # Obviously, this is not robust, but we assume it's sufficient.
  192.     if not stripped_line.endswith(")"):
  193.       continue
  194.  
  195.     # If we've made it this far, add the line number (of cur iteration).
  196.     tail_calls.append(line-1)
  197.    
  198.   # Parse out the parameters (assume no keyword arguments)
  199.   param_pattern = r"def[\t ]*[A-Za-z_]\w*\((?P<args>.*)\):\n"
  200.   parameters = re.search(param_pattern, func_def).group('args')
  201.   # Now process all of the tail calls.    
  202.   for i in tail_calls:
  203.     # Retrieve the tail recursive call
  204.     tail_call_line = code_lines[i]
  205.  
  206.     # Parse out the arguments. Assume no nested brackets, simple functional
  207.     # call.
  208.     start_index = tail_call_line.index("(")
  209.     end_index = tail_call_line.index(")")
  210.     args = tail_call_line[start_index+1:end_index]
  211.  
  212.     # Get indent of the tail call line
  213.     tail_indent = re.match(r"^([\t ]*)", tail_call_line).group(1)
  214.  
  215.     # Create two lines: 1. updating params, 2. raising a StopIteration error
  216.     update_line = tail_indent + parameters + " = " + args + "\n"
  217.     error_line = tail_indent + "raise ppp_lib.tail_call.NextCall"
  218.  
  219.     # Replace the tail call line with the modified version
  220.     func_def = func_def.replace(tail_call_line, update_line + error_line)    
  221.    
  222.   # Indent all of the function definition (after the header on the first line)
  223.   code_lines = func_def.split("\n")
  224.  
  225.   # Indent every line after the header
  226.   code_lines = [2*indent+line if i > 0 else line for i,line in enumerate(code_lines)]
  227.  
  228.   # Insert a while True as the first line after the header
  229.   code_lines.insert(1, indent + "while True:")
  230.  
  231.   # Insert a try statement as the second line after the header
  232.   code_lines.insert(2, indent*2 + "try:")
  233.  
  234.   # Insert a break right after the code. If we've made it this far without a function call,
  235.   # we would usually exit out without a return statement. Since we don't want to infinite loop
  236.   # we have to do this by breaking out of the while True loop.
  237.   code_lines.append(3*indent + "break")
  238.  
  239.   # The very last lines will be catching the exception and continuing the outer loop.
  240.   code_lines.append(2*indent + "except ppp_lib.tail_call.NextCall:")
  241.   code_lines.append(3*indent + "continue")
  242.  
  243.   return "\n".join(code_lines)
  244.  
  245. def main():
  246.   if len(sys.argv) != 2:
  247.     print('Invalid args. Please provide a filename.')
  248.     sys.exit(-1)
  249.  
  250.   input_file_path = os.path.abspath(sys.argv[1])
  251.  
  252.   # Basic sanity check to ensure argument is a file.
  253.   if not os.path.isfile(input_file_path):
  254.     print('Invalid args. Please provide a filename.')
  255.     sys.exit(-1)
  256.  
  257.   # Ensure we're given a ppp file.
  258.   input_file_basename = os.path.basename(input_file_path)
  259.   filename, input_file_ext = os.path.splitext(input_file_basename)
  260.   if input_file_ext != '.ppp':
  261.     print('Input file not a Python++ file.')
  262.     sys.exit(-1)
  263.  
  264.   # Construct the absolute path to the new file.
  265.   compiled_file_path = os.path.join(os.path.dirname(input_file_path), filename + '.py')
  266.  
  267.   # Read input file.
  268.   fin = open(input_file_path, 'r')
  269.   ppp_source = ''.join(fin.readlines())
  270.   fin.close()
  271.  
  272.   # Perform transforms.
  273.   ppp_source = strip_comments(ppp_source)
  274.   ppp_source = deep_copy(ppp_source)
  275.   ppp_source = function_map(ppp_source, mutable_args_func)
  276.   ppp_source = function_map(ppp_source, tail_call)
  277.   ppp_source = increment(ppp_source)
  278.  
  279.   # Open output file and dump the modified code to it.
  280.   fout = open(compiled_file_path, 'w')
  281.   fout.write(ppp_lib_imports)
  282.   fout.write(ppp_source)
  283.   fout.close()
  284.  
  285.   # Pass the output file to the Python3 interpreter.
  286.   subprocess.call('python3 %s' % compiled_file_path, shell=True)
  287.   sys.exit(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement