Advertisement
Mat4297

Father soon

Apr 15th, 2025 (edited)
270
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 14.44 KB | Source Code | 0 0
  1. import pandas as pd
  2. import graphviz # Required for 'graphviz' format type
  3. import os
  4. from io import StringIO # To read string data into DataFrame for example
  5.  
  6. # ==============================================================
  7. # PASTE THE FULL CODE FOR THE FOLLOWING FUNCTIONS HERE:
  8. # - _generate_graphviz_code
  9. # - _generate_mermaid_code
  10. # - generate_hierarchy_visualization
  11. # - expand_nested_groups (if you need the expanded table)
  12. # - add_super_masters (if you use super masters)
  13. # ==============================================================
  14. # --- Start of Function Definitions (Paste from previous answers) ---
  15.  
  16. def expand_nested_groups(df, master_col, soon_col):
  17.     # --- (Full code from previous answer) ---
  18.     if master_col not in df.columns or soon_col not in df.columns: return None
  19.     df_original = df[[master_col, soon_col]].copy().dropna(subset=[master_col, soon_col])
  20.     df_original[master_col] = df_original[master_col].astype(str)
  21.     df_original[soon_col] = df_original[soon_col].astype(str)
  22.     all_masters = set(df_original[master_col].unique())
  23.     df_expanded = df_original.rename(columns={master_col: 'Level_0', soon_col: 'Level_1'})
  24.     df_final_paths = pd.DataFrame()
  25.     current_level = 1
  26.     max_iterations = 20
  27.     iterations = 0
  28.     while not df_expanded.empty and iterations < max_iterations:
  29.         iterations += 1
  30.         level_col = f'Level_{current_level}'
  31.         if level_col not in df_expanded.columns:
  32.              df_final_paths = pd.concat([df_final_paths, df_expanded], ignore_index=True); break
  33.         is_master_mask = df_expanded[level_col].isin(all_masters)
  34.         df_to_expand_further = df_expanded[is_master_mask].copy()
  35.         df_terminal_at_this_level = df_expanded[~is_master_mask].copy()
  36.         if not df_terminal_at_this_level.empty:
  37.             df_final_paths = pd.concat([df_final_paths, df_terminal_at_this_level], ignore_index=True)
  38.         if df_to_expand_further.empty: break
  39.         current_level += 1
  40.         next_level_col = f'Level_{current_level}'
  41.         df_expanded_next = pd.merge(df_to_expand_further, df_original, left_on=level_col, right_on=master_col, how='left', suffixes=('', '_lookup'))
  42.         df_expanded_next = df_expanded_next.rename(columns={soon_col: next_level_col})
  43.         if master_col in df_expanded_next.columns: df_expanded_next = df_expanded_next.drop(columns=[master_col])
  44.         no_members_mask = df_expanded_next[next_level_col].isna()
  45.         if no_members_mask.any():
  46.              df_final_paths = pd.concat([df_final_paths, df_expanded_next[no_members_mask]], ignore_index=True)
  47.              df_expanded_next = df_expanded_next[~no_members_mask]
  48.         df_expanded = df_expanded_next
  49.         if iterations == max_iterations: print(f"Warning: Max iterations ({max_iterations}) reached."); df_final_paths = pd.concat([df_final_paths, df_expanded], ignore_index=True)
  50.     max_level_found = 0; final_cols = []
  51.     for i in range(current_level + 2):
  52.          col_name = f'Level_{i}';
  53.          if col_name in df_final_paths.columns: final_cols.append(col_name); max_level_found = i
  54.          else: break
  55.     if not final_cols:
  56.         if 'Level_0' in df_final_paths.columns and 'Level_1' in df_final_paths.columns: final_cols = ['Level_0', 'Level_1']; max_level_found = 1
  57.         elif not df_final_paths.empty: final_cols = df_final_paths.columns.tolist();
  58.         if final_cols: max_level_found = len(final_cols) -1
  59.         else: return pd.DataFrame()
  60.     df_final_paths = df_final_paths[final_cols].copy()
  61.     if final_cols: rename_dict = {f'Level_{max_level_found}': 'Ultimate_Member', 'Level_0': 'Initial_Master'}; df_final_paths.rename(columns=rename_dict, inplace=True)
  62.     return df_final_paths.drop_duplicates().reset_index(drop=True)
  63.  
  64.  
  65. def add_super_masters(df_original, master_col, soon_col, super_master_map):
  66.      # --- (Full code from previous answer) ---
  67.     if not isinstance(df_original, pd.DataFrame): return None
  68.     if master_col not in df_original.columns or soon_col not in df_original.columns: return None
  69.     if not isinstance(super_master_map, dict): return None
  70.     new_rows = []
  71.     for super_master, original_masters in super_master_map.items():
  72.         if isinstance(original_masters, (list, tuple)):
  73.             for original_master in original_masters: new_rows.append({master_col: str(super_master), soon_col: str(original_master)})
  74.         else: print(f"Warning: Value for super master '{super_master}' not list/tuple."); continue
  75.     if not new_rows: print("Warning: No new relationships generated."); return df_original.copy()
  76.     df_new_relations = pd.DataFrame(new_rows)
  77.     df_original_copy = df_original.astype(str) # Ensure original data is string
  78.     df_combined = pd.concat([df_new_relations, df_original_copy], ignore_index=True)
  79.     return df_combined
  80.  
  81. def _generate_graphviz_code(df_plot, master_col, soon_col, **kwargs):
  82.     # --- (Full code from previous answer) ---
  83.     rankdir = kwargs.get('rankdir', 'TB'); filename = kwargs.get('filename', 'graphviz_output'); render_format = kwargs.get('render_format', 'png'); view = kwargs.get('view', False); render_graph = kwargs.get('render', False)
  84.     all_masters = set(df_plot[master_col].unique()); all_items = set(all_masters).union(set(df_plot[soon_col].unique()))
  85.     dot = graphviz.Digraph(comment='Group Hierarchy', format=render_format, graph_attr={'rankdir': rankdir}, strict=True)
  86.     node_map = {}
  87.     for item in all_items:
  88.         node_id = ''.join(filter(str.isalnum, item.replace(' ', '_'))); node_label = item;
  89.         if not node_id: node_id = f"node_{hash(item)}"
  90.         node_map[item] = node_id # Store mapping for edges
  91.         if item in all_masters: dot.node(node_id, node_label, shape='box', style='filled', fillcolor='lightblue')
  92.         else: dot.node(node_id, node_label, shape='ellipse')
  93.     for _, row in df_plot.iterrows():
  94.         master_name = row[master_col]; soon_name = row[soon_col]
  95.         master_node_id = node_map.get(master_name)
  96.         soon_node_id = node_map.get(soon_name)
  97.         if master_node_id and soon_node_id: dot.edge(master_node_id, soon_node_id)
  98.         else: print(f"Warning: Missing node ID for edge {master_name} -> {soon_name}")
  99.     if render_graph:
  100.         try: rendered_path = dot.render(filename, view=view, cleanup=True, format=render_format); print(f"Graph saved to: {rendered_path}"); return rendered_path
  101.         except graphviz.ExecutableNotFound: print("\nError: graphviz.ExecutableNotFound..."); return None
  102.         except Exception as e: print(f"\nAn error occurred during graph rendering: {e}"); return None
  103.     else: return dot
  104.  
  105. def _generate_mermaid_code(df_plot, master_col, soon_col, **kwargs):
  106.     # --- (Full code from previous answer) ---
  107.     rankdir = kwargs.get('rankdir', 'TB'); save_path = kwargs.get('save_to_file')
  108.     mermaid_code = [f"graph {rankdir};"]; all_masters = set(df_plot[master_col].unique()); all_items = set(all_masters).union(set(df_plot[soon_col].unique()))
  109.     node_definitions = []; style_definitions = []; node_map = {}
  110.     for i, item in enumerate(all_items):
  111.         safe_chars = ''.join(filter(str.isalnum, item)); mermaid_id = f"id_{i}_{safe_chars[:20]}";
  112.         if not safe_chars: mermaid_id = f"id_empty_{i}"
  113.         node_map[item] = mermaid_id; node_label = item.replace('"', '#quot;')
  114.         node_definitions.append(f'    {mermaid_id}["{node_label}"]')
  115.         if item in all_masters: style_definitions.append(f'    style {mermaid_id} fill:#lightblue,stroke:#333,stroke-width:1px')
  116.     mermaid_code.extend(node_definitions); mermaid_code.extend(style_definitions); edge_definitions = []
  117.     for _, row in df_plot.iterrows():
  118.         master_name = row[master_col]; soon_name = row[soon_col]
  119.         if master_name in node_map and soon_name in node_map: master_id = node_map[master_name]; soon_id = node_map[soon_name]; edge_definitions.append(f"    {master_id} --> {soon_id};")
  120.         else: print(f"Warning: Could not find map ID for edge: {master_name} -> {soon_name}")
  121.     mermaid_code.extend(edge_definitions); final_code = "\n".join(mermaid_code)
  122.     if save_path:
  123.         try:
  124.             with open(save_path, 'w', encoding='utf-8') as f: f.write(final_code); print(f"Mermaid code saved to: {save_path}")
  125.         except Exception as e: print(f"Error saving Mermaid code to {save_path}: {e}")
  126.     return final_code
  127.  
  128. def generate_hierarchy_visualization(df_input, master_col, soon_col, format_type='graphviz', **kwargs):
  129.     # --- (Full code from previous answer, including docstring) ---
  130.     """Generates hierarchy visualization code or renders a graph."""
  131.     if not isinstance(df_input, pd.DataFrame): print("Error: df_input must be a pandas DataFrame."); return None
  132.     if master_col not in df_input.columns or soon_col not in df_input.columns: print(f"Error: Columns '{master_col}' or '{soon_col}' not found."); return None
  133.     df_plot = df_input[[master_col, soon_col]].copy().dropna(subset=[master_col, soon_col])
  134.     try: df_plot[master_col] = df_plot[master_col].astype(str); df_plot[soon_col] = df_plot[soon_col].astype(str)
  135.     except Exception as e: print(f"Warning: Could not convert columns to string. Error: {e}")
  136.     format_type = format_type.lower()
  137.     if format_type == 'graphviz': print("--- Generating Graphviz data ---"); return _generate_graphviz_code(df_plot, master_col, soon_col, **kwargs)
  138.     elif format_type == 'mermaid': print("--- Generating Mermaid code ---"); return _generate_mermaid_code(df_plot, master_col, soon_col, **kwargs)
  139.     else: print(f"Error: Unsupported format_type '{format_type}'."); return None
  140.  
  141. # --- End of Function Definitions ---
  142.  
  143.  
  144. # ==============================================================
  145. #                      EXAMPLE USAGE
  146. # ==============================================================
  147.  
  148. # 1. --- Define Input Data ---
  149. # Using the previous example data
  150. data = """
  151. Group Name,Member
  152. Core Dev Team,Alice
  153. Core Dev Team,Bob
  154. Core Dev Team,Charlie
  155. UI/UX Crew,David
  156. UI/UX Crew,Eve
  157. Backend Brigade,Core Dev Team
  158. Backend Brigade,Frank
  159. Frontend Force,UI/UX Crew
  160. Frontend Force,Grace
  161. Project Dragon,Backend Brigade
  162. Project Dragon,Frontend Force
  163. Project Dragon,Hannah
  164. QA Squad,Ian
  165. QA Squad,Judy
  166. Operations Unit,Project Dragon
  167. Operations Unit,QA Squad
  168. Operations Unit,Kevin
  169. """
  170. data_io = StringIO(data)
  171. df_input_original = pd.read_csv(data_io)
  172.  
  173. master_column_name = 'Group Name'
  174. soon_column_name = 'Member'
  175.  
  176. print("--- Original Input DataFrame ---")
  177. print(df_input_original.head()) # Display first few rows
  178.  
  179. # 2. --- Optional: Define and Add Super Masters ---
  180. super_masters = {
  181.     'Overall Company': ['Operations Unit', 'Frontend Force'],
  182.     'Special Projects': ['Backend Brigade']
  183. }
  184.  
  185. # Create the augmented DataFrame (includes original + super masters)
  186. df_augmented = add_super_masters(
  187.     df_input_original,
  188.     master_column_name,
  189.     soon_column_name,
  190.     super_masters
  191. )
  192.  
  193. # --- >> NEW: Save the Augmented DataFrame << ---
  194. if df_augmented is not None:
  195.     augmented_filename_csv = 'augmented_hierarchy_input.csv'
  196.     # augmented_filename_excel = 'augmented_hierarchy_input.xlsx' # Optional Excel
  197.     try:
  198.         df_augmented.to_csv(augmented_filename_csv, index=False, encoding='utf-8')
  199.         print(f"\n--- Augmented DataFrame saved to: {augmented_filename_csv} ---")
  200.         # To save as Excel (requires openpyxl: pip install openpyxl):
  201.         # df_augmented.to_excel(augmented_filename_excel, index=False)
  202.         # print(f"--- Augmented DataFrame saved to: {augmented_filename_excel} ---")
  203.     except Exception as e:
  204.         print(f"Error saving augmented DataFrame: {e}")
  205.  
  206.     # Display head of augmented data
  207.     print(df_augmented.head(10).to_string()) # Show new + old rows
  208.     print("...")
  209. else:
  210.     print("Failed to create augmented DataFrame. Skipping subsequent steps.")
  211.     # Exit or handle error appropriately if needed
  212.     exit() # Example: Stop script if augmentation failed
  213.  
  214.  
  215. # 3. --- Optional: Expand the Augmented Hierarchy ---
  216. # Use the 'df_augmented' DataFrame as input for expansion
  217. df_expanded_result = expand_nested_groups(
  218.     df_augmented, # Use the DataFrame with super masters
  219.     master_column_name,
  220.     soon_column_name
  221. )
  222.  
  223. # --- >> NEW: Save the Expanded DataFrame << ---
  224. if df_expanded_result is not None:
  225.     expanded_filename_csv = 'expanded_hierarchy_paths.csv'
  226.     # expanded_filename_excel = 'expanded_hierarchy_paths.xlsx' # Optional Excel
  227.     try:
  228.         df_expanded_result.to_csv(expanded_filename_csv, index=False, encoding='utf-8')
  229.         print(f"\n--- Expanded Paths DataFrame saved to: {expanded_filename_csv} ---")
  230.         # To save as Excel (requires openpyxl: pip install openpyxl):
  231.         # df_expanded_result.to_excel(expanded_filename_excel, index=False)
  232.         # print(f"--- Expanded Paths DataFrame saved to: {expanded_filename_excel} ---")
  233.     except Exception as e:
  234.         print(f"Error saving expanded DataFrame: {e}")
  235.  
  236.     # Display the expanded result
  237.     print("\n--- Fully Expanded DataFrame (from Augmented Data) ---")
  238.     print(df_expanded_result.to_string())
  239. else:
  240.     print("Failed to expand the augmented DataFrame.")
  241.  
  242.  
  243. # 4. --- Generate Visualizations (using Augmented Data) ---
  244. # Now use 'df_augmented' as the input for visualization functions
  245.  
  246. print("\n======= Generating Visualizations (from Augmented Data) =======")
  247.  
  248. # Example 4a: Generate Mermaid code string and save to file
  249. mermaid_output = generate_hierarchy_visualization(
  250.     df_input=df_augmented, # Use the DataFrame with super masters
  251.     master_col=master_column_name,
  252.     soon_col=soon_column_name,
  253.     format_type='mermaid',
  254.     rankdir='TB',
  255.     save_to_file='final_hierarchy.mmd' # Save Mermaid code
  256. )
  257. if mermaid_output:
  258.     print("\n--- Mermaid Code Snippet: ---")
  259.     print("```mermaid")
  260.     # Print first few lines for preview if long
  261.     print('\n'.join(mermaid_output.splitlines()[:15]))
  262.     if len(mermaid_output.splitlines()) > 15: print("...")
  263.     print("```")
  264.     print("(Full code saved to final_hierarchy.mmd)")
  265.  
  266. # Example 4b: Render Graphviz to PNG and view
  267. gv_rendered_path = generate_hierarchy_visualization(
  268.     df_input=df_augmented, # Use the DataFrame with super masters
  269.     master_col=master_column_name,
  270.     soon_col=soon_column_name,
  271.     format_type='graphviz',
  272.     render=True,  # Render the image
  273.     filename='final_hierarchy_gv', # Base filename for .png and .gv
  274.     render_format='png',
  275.     view=True,     # Try to open the PNG
  276.     rankdir='LR'
  277. )
  278. if gv_rendered_path:
  279.      print(f"Graphviz rendering process initiated for {gv_rendered_path}.")
  280.  
  281. print("\n--- Script finished ---")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement