Advertisement
max2201111

very good VK last orezane #

Jun 20th, 2024
552
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 15.68 KB | Science | 0 0
  1. # Navod na pouziti, Mgr. Hynek Mlčoušek, v Brne 2.5.2024
  2. # Ulozte do lokalniho souboru u sebe na PC data tohoto tvaru vzdy ukoncene 0 ci 1 (jde o uceni s ucitelem: 1 = nemocny, 0 = prezil/zdravy, ve vystupu bude zelena znacit 0, cervena 1)  a bez znaku #; pozor na ","
  3.  
  4. # [ [23.657800719276743,18.859916797201468,0],
  5. # [22.573729142097473,17.96922325097786,0],
  6. # [32.55342396968757,29.463651408558803,0],
  7. # [6.718035041529263,25.704665468161718,1],
  8. # [14.401918566243225,16.770856492924658,0],
  9. # [17.457907312962234,21.76521470574044,0],
  10. # [20.02796946568093,73.45445954770891,1],
  11. # [30.295138369778076,62.901112886193246,1],
  12. # [15.128977804449633,32.40267702110393,0],
  13. # [30.179457395820013,58.982492125646104,1],
  14. # [28.01649701854089,63.92781357637711,1],
  15. # [16.791838457871147,42.33482314089884,0],
  16. # [10.583694293380976,19.61926728942497,0],
  17. # [26.634447074406467,91.96624817360987,1],
  18. # [26.217868623367643,36.400293587062976,0],
  19. # [17.689396788624936,60.79797114006423,1],
  20. # [33.17193822527976,66.75277364959176,1],
  21. # [23.793952755709153,22.57501437360518,0]]
  22.  
  23. # kliknete na cerne tlacitko s trojuhelnickem vlevo nahore
  24. # pod kodem se objevi moznost spustit dialogove okenko, kliknete na nej
  25. # soubor, ktery mate z bodu vyse vyberte a nahrajte
  26. # Najdete v tomto kodu retezec:
  27. ### ZDE VLOZTE DATA OD NOVYCH PACIENTU
  28.  
  29. # Vlozte do pole
  30. # new_persons_results = []
  31. # data o nekolika malo novych pacientech bez ukoncovaci 0 a 1, ale se stejnym poctem sloupcu jako ma soubor z Vaseho lokalniho disku, vyse by tedy toto bylo rovno 2
  32. # kod vyhodi hned po natrenovani, (jehoz prubeh muzete sledovat na modre progres bare) pro kazdy radek z new_persons_results bilo-sedo-cerne ctverecky vznikle z normalizace poskytnutych dat a ukoncovaci ctverecek cerveny pripadne zeleny
  33. # zaroven s tim se vypise realne cislo mezi 0 a 1 znacici jak moc je pacient zdravy (blizke 0) ci nemocny (blizke 1)
  34. # cisla uprostred pak indikuji zadany oranzovy semafor.
  35. # je na lekarich nastavit thresholdy (tedy pravdepodobnosti: cisla mezi 0 a 1), ktere pak daji zaver, zda je pacient cerveny, oranzovy ci zeleny
  36.  
  37. # prosim o komentare a vysledky na realnych datech, je zadouci, aby radku v matici, tedy pacientu, byly stovky a sloupcu desitky
  38. # Moznosti vyuziti: onkologicka diagnoza vs. zdrava kontrolni skupina, diabetes (pritomnost/nepritomnost), testovani noveho leku oproti placebu atd.
  39.  
  40. # kod zaroven vyhodi confusion matici, tedy mozne True Negative a False Positive plus spravne zarazene hodnoty spolu s presnosti,F1 score recall atd.
  41. # poznamka ke kodu: jde o experimentalni verzi, ktera krome skutecne potrebneho kodu obsahuje ladici informace, ruzne duplicity, nadbytecne prikazy atd.
  42. # Na uvod behu programu se pro kontrolu vypise poskytnuta matice a jeji normalizovana verze, je treba sjet jezdcem napravo nize na obrazky a dalsi vystupy
  43.  
  44. # Dekuji profesoru Petru Dostalovi za namet k teto praci a poskytnuta data, byt je potreba mit data realna
  45.  
  46. import numpy as np
  47. import matplotlib.pyplot as plt
  48. import tensorflow as tf
  49. from tqdm import tqdm
  50. from sklearn.preprocessing import StandardScaler
  51.  
  52. from IPython.display import display
  53. from IPython.display import Javascript
# Inject JavaScript that sets the notebook output area's auto-scroll threshold
# to 9999 (presumably so long output is not collapsed into a scroll box —
# TODO confirm this still has an effect in the target notebook front end).
display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))

# RGB colour used for the label column of the generated images:
# 0 -> green, 1 -> red.
label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}
# Same mapping, kept as a separate dict for the images of the test set.
label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}

# IPython magic (not plain Python): selects matplotlib's inline backend.
%matplotlib inline
  60.  
  61. # Function to create images based on predictions
  62. def create_image(data, predictions):
  63.     num_rows, num_columns = len(data), len(data[0])
  64.     image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
  65.  
  66.     for i in range(num_rows):
  67.         for j in range(num_columns):
  68.             pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
  69.             image[i, j] = np.array([pixel_value] * 3)
  70.  
  71.         # Create a gradient based on the normalized values
  72.         gradient_value = int(np.interp(predictions[i], [0, 1], [0, 255]))
  73.         image[i, -1] = np.array([gradient_value] * 3)
  74.  
  75.     return image
  76.  
  77. def create_image(data, predictions, label_colors):
  78.     num_rows, num_columns = len(data), len(data[0])
  79.     image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
  80.  
  81.     for i in range(num_rows):
  82.         for j in range(num_columns):
  83.             pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
  84.             image[i, j] = np.array([pixel_value] * 3)
  85.  
  86.         # Use the specified color for the last column based on the label
  87.         image[i, -1] = label_colors[predictions[i]]
  88.  
  89.     return image
  90.  
  91. def create_imageN(data, predictions, label_colors=None):
  92.     num_training_rows = len(data)  # Set the number of rows based on the data
  93.     num_columns = len(data[0])
  94.  
  95.     image_training = np.zeros((num_training_rows, num_columns + 1, 3), dtype=np.uint8)
  96.  
  97.     min_pixel_value = np.min(X_train_normalized)
  98.     max_pixel_value = np.max(X_train_normalized)
  99.  
  100.     for i in range(num_training_rows):
  101.         # Normalize the first columns independently
  102.         for j in range(num_columns):
  103.             pixel_value = int(np.interp(data[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
  104.             image_training[i, j] = np.array([pixel_value] * 3)
  105.  
  106.         # Normalize the last column separately to achieve grayscale
  107.         pixel_value_last = int(np.interp(data[i][-1], [min_pixel_value, max_pixel_value], [0, 255]))
  108.         image_training[i, -1] = np.array([pixel_value_last] * 3)
  109.  
  110.         # Use the specified color for the last column based on the label
  111.         if label_colors is not None:
  112.             image_training[i, -1] = label_colors[predictions[i]]
  113.  
  114.     return image_training
  115.  
  116. # Load data from a file
  117. from google.colab import files
  118. import io
  119. import pandas as pd
  120. import os
  121. import shutil
  122. import ast
  123.  
# Open the Colab file-picker dialog; the result is indexed by file name and
# yields the uploaded bytes (see the uses of uploaded[fn] below).
uploaded = files.upload()

# Preview of every uploaded file, parsed as CSV.
# NOTE(review): the usage notes at the top of the file describe the input as a
# Python-style nested list, not CSV, so this preview is only informational —
# `df` is not used anywhere else in the visible code.
for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))
    path = io.BytesIO(uploaded[fn])  # wrap the raw bytes in a file-like object for pandas
    df = pd.read_csv(path)
    print(df.head())  # print the first five rows of the DataFrame

# Filled with the names of the per-person variables further below.
all_results = []

# Move every uploaded file to the fixed location /content/DATA2.
# os.path.join returns its second argument unchanged here because that
# argument is already an absolute path.
# NOTE(review): all files share the same destination, so with several uploads
# only one of them can remain at /content/DATA2 — confirm single-file use.
for filename in uploaded.keys():
    original_path = f"/content/{filename}"
    destination_path = os.path.join("/content/", "/content/DATA2")
    shutil.move(original_path, destination_path)
    print(f"Soubor {filename} byl přesunut do {destination_path}")

file_path = '/content/DATA2'  # path of the data file
with open(file_path, 'r') as file:
    code = file.read()

# Parse the file content as a Python literal (ast.literal_eval accepts
# literals only, it does not execute code).
A_list = ast.literal_eval(code)

# Convert to a NumPy array
A = np.array(A_list)
  151.  
  152. # Assign values to variables dynamically based on the rows of matrix A
  153. for i, row in enumerate(A, start=1):
  154.     globals()[f"person{i}_results"] = list(row)
  155.  
  156. # Print the assigned variables
  157. for i in range(1, len(A) + 1):
  158.     all_results.append(f"person{i}_results")
  159.  
  160. result_variables = []
  161.  
  162. # Loop through the variable names and get the corresponding variables using globals()
  163. for var_name in all_results:
  164.     result_variables.append(globals()[var_name])
  165.  
  166. # Now, result_variables contains the variables with names specified in variable_names
  167. all_results = result_variables
  168. new_persons_results = result_variables
  169.  
  170. # Extract the last column (0 or 1) as labels
  171. labels = [results[-1] for results in all_results]
  172.  
  173. # Remove the last column from the dataset
  174. data = [results[:-1] for results in all_results]
  175.  
  176. # Define the number of rows for training and testing
  177. num_training_rows = 50
  178. num_testing_rows = 50
  179.  
  180. # Split the data into training and testing datasets
  181. X_train, X_test, y_train, y_test = data[:num_training_rows], data[:num_testing_rows], labels[:num_training_rows], labels[:num_testing_rows]
  182.  
  183. # Normalize the data using StandardScaler
  184. scaler = StandardScaler()
  185. X_train_normalized = scaler.fit_transform(X_train)
  186.  
  187. # Print normalized training data
  188. print("Normalized Training Data:")
  189. print(X_train_normalized)
  190. print("Adenormalized (Z-score):", X_train_normalized * scaler.scale_ + scaler.mean_, "Bdenormalized")
  191.  
  192. # Define a simple neural network model
  193. model = tf.keras.Sequential([
  194.     tf.keras.layers.Dense(128, activation='relu', input_shape=(len(X_train[0]),)),
  195.     tf.keras.layers.Dense(64, activation='relu'),
  196.     tf.keras.layers.Dense(1, activation='sigmoid')
  197. ])
  198.  
  199. # Compile the model
  200. model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  201.  
  202. # Lists to store accuracy values
  203. accuracy_history = []
  204.  
  205. # Create images for the training data
  206. image_training = np.zeros((num_training_rows, len(X_train[0]) + 1, 3), dtype=np.uint8)
  207.  
  208. min_pixel_value = np.min(X_train_normalized)
  209. max_pixel_value = np.max(X_train_normalized)
  210.  
  211. # Populate image_training with consistent gray and red/green colors based on the labels in the last column
  212. for i, label in enumerate(y_train):
  213.     for j in range(len(X_train[0])):
  214.         pixel_value = int(np.interp(X_train_normalized[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
  215.         image_training[i, j] = np.array([pixel_value] * 3)
  216.     image_training[i, -1] = np.array([128, 128, 128])
  217.     if label == 0:
  218.         image_training[i, -1] = np.array([0, 128, 0])
  219.     elif label == 1:
  220.         image_training[i, -1] = np.array([255, 0, 0])
  221.  
  222. from tqdm.notebook import tqdm_notebook
  223.  
# Number of iterations of the training loop below; each iteration fits the
# model for one epoch.
epochs = 1397

### ZDE VLOZTE DATA OD NOVYCH PACIENTU
# (Marker referenced by the usage notes at the top of the file: "insert the
# data of new patients here".)
# One row per new person, features only — no trailing 0/1 label.  The training
# loop checks that each row has as many values as a training row.
# This assignment replaces the value bound to new_persons_results earlier in
# the script.
new_persons_results = [
    [0.030391238492519845, 0.23021081913032299, 0.4743575198860915, 0.639395348276238],
    [0.19790381537769108, 0.37639843860181527, 0.5676528538456297, 0.716530820399044],
    [0.0035245462826666075, 0.23127629815305784, 0.4802171123709532, 0.6591272725083992],
    [0.059230621364548486, 0.24424510845680134, 0.442553808602372, 0.6891856336835676],
    [0.05536813173866345, 0.2538888869331579, 0.47861285542743165, 0.6200559751500355],
    [0.1300359168058454, 0.38443677757577344, 0.5957238735056223, 0.795823160451845],
    [0.1743368240338569, 0.3713129035302336, 0.5640350202165867, 0.7213527928848786],
    [0.09173335232875372, 0.2559096689549753, 0.49527436563146954, 0.6970388573439903],
    [0.015235204378572087, 0.2284904031445293, 0.46613902406934005, 0.6917336579549159],
    [0.0011416656054787145, 0.24567669307188245, 0.4388400949432476, 0.667323193441009],
    [0.06448448763849592, 0.2115323519931734, 0.43540989127902197, 0.6438994375658477],
    [0.1281083326647467, 0.319011666415554, 0.5081581898266203, 0.7238539046118706],
    [0.031400839963864634, 0.291826671945583, 0.44935681772218605, 0.6775565554946026],
    [0.06087306495870359, 0.23991257024083634, 0.4485025638007111, 0.680857926545652],
    [0.16944969856928027, 0.3433985701275623, 0.5739960718239413, 0.7587431345359652],
    [0.005679255126553562, 0.2703897890888177, 0.47083369294815347, 0.629981449029764],
    [0.1361411186548812, 0.3699350229482504, 0.5880045061520169, 0.709568518897945],
    [0.07538207440920129, 0.20062324901664458, 0.40581823748211543, 0.6337591072862666],
    [0.017969960867618918, 0.21435679119605028, 0.4881930298975361, 0.668393388428822],
    [0.08565125103289023, 0.29383944243424687, 0.4732898824502158, 0.6500725888934386],
    [0.08477898050514117, 0.21919257927575692, 0.49117946288913483, 0.6532321710710468],
    [0.18637585263771708, 0.30107373793178105, 0.5235238878093704, 0.7912391738401261],
]
  251.  
  252. import sys
  253.  
  254. for epoch in tqdm_notebook(range(epochs)):
  255.     history = model.fit(X_train_normalized, np.array(y_train), epochs=1, verbose=0, shuffle=False)
  256.     accuracy_history.append(history.history['accuracy'][0])
  257.  
  258.     if epoch == 1:
  259.         # Normalize the testing data
  260.         X_test_normalized = scaler.transform(X_test)
  261.         y_pred_after_2nd_epoch = model.predict(X_test_normalized)
  262.         y_pred_binary_after_2nd_epoch = [1 if pred >= 0.5 else 0 for pred in y_pred_after_2nd_epoch]
  263.         image_testing_before_2nd_epoch = create_image(X_test_normalized, y_pred_binary_after_2nd_epoch, label_colors_testing)
  264.  
  265.     if epoch >= epochs-1:
  266.         print(f"HERE HERE Epoch: {epoch}, Epochs: {epochs}\n")
  267.         sys.stdout.flush()
  268.  
  269.         # Iterate through new persons
  270.         for idx, personNEW_results in enumerate(new_persons_results, start=1):
  271.             # Ensure that personNEW_results has the same number of features as the model expects
  272.             assert len(personNEW_results) == len(X_train[0]), "Mismatch in the number of features."
  273.  
  274.             personNEW_results_normalized = scaler.transform([personNEW_results])
  275.  
  276.             personNEW_prediction = model.predict(np.array([personNEW_results_normalized]))
  277.             personNEW_label = 1 if personNEW_prediction >= 0.5 else 0
  278.             y_pred_after_50_epochs = model.predict(X_test_normalized)
  279.             y_pred_binary_after_50_epochs = [1 if pred >= 0.5 else 0 for pred in y_pred_after_50_epochs]
  280.             image_testing_after_50_epochs = create_image(X_test_normalized, y_pred_binary_after_50_epochs, label_colors_testing)
  281.  
  282.             # Create an image for the new person
  283.             image_personNEW = create_imageN([personNEW_results_normalized[0]], [personNEW_label], label_colors)
  284.  
  285.             # Display the images
  286.             plt.figure(figsize=(5, 5))
  287.             plt.imshow(image_personNEW)
  288.             plt.title(f"New Person {idx}\nLabel: {personNEW_label}, Prediction: {personNEW_prediction}")
  289.             plt.axis("off")
  290.             plt.show()
  291.  
  292. # Display the images
  293. plt.figure(figsize=(25, 15))
  294. plt.subplot(2, 2, 1)
  295. plt.imshow(image_training)
  296. plt.title("Training Data")
  297. plt.axis("off")
  298.  
  299. plt.subplot(2, 2, 2)
  300. plt.imshow(image_testing_before_2nd_epoch)
  301. plt.title("Testing Data (2nd Epoch)")
  302. plt.axis("off")
  303.  
  304. plt.subplot(2, 2, 3)
  305. plt.imshow(image_testing_after_50_epochs)
  306. plt.title(f"Testing Data ({epochs} Epochs)")
  307. plt.axis("off")
  308.  
  309. plt.subplot(2, 2, 4)
  310. plt.imshow(image_personNEW)
  311. plt.title(f"New Person\nLabel: {personNEW_label},[{personNEW_prediction}]")
  312. plt.axis("off")
  313.  
  314. # Plot accuracy history
  315. plt.figure(figsize=(12, 5))
  316. plt.plot(range(1, epochs + 1), accuracy_history, marker='o')
  317. plt.title('Accuracy Over Epochs')
  318. plt.xlabel('Epochs')
  319. plt.ylabel('Accuracy')
  320. plt.grid()
  321.  
  322. # Print normalized data
  323. print("Normalized PersonNEW Data:")
  324. print(personNEW_results_normalized)
  325.  
  326. plt.show()
  327.  
  328. print("X_train before normalization:")
  329. print(X_train)
  330. print("X_test before normalization:")
  331. print(X_test)
  332.  
  333. import seaborn as sns
  334.  
  335. from sklearn.metrics import confusion_matrix
  336. from tensorflow.keras.utils import to_categorical
  337.  
  338. np.set_printoptions(threshold=np.inf, precision=4, suppress=True)
  339.  
  340. # Train the model
  341. print("Training Start")
  342. for epoch in tqdm_notebook(range(1000), desc="Training Progress"):
  343.     model.fit(np.array(X_train_normalized), np.array(y_train), epochs=1, verbose=0)
  344. print("Training Complete")
  345.  
  346. # Generate predictions from the model
  347. predictions = (model.predict(X_test_normalized) > 0.5).astype(int)
  348.  
  349. # Convert y_test to a numpy array and then to binary labels
  350. y_test_array = np.array(y_test)  # Convert y_test to a numpy array
  351. y_test_binary = (y_test_array > 0.5).astype(int)  # Convert to binary
  352.  
  353. # Compute the confusion matrix
  354. conf_matrix = confusion_matrix(y_test_binary, predictions)
  355.  
  356. # Evaluate the model's performance
  357. accuracy = accuracy_score(y_test_binary, predictions)
  358. precision = precision_score(y_test_binary, predictions)
  359. recall = recall_score(y_test_binary, predictions)
  360. f1 = f1_score(y_test_binary, predictions)
  361.  
  362. # Display the confusion matrix
  363. sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
  364. plt.xlabel('Predicted')
  365. plt.ylabel('Actual')
  366. plt.title('Confusion Matrix')
  367. plt.show()
  368.  
  369. print(f"Accuracy: {accuracy:.4f}")
  370. print(f"Precision: {precision:.4f}")
  371. print(f"Recall: {recall:.4f}")
  372. print(f"F1 Score: {f1:.4f}")
  373.  
  374. print(f"Confusion Matrix:\n{conf_matrix}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement