max2201111
final VK ok vse 3
May 30th, 2024
#User guide, Mgr. Hynek Mlčoušek, Brno, 2 May 2024
#Save data of the following shape to a local file on your PC. Every row must end with 0 or 1 (this is supervised learning: 1 = sick, 0 = survived/healthy; in the output, green marks 0 and red marks 1). Omit the # characters and watch the ","

# [ [23.657800719276743,18.859916797201468,0],
# [22.573729142097473,17.96922325097786,0],
# [32.55342396968757,29.463651408558803,0],
# [6.718035041529263,25.704665468161718,1],
# [14.401918566243225,16.770856492924658,0],
# [17.457907312962234,21.76521470574044,0],
# [20.02796946568093,73.45445954770891,1],
# [30.295138369778076,62.901112886193246,1],
# [15.128977804449633,32.40267702110393,0],
# [30.179457395820013,58.982492125646104,1],
# [28.01649701854089,63.92781357637711,1],
# [16.791838457871147,42.33482314089884,0],
# [10.583694293380976,19.61926728942497,0],
# [26.634447074406467,91.96624817360987,1],
# [26.217868623367643,36.400293587062976,0],
# [17.689396788624936,60.79797114006423,1],
# [33.17193822527976,66.75277364959176,1],
# [23.793952755709153,22.57501437360518,0]]
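
# A minimal sketch of saving such a matrix from Python to a local file in
# exactly the form the loader below expects: a Python-style list of lists that
# ast.literal_eval can parse. The file name "mydata.txt" is only a placeholder.
#
# rows = [[23.657800719276743, 18.859916797201468, 0],
#         [6.718035041529263, 25.704665468161718, 1]]
# with open("mydata.txt", "w") as f:
#     f.write(repr(rows))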

#Click the black triangle (run) button in the upper left.
#Below the code an option to open a file dialog appears; click it.
#Select and upload the file you prepared in the step above.
#Find the following string in this code (INSERT DATA FROM NEW PATIENTS HERE):
###ZDE VLOZTE DATA OD NOVYCH PACIENTU

#Into the array
# new_persons_results = []
#insert data for a few new patients, without the terminating 0 or 1 but with the same number of columns as the file on your local disk; for the example above this would be 2
#right after training (whose progress you can watch on the blue progress bar), the code prints, for each row of new_persons_results, white-gray-black squares obtained by normalizing the supplied data, plus a terminating square in red or green
#along with that, a real number between 0 and 1 is printed, indicating how healthy (close to 0) or sick (close to 1) the patient is
#numbers in the middle indicate the orange traffic light
#it is up to the physicians to set the thresholds (i.e. probabilities: numbers between 0 and 1) that decide whether a patient is red, orange, or green; a sketch follows right below
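
# A minimal sketch of the traffic-light thresholding described above; the
# cutoffs low=0.3 and high=0.7 are placeholders, and choosing real values is
# left to the physicians.
def traffic_light(probability, low=0.3, high=0.7):
    if probability < low:
        return "green"   # likely healthy (close to 0)
    if probability > high:
        return "red"     # likely sick (close to 1)
    return "orange"      # in between: flag for further review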

# Comments and results on real data are welcome; ideally the matrix should have hundreds of rows (patients) and tens of columns.
# Possible uses: oncological diagnosis vs. a healthy control group, diabetes (present/absent), testing a new drug against a placebo, etc.

#The code also outputs a confusion matrix, i.e. the True Negative and False Positive counts plus the correctly classified values, together with accuracy, F1 score, recall, etc.
#Note on the code: this is an experimental version which, besides the code that is actually needed, contains debugging output, various duplicities, redundant statements, etc.
#At the start of the run the supplied matrix and its normalized version are printed as a check; use the scrollbar on the right to reach the images and further outputs below.

#I thank Professor Petr Dostál for the idea for this work and for the supplied data, though real data are still needed.

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm.notebook import tqdm_notebook
from IPython.display import display, Javascript
from google.colab import files  # needed for the files.upload() dialog below
import pandas as pd
import io
import shutil
import os
import ast
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))

label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}
label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}

%matplotlib inline

# Function to create images based on predictions
def create_image(data, predictions, label_colors):
    num_rows, num_columns = len(data), len(data[0])
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)

    for i in range(num_rows):
        for j in range(num_columns):
            pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
            image[i, j] = np.array([pixel_value] * 3)

        # Use the specified color for the last column based on the label
        image[i, -1] = label_colors[predictions[i]]

    return image

def create_imageN(data, predictions, label_colors=None):
    num_training_rows = len(data)  # Set the number of rows based on the data
    num_columns = len(data[0])

    image_training = np.zeros((num_training_rows, num_columns + 1, 3), dtype=np.uint8)

    # Note: the grayscale range is taken from the global X_train_normalized,
    # which must already be defined when this function is called
    min_pixel_value = np.min(X_train_normalized)
    max_pixel_value = np.max(X_train_normalized)

    for i in range(num_training_rows):
        # Map the feature columns to grayscale independently
        for j in range(num_columns):
            pixel_value = int(np.interp(data[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
            image_training[i, j] = np.array([pixel_value] * 3)

        # Map the last column to grayscale as well
        pixel_value_last = int(np.interp(data[i][-1], [min_pixel_value, max_pixel_value], [0, 255]))
        image_training[i, -1] = np.array([pixel_value_last] * 3)

        # If label colors are given, overwrite the last column with the label color
        if label_colors is not None:
            image_training[i, -1] = label_colors[predictions[i]]

    return image_training

# Load data from a file (opens an upload dialog in Colab)
uploaded = files.upload()

# This opens a dialog window for selecting a file from your computer.
# The CSV preview below is only informative; the actual parsing happens
# further down via ast.literal_eval.
for fn in uploaded.keys():
    print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')
    path = io.BytesIO(uploaded[fn])
    df = pd.read_csv(path)
    print(df.head())  # print the first five rows of the DataFrame

all_results = []
for filename in uploaded.keys():
    original_path = f"/content/{filename}"
    destination_path = os.path.join("/content/", "DATA2")
    shutil.move(original_path, destination_path)
    print(f"File {filename} was moved to {destination_path}")

file_path = '/content/DATA2'  # path to the uploaded file
with open(file_path, 'r') as file:
    code = file.read()

A_list = ast.literal_eval(code)

# Convert to a NumPy array
A = np.array(A_list)

# Assign values to variables dynamically based on the rows of matrix A
for i, row in enumerate(A, start=1):
    globals()[f"person{i}_results"] = list(row)

# Collect the names of the assigned variables
for i in range(1, len(A) + 1):
    all_results.append(f"person{i}_results")

result_variables = []

# Loop through the variable names and fetch the corresponding variables via globals()
for var_name in all_results:
    result_variables.append(globals()[var_name])

# Now result_variables contains the row lists; note that new_persons_results
# is overwritten further below with the actual new-patient data
all_results = result_variables
new_persons_results = result_variables

labels = [results[-1] for results in all_results]

# Remove the label column from the dataset
data = [results[:-1] for results in all_results]

# Number of rows used for training and testing (slices are capped at the
# dataset length, so smaller files simply use all rows)
num_training_rows = 50
num_testing_rows = 50

# Split the dataset into training and testing sets; note that in this
# experimental version both sets are taken from the same leading rows
X_train, X_test, y_train, y_test = data[:num_training_rows], data[:num_testing_rows], labels[:num_training_rows], labels[:num_testing_rows]

# Convert to NumPy arrays
X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# Min-max normalization per column, ignoring -1 placeholders (boolean masking
# would flatten the array, so mask with NaN and use the nan-aware reductions)
X_train_masked = np.where(X_train == -1, np.nan, X_train)
min_values = np.nanmin(X_train_masked, axis=0)
max_values = np.nanmax(X_train_masked, axis=0)
X_train_normalized = (X_train - min_values) / (max_values - min_values)

# Means (mu) and standard deviations (sigma) of the training columns
mu = np.mean(X_train, axis=0)
sigma = np.std(X_train, axis=0)

# Standardize each column separately
X_train_standardized = (X_train - mu) / sigma
X_test_standardized = (X_test - mu) / sigma

# Improved model with dropout regularization
model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu', input_shape=(len(X_train[0]),)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Adam optimizer with an exponential learning-rate decay schedule
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3,
    decay_steps=10000,
    decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Compile the model
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
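
# For orientation: the schedule above decays the learning rate as
# lr(step) = 1e-3 * 0.9 ** (step / 10000). A quick sketch of the value after a
# given number of optimizer steps (the helper name is illustrative):
def effective_lr(step, initial_lr=1e-3, decay_rate=0.9, decay_steps=10000):
    return initial_lr * decay_rate ** (step / decay_steps)

print(f"Learning rate after 10000 steps: {effective_lr(10000):.6f}")  # 0.000900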

# List to store accuracy values
accuracy_history = []

# Create an image for the training data
image_training = np.zeros((num_training_rows, len(X_train[0]) + 1, 3), dtype=np.uint8)

min_pixel_value = np.min(X_train_standardized, axis=0)
max_pixel_value = np.max(X_train_standardized, axis=0)

for i, label in enumerate(y_train):
    for j in range(len(X_train_standardized[0])):
        pixel_value = int(np.interp(X_train_standardized[i][j], [min_pixel_value[j], max_pixel_value[j]], [0, 255]))
        image_training[i, j] = np.array([pixel_value] * 3)
    image_training[i, -1] = np.array([128, 128, 128])  # gray by default
    if label == 0:
        image_training[i, -1] = np.array([0, 128, 0])  # green for label 0
    elif label == 1:
        image_training[i, -1] = np.array([255, 0, 0])  # red for label 1

# Training the model
epochs = 139

###ZDE VLOZTE DATA OD NOVYCH PACIENTU (insert data from new patients here)
# Each row must have the same number of columns as your uploaded data, without
# the terminating 0/1 label; the rows below are an example for a four-column dataset.
new_persons_results = [
    [0.0697400418162155,0.048866857264291144,0.28641370855472326,0.2721997143501177],
    [0.14159602676789837,0.1747877034447084,0.35616475477076587,0.3349487498168958],
    [0.11173253224821383,0.18794447828677996,0.3254176976987727,0.3413023918178341],
    [0.09630381764770453,0.05449810810962146,0.26767869268577593,0.21134056616439179],
    [0.17834821693532132,0.18466538062866059,0.3199711146234129,0.3968137366419059],
    [0.06045619825051427,0.05598696779492471,0.21592696351263593,0.22040624440430515],
    [0.08666288081774745,0.015388075894536557,0.2041876616268118,0.20706370434663773],
    [0.03130184508345673,0.015266595360551428,0.27183777103946916,0.2867664339707584],
    [0.05547626859495597,0.05808291988099526,0.2542166524648567,0.2573313511422864],
    [0.1772, 0.0076, 0.3565, 0.2584],
]

import sys

for epoch in tqdm_notebook(range(epochs)):
    history = model.fit(X_train_standardized, np.array(y_train), epochs=1, verbose=0, shuffle=False)
    accuracy_history.append(history.history['accuracy'][0])

    if epoch == 1:
        # Snapshot of the test predictions after the 2nd epoch
        X_test_standardized = (X_test - mu) / sigma
        y_pred_after_2nd_epoch = model.predict(X_test_standardized)
        y_pred_binary_after_2nd_epoch = [1 if pred >= 0.5 else 0 for pred in y_pred_after_2nd_epoch]
        image_testing_before_2nd_epoch = create_image(X_test_standardized, y_pred_binary_after_2nd_epoch, label_colors_testing)

    if epoch >= epochs - 1:
        print(f"Epoch: {epoch + 1} of {epochs}\n")
        sys.stdout.flush()

        # Iterate through the new persons
        for idx, personNEW_results in enumerate(new_persons_results, start=0):
            # Ensure that personNEW_results has the same number of features as the model expects
            assert len(personNEW_results) == len(X_train[0]), "Mismatch in the number of features."

            personNEW_results_standardized = (np.array(personNEW_results) - mu) / sigma

            personNEW_prediction = model.predict(np.array([personNEW_results_standardized]))
            personNEW_label = 1 if personNEW_prediction >= 0.5 else 0
            y_pred_final = model.predict(X_test_standardized)
            y_pred_binary_final = [1 if pred >= 0.5 else 0 for pred in y_pred_final]
            image_testing_final = create_image(X_test_standardized, y_pred_binary_final, label_colors_testing)

            # Create an image for the new person
            image_personNEW = create_imageN([personNEW_results_standardized], [personNEW_label], label_colors)

            # Display the images
            plt.figure(figsize=(5, 5))
            plt.imshow(image_personNEW)
            plt.title(f"New Person {idx}\nLabel: {personNEW_label}, Prediction: {personNEW_prediction}, personNEW_results: {personNEW_results}")
            plt.axis("off")
            plt.show()

# Display the summary images
plt.figure(figsize=(25, 15))
plt.subplot(2, 2, 1)
plt.imshow(image_training)
plt.title("Training Data")
plt.axis("off")

plt.subplot(2, 2, 2)
plt.imshow(image_testing_before_2nd_epoch)
plt.title("Testing Data (2nd Epoch)")
plt.axis("off")

plt.subplot(2, 2, 3)
plt.imshow(image_testing_final)
plt.title(f"Testing Data ({epochs} Epochs)")
plt.axis("off")

plt.subplot(2, 2, 4)
plt.imshow(image_personNEW)
plt.title(f"New Person\nLabel: {personNEW_label},[{personNEW_prediction}]")
plt.axis("off")

# Plot accuracy history
plt.figure(figsize=(12, 5))
plt.plot(range(1, epochs + 1), accuracy_history, marker='o')
plt.title('Accuracy Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid()

# Print standardized data
print("Standardized PersonNEW Data:")
print(personNEW_results_standardized)

plt.show()

print("X_train before standardization:")
print(X_train)
print("X_test before standardization:")
print(X_test)

# Assuming X_test_standardized and y_test are the test-set data
y_pred_binary = [1 if pred >= 0.5 else 0 for pred in model.predict(X_test_standardized)]

# Create a confusion matrix from the test set
conf_matrix = confusion_matrix(y_test, y_pred_binary)
print(conf_matrix)

plt.figure(figsize=(6, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1'])
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()
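
# For reference, a minimal sketch unpacking the matrix into the named counts
# mentioned in the header comment; this assumes sklearn's binary layout
# [[TN, FP], [FN, TP]]. From these counts: accuracy = (TP+TN)/(TP+TN+FP+FN),
# precision = TP/(TP+FP), recall = TP/(TP+FN), F1 = 2*precision*recall/(precision+recall).
tn, fp, fn, tp = conf_matrix.ravel()
print(f"TN={tn}, FP={fp}, FN={fn}, TP={tp}")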

# Compile the model again (note: this resets the optimizer state) and train further
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
print("Training Start")
for epoch in tqdm_notebook(range(100), desc="Training Progress"):
    model.fit(np.array(X_train_standardized), np.array(y_train), epochs=1, verbose=0)
print("Training Complete")

# Generate predictions from the model
predictions = (model.predict(X_test_standardized) > 0.5).astype(int)

# Convert y_test to a NumPy array and then to binary labels
y_test_array = np.array(y_test)
y_test_binary = (y_test_array > 0.5).astype(int)

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test_binary, predictions)

# Evaluate the model's performance
accuracy = accuracy_score(y_test_binary, predictions)
precision = precision_score(y_test_binary, predictions)
recall = recall_score(y_test_binary, predictions)
f1 = f1_score(y_test_binary, predictions)

# Display the confusion matrix
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

print(f"Confusion Matrix:\n{conf_matrix}")

def find_best_pair(min_val, max_val, num_features, model):
    # Random search for the input whose prediction is lowest (healthiest);
    # uses the global mu and sigma for standardization
    best_pair = None
    best_prediction = 1
    for _ in range(1000):  # number of random candidates to try
        new_data = np.random.uniform(min_val, max_val, num_features)
        new_data_standardized = (new_data - mu) / sigma

        # Suppress model output
        tf.get_logger().setLevel('ERROR')
        with tf.device('/CPU:0'):  # run on CPU to minimize unwanted logs
            prediction = model.predict(np.array([new_data_standardized]), verbose=0)[0][0]
        tf.get_logger().setLevel('INFO')

        if prediction < best_prediction:
            best_prediction = prediction
            best_pair = new_data
    return best_pair, best_prediction

best_pair, best_prediction = find_best_pair(min_values, max_values, len(X_train[0]), model)

def find_worst_pair(min_val, max_val, num_features, model):
    # Random search for the input whose prediction is highest (sickest)
    worst_pair = None
    worst_prediction = 0
    for _ in range(1000):  # number of random candidates to try
        new_data = np.random.uniform(min_val, max_val, num_features)
        new_data_standardized = (new_data - mu) / sigma

        # Suppress model output
        tf.get_logger().setLevel('ERROR')
        with tf.device('/CPU:0'):  # run on CPU to minimize unwanted logs
            prediction = model.predict(np.array([new_data_standardized]), verbose=0)[0][0]
        tf.get_logger().setLevel('INFO')

        if prediction > worst_prediction:
            worst_prediction = prediction
            worst_pair = new_data
    return worst_pair, worst_prediction

worst_pair, worst_prediction = find_worst_pair(min_values, max_values, len(X_train[0]), model)

print(f"Best Pair: {best_pair}, Best Prediction: {best_prediction}")
print(f"Worst Pair: {worst_pair}, Worst Prediction: {worst_prediction}")

# Visualize the results with PCA on the whole dataset (X and y are built from
# the data and labels loaded above)
X = np.array(data)
y = np.array(labels)
X_standardized = (X - mu) / sigma
pca = PCA(n_components=2)  # reduce to 2 components
X_pca = pca.fit_transform(X_standardized)

plt.figure()
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
plt.xlabel('First principal component')
plt.ylabel('Second principal component')
plt.title('PCA on your data')
plt.show()

# Visualize the results with LDA (supervised)
lda = LDA(n_components=1)
X_lda = lda.fit_transform(X_standardized, y)

plt.figure()
plt.scatter(X_lda[:, 0], np.zeros_like(X_lda[:, 0]), c=y)
plt.xlabel('First discriminant component')
plt.title('Supervised LDA')
plt.show()

# Create the image for the training data (final version)
min_pixel_value = -3
max_pixel_value = 3

image_training = np.zeros((len(X_train_standardized), len(X_train_standardized[0]) + 1, 3), dtype=np.uint8)

for i, label in enumerate(y_train):
    for j in range(len(X_train_standardized[0])):
        pixel_value = int(np.interp(X_train_standardized[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
        image_training[i, j] = np.array([pixel_value] * 3)
    image_training[i, -1] = np.array([128, 128, 128])  # gray column for all rows
    if label == 0:
        image_training[i, -1] = np.array([0, 128, 0])  # green column for label 0
    elif label == 1:
        image_training[i, -1] = np.array([255, 0, 0])  # red column for label 1