Advertisement
max2201111

quite good VK

May 28th, 2024
936
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 24.28 KB | Science | 0 0
  1. #Navod na pouziti, Mgr. Hynek Mlčoušek, v Brne 2.5.2024
  2. #Ulozte do lokalniho souboru u sebe na PC data tohoto tvaru vzdy ukoncene 0 ci 1 (jde o uceni s ucitelem: 1 = nemocny, 0 = prezil/zdravy, ve vystupu bude zelena znacit 0, cervena 1)  a bez znaku #; pozor na ","
  3.  
  4. # [ [23.657800719276743,18.859916797201468,0],
  5. # [22.573729142097473,17.96922325097786,0],
  6. # [32.55342396968757,29.463651408558803,0],
  7. # [6.718035041529263,25.704665468161718,1],
  8. # [14.401918566243225,16.770856492924658,0],
  9. # [17.457907312962234,21.76521470574044,0],
  10. # [20.02796946568093,73.45445954770891,1],
  11. # [30.295138369778076,62.901112886193246,1],
  12. # [15.128977804449633,32.40267702110393,0],
  13. # [30.179457395820013,58.982492125646104,1],
  14. # [28.01649701854089,63.92781357637711,1],
  15. # [16.791838457871147,42.33482314089884,0],
  16. # [10.583694293380976,19.61926728942497,0],
  17. # [26.634447074406467,91.96624817360987,1],
  18. # [26.217868623367643,36.400293587062976,0],
  19. # [17.689396788624936,60.79797114006423,1],
  20. # [33.17193822527976,66.75277364959176,1],
  21. # [23.793952755709153,22.57501437360518,0]]
  22.  
  23. #kliknete na cerne tlacitko s trojuhelnickem vlevo nahore
  24. #pod kodem se objevi moznost spustit dialogove okenko, kliknete na nej
  25. #soubor, ktery mate z bodu vyse vyberte a nahrajte
  26. #Najdete v tomto kodu retezec:
  27. ###ZDE VLOZTE DATA OD NOVYCH PACIENTU
  28.  
  29. #Vlozte do pole
  30. # new_persons_results = []
  31. # data o nekolika malo novych pacientech bez ukoncovaci 0 a 1, ale se stejnym poctem sloupcu jako ma soubor z Vaseho lokalniho disku, vyse by tedy toto bylo rovno 2
  32. #kod vyhodi hned po natrenovani, (jehoz prubeh muzete sledovat na modre progres bare) pro kazdy radek z new_persons_results bilo-sedo-cerne ctverecky vznikle z normalizace poskytnutych dat a ukoncovaci ctverecek cerveny pripadne zeleny
  33. #zaroven s tim se vypise realne cislo mezi 0 a 1 znacici jak moc je pacient zdravy (blizke 0) ci nemocny (blizke 1)
  34. #cisla uprostred pak indikuji zadany oranzovy semafor.
  35. #je na lekarich nastavit thresholdy (tedy pravdepodobnosti: cisla mezi 0 a 1), ktere pak daji zaver, zda je pacient cerveny, oranzovy ci zeleny
  36.  
  37. # prosim o komentare a vysledky na realnych datech, je zadouci, aby radku v matici, tedy pacientu, byly stovky a sloupcu desitky
  38. # Moznosti vyuziti: onkologicka diagnoza vs. zdrava kontrolni skupina, diabetes (pritomnost/nepritomnost), testovani noveho leku oproti placebu atd.
  39.  
  40. #kod zaroven vyhodi confusion matici, tedy mozne True Negative a False Positive plus spravne zarazene hodnoty spolu s presnosti,F1 score recall atd.
  41. #poznamka ke kodu: jde o experimentalni verzi, ktera krome skutecne potrebneho kodu obsahuje ladici informace, ruzne duplicity, nadbytecne prikazy atd.
  42. # Na uvod behu programu se pro kontrolu vypise poskytnuta matice a jeji normalizovana verze, je treba sjet jezdcem napravo nize na obrazky a dalsi vystupy
  43.  
  44. #Dekuji profesoru Petru Dostalovi za namet k teto praci a poskytnuta data, byt je potreba mit data realna
  45.  
  46. import numpy as np
  47. import matplotlib.pyplot as plt
  48. import tensorflow as tf
  49. from tqdm import tqdm
  50.  
  51.  
  52. from IPython.display import display
  53. from IPython.display import Javascript
  54. display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))
  55.  
  56. label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}
  57. label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}
  58.  
  59.  
  60. %matplotlib inline
  61.  
  62.  
  63.  
  64. # Function to create images based on predictions
  65. def create_image(data, predictions):
  66.     num_rows, num_columns = len(data), len(data[0])
  67.     image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
  68.  
  69.     for i in range(num_rows):
  70.         for j in range(num_columns):
  71.             pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
  72.             image[i, j] = np.array([pixel_value] * 3)
  73.  
  74.         # Create a gradient based on the normalized values
  75.         gradient_value = int(np.interp(predictions[i], [0, 1], [0, 255]))
  76.         image[i, -1] = np.array([gradient_value] * 3)
  77.  
  78.     return image
  79.  
  80. def create_image(data, predictions):
  81.     num_rows, num_columns = len(data), len(data[0])
  82.     image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
  83.  
  84.     for i in range(num_rows):
  85.         for j in range(num_columns):
  86.             pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
  87.             image[i, j] = np.array([pixel_value] * 3)
  88.  
  89.         # Use red for class 0 and green for class 1
  90.         if predictions[i] == 0:
  91.             image[i, -1] = np.array([255, 0, 0])  # Red
  92.         elif predictions[i] == 1:
  93.             image[i, -1] = np.array([0, 128, 0])  # Green
  94.  
  95.     return image
  96.  
  97. def create_image(data, predictions, label_colors):
  98.     num_rows, num_columns = len(data), len(data[0])
  99.     image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)
  100.  
  101.     for i in range(num_rows):
  102.         for j in range(num_columns):
  103.             pixel_value = int(np.interp(data[i][j], [np.min(data), np.max(data)], [0, 255]))
  104.             image[i, j] = np.array([pixel_value] * 3)
  105.  
  106.         # Use the specified color for the last column based on the label
  107.         image[i, -1] = label_colors[predictions[i]]
  108.  
  109.     return image
  110.  
  111.  
  112.  
  113. def create_imageN(data, predictions, label_colors=None):
  114.     num_training_rows = len(data)  # Set the number of rows based on the data
  115.     num_columns = len(data[0])
  116.  
  117.     image_training = np.zeros((num_training_rows, num_columns + 1, 3), dtype=np.uint8)
  118.  
  119.     min_pixel_value = np.min(X_train_normalized)
  120.     max_pixel_value = np.max(X_train_normalized)
  121.  
  122.  
  123.  
  124.  
  125.     for i in range(num_training_rows):
  126.         # Normalize the first columns independently
  127.         for j in range(num_columns):
  128.             pixel_value = int(np.interp(data[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
  129.             image_training[i, j] = np.array([pixel_value] * 3)
  130.  
  131.         # Normalize the last column separately to achieve grayscale
  132.         pixel_value_last = int(np.interp(data[i][-1], [min_pixel_value, max_pixel_value], [0, 255]))
  133.         image_training[i, -1] = np.array([pixel_value_last] * 3)
  134.  
  135.         # Use the specified color for the last column based on the label
  136.         if label_colors is not None:
  137.             image_training[i, -1] = label_colors[predictions[i]]
  138.  
  139.     return image_training
  140.  
  141.  
  142.  
  143.  
  144. # Load data from a file
  145. #file_path = 'C:/Users/Hynek/Desktop/example4.txt'
  146. from google.colab import files
  147. uploaded = files.upload()
  148.  
  149. # Tento kód otevře dialogové okno pro výběr souboru z vašeho počítače.
  150. import io
  151. import pandas as pd
  152.  
  153. # Předpokládáme, že jste nahráli CSV soubor
  154. for fn in uploaded.keys():
  155.   print('User uploaded file "{name}" with length {length} bytes'.format(
  156.       name=fn, length=len(uploaded[fn])))
  157.   path = io.BytesIO(uploaded[fn])  # Pro soubory, které potřebují být čteny jako binární objekty
  158.   df = pd.read_csv(path)
  159.   print(df.head())  # Vypíše prvních pět řádků DataFrame
  160.  
  161.  
  162. all_results = []
  163. #with open(file_path, 'r') as file:
  164. #    file_content = file.read()
  165.  
  166. # Execute the content as Python code
  167. ##exec(file_content)
  168.  
  169. import os
  170. import shutil
  171. import ast
  172.  
  173. for filename in uploaded.keys():
  174.     original_path = f"/content/{filename}"
  175.     destination_path = os.path.join("/content/", "/content/DATA2")
  176.     shutil.move(original_path, destination_path)
  177.     print(f"Soubor {filename} byl přesunut do {destination_path}")
  178.  
  179. file_path = '/content/DATA2'  # Cesta k souboru
  180. with open(file_path, 'r') as file:
  181.     code = file.read()
  182.  
  183. A_list = ast.literal_eval(code)
  184.  
  185.  
  186. # Převod na NumPy pole
  187. A = np.array(A_list)
  188.  
  189. #exec(code)
  190.  
  191. # Now, all_results contains lists corresponding to each row in the CSV file
  192. ##print(all_results)
  193.  
  194. # Assign values to variables dynamically based on the rows of matrix A
  195. for i, row in enumerate(A, start=1):
  196.     globals()[f"person{i}_results"] = list(row)
  197.  
  198. # Print the assigned variables
  199. for i in range(1, len(A) + 1):
  200.   #  print(f"person{i}_results {globals()[f'person{i}_results']}")
  201.     all_results.append(f"person{i}_results")
  202. ##print(all_results)
  203.  
  204.  
  205.  
  206. result_variables = []
  207.  
  208. # Loop through the variable names and get the corresponding variables using globals()
  209. for var_name in all_results:
  210.     result_variables.append(globals()[var_name])
  211.  
  212. # Now, result_variables contains the variables with names specified in variable_names
  213. #print(result_variables)
  214.  
  215. all_results = result_variables
  216. new_persons_results = result_variables
  217.  
  218.  
  219.  
  220. labels = [results[-1] for results in all_results]
  221.  
  222. # Odstranění posledního sloupce z datasetu
  223. data = [results[:-1] for results in all_results]
  224.  
  225. # Definice počtu řádků pro trénování a testování
  226. num_training_rows = 100
  227. num_testing_rows = 100
  228.  
  229. # Rozdělení datasetu na trénovací a testovací sady
  230. X_train, X_test, y_train, y_test = data[:num_training_rows], data[:num_testing_rows], labels[:num_training_rows], labels[:num_testing_rows]
  231.  
  232. # Převod na NumPy pole
  233. X_train = np.array(X_train)
  234. X_test = np.array(X_test)
  235. y_train = np.array(y_train)
  236. y_test = np.array(y_test)
  237.  
  238. # Normalizace dat (s ohledem na -1)
  239. min_values = np.min(X_train[X_train != -1], axis=0)
  240. max_values = np.max(X_train[X_train != -1], axis=0)
  241. X_train_normalized = (X_train - min_values) / (max_values - min_values)
  242.  
  243.  
  244.  
  245. import numpy as np
  246. import tensorflow as tf
  247. import matplotlib.pyplot as plt
  248. from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
  249. import seaborn as sns
  250. from tqdm.notebook import tqdm_notebook
  251.  
  252.  
  253.  
  254. import numpy as np
  255. import matplotlib.pyplot as plt
  256. import tensorflow as tf
  257. from tqdm.notebook import tqdm_notebook
  258.  
  259. # Průměry (mu) a směrodatné odchylky (sigma)
  260. mu = np.mean(X_train, axis=0)
  261. sigma = np.std(X_train, axis=0)
  262.  
  263. # Normalizace každého sloupce zvlášť
  264. X_train_standardized = (X_train - mu) / sigma
  265. X_test_standardized = (X_test - mu) / sigma
  266.  
  267. # Vylepšený model
  268. model = tf.keras.Sequential([
  269.     tf.keras.layers.Dense(256, activation='relu', input_shape=(len(X_train[0]),)),
  270.     tf.keras.layers.Dropout(0.3),
  271.     tf.keras.layers.Dense(128, activation='relu'),
  272.     tf.keras.layers.Dropout(0.3),
  273.     tf.keras.layers.Dense(64, activation='relu'),
  274.     tf.keras.layers.Dropout(0.3),
  275.     tf.keras.layers.Dense(1, activation='sigmoid')
  276. ])
  277.  
  278. # Použití Adam optimizer s learning rate schedulerem
  279. lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
  280.     initial_learning_rate=1e-3,
  281.     decay_steps=10000,
  282.     decay_rate=0.9
  283. )
  284. optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  285.  
  286. # Kompilace modelu
  287. model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
  288.  
  289. # Lists to store accuracy values
  290. accuracy_history = []
  291.  
  292. # Create images for the training data
  293. image_training = np.zeros((num_training_rows, len(X_train[0]) + 1, 3), dtype=np.uint8)
  294.  
  295. min_pixel_value = np.min(X_train_standardized, axis=0)
  296. max_pixel_value = np.max(X_train_standardized, axis=0)
  297.  
  298. for i, label in enumerate(y_train):
  299.     for j in range(len(X_train_standardized[0])):
  300.         pixel_value = int(np.interp(X_train_standardized[i][j], [min_pixel_value[j], max_pixel_value[j]], [0, 255]))
  301.         image_training[i, j] = np.array([pixel_value] * 3)
  302.     image_training[i, -1] = np.array([128, 128, 128])
  303.     if label == 0:
  304.         image_training[i, -1] = np.array([0, 128, 0])
  305.     elif label == 1:
  306.         image_training[i, -1] = np.array([255, 0, 0])
  307.  
  308. # Training the model
  309. epochs = 139
  310. new_persons_results = [
  311.     [23.65780072, 18.8599168],
  312.     [22.57372914, 17.96922325],
  313.     [32.55342397, 29.46365141],
  314.     [ 6.71803504, 25.70466547],
  315.     [14.40191857, 16.77085649],
  316.     [17.45790731, 21.76521471],
  317.     [20.02796947, 73.45445955],
  318.     [26.2042, 10.6782],
  319.     [35.7258, 12.8027],
  320.     [21.2, 7.8],
  321.     [50.1, 40.2],
  322.     [32.739, 42.0152],
  323.     [28.1, 10.1],
  324. ]
  325.  
  326. import sys
  327.  
  328. for epoch in tqdm_notebook(range(epochs)):
  329.     history = model.fit(X_train_standardized, np.array(y_train), epochs=1, verbose=0, shuffle=False)
  330.     accuracy_history.append(history.history['accuracy'][0])
  331.  
  332.     if epoch == 1:
  333.         # Normalize the testing data
  334.         X_test_standardized = (X_test - mu) / sigma
  335.         y_pred_after_2nd_epoch = model.predict(X_test_standardized)
  336.         y_pred_binary_after_2nd_epoch = [1 if pred >= 0.5 else 0 for pred in y_pred_after_2nd_epoch]
  337.         image_testing_before_2nd_epoch = create_image(X_test_standardized, y_pred_binary_after_2nd_epoch, label_colors_testing)
  338.  
  339.     if epoch >= epochs-1:
  340.         print(f"HERE HERE Epoch: {epoch}, Epochs: {epochs}\n")
  341.         sys.stdout.flush()
  342.  
  343.         # Iterate through new persons
  344.         for idx, personNEW_results in enumerate(new_persons_results, start=0):
  345.             # Ensure that personNEW_results has the same number of features as the model expects
  346.             assert len(personNEW_results) == len(X_train[0]), "Mismatch in the number of features."
  347.  
  348.             personNEW_results_standardized = (np.array(personNEW_results) - mu) / sigma
  349.  
  350.             personNEW_prediction = model.predict(np.array([personNEW_results_standardized]))
  351.             personNEW_label = 1 if personNEW_prediction >= 0.5 else 0
  352.             y_pred_after_50_epochs = model.predict(X_test_standardized)
  353.             y_pred_binary_after_50_epochs = [1 if pred >= 0.5 else 0 for pred in y_pred_after_50_epochs]
  354.             image_testing_after_50_epochs = create_image(X_test_standardized, y_pred_binary_after_50_epochs, label_colors_testing)
  355.  
  356.             # Create an image for the new person
  357.             image_personNEW = create_imageN([personNEW_results_standardized], [personNEW_label], label_colors)
  358.  
  359.             # Display the images
  360.             plt.figure(figsize=(5, 5))
  361.             plt.imshow(image_personNEW)
  362.             plt.title(f"New Person {idx}\nLabel: {personNEW_label}, Prediction: {personNEW_prediction}, personNEW_results: {personNEW_results}")
  363.             plt.axis("off")
  364.             plt.show()
  365.  
  366. # Display the images
  367. plt.figure(figsize=(25, 15))
  368. plt.subplot(2, 2, 1)
  369. plt.imshow(image_training)
  370. plt.title("Training Data")
  371. plt.axis("off")
  372.  
  373. plt.subplot(2, 2, 2)
  374. plt.imshow(image_testing_before_2nd_epoch)
  375. plt.title("Testing Data (2nd Epoch)")
  376. plt.axis("off")
  377.  
  378. plt.subplot(2, 2, 3)
  379. plt.imshow(image_testing_after_50_epochs)
  380. plt.title(f"Testing Data ({epochs} Epochs)")
  381. plt.axis("off")
  382.  
  383. plt.subplot(2, 2, 4)
  384. plt.imshow(image_personNEW)
  385. plt.title(f"New Person\nLabel: {personNEW_label},[{personNEW_prediction}]")
  386. plt.axis("off")
  387.  
  388. # Plot accuracy history
  389. plt.figure(figsize=(12, 5))
  390. plt.plot(range(1, epochs + 1), accuracy_history, marker='o')
  391. plt.title('Accuracy Over Epochs')
  392. plt.xlabel('Epochs')
  393. plt.ylabel('Accuracy')
  394. plt.grid()
  395.  
  396. # Print standardized data
  397. print("Standardized PersonNEW Data:")
  398. print(personNEW_results_standardized)
  399.  
  400. plt.show()
  401.  
  402. print("X_train before standardization:")
  403. print(X_train)
  404. print("X_test before standardization:")
  405. print(X_test)
  406.  
  407. import seaborn as sns
  408.  
  409. print("KKKKKKKKKKKKKKKKKKKKKKKKKKKKKKK")
  410. print(X_test)
  411. print("HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH")
  412. print(X_train)
  413. print("LLLLLLLLLLLLLLLLLLLLLLLLLLLLL")
  414.  
  415. # y_pred_binary = [1 if pred >= 0.5 else 0 for pred in model.predict(X_test_standardized)]
  416.  
  417. # # Create confusion matrix
  418. # conf_matrix = confusion_matrix(y_train, y_pred_binary)
  419. # print(conf_matrix)
  420.  
  421. from sklearn.metrics import confusion_matrix
  422. from tensorflow.keras.utils import to_categorical
  423.  
  424. np.set_printoptions(threshold=np.inf, precision=4, suppress=True)
  425.  
  426. # Assuming X_test_standardized and y_test are your test set data
  427. y_pred_binary = [1 if pred >= 0.5 else 0 for pred in model.predict(X_test_standardized)]
  428.  
  429. # Create confusion matrix using the test set
  430. conf_matrix = confusion_matrix(y_test, y_pred_binary)
  431. print(conf_matrix)
  432.  
  433. plt.figure(figsize=(6, 6))
  434. sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1'])
  435. plt.xlabel("Predicted Label")
  436. plt.ylabel("True Label")
  437. plt.title("Confusion Matrix")
  438. plt.show()
  439.  
  440. X_train = np.array(X_train)
  441. #y_train_one_hot = np.array(y_train_one_hot)
  442.  
  443. # Rozdělení dat na trénovací a testovací množiny
  444. X_train, X_test, y_train, y_test = data[:num_training_rows], data[:num_testing_rows], labels[:num_training_rows], labels[:num_testing_rows]
  445.  
  446. import numpy as np
  447. import matplotlib.pyplot as plt
  448. from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
  449. import tensorflow as tf
  450. import seaborn as sns
  451.  
  452. # Assuming data splitting and model definition have been done correctly
  453.  
  454. # Compile the model
  455. model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
  456.  
  457. # Train the model
  458. print("Training Start")
  459. for epoch in tqdm_notebook(range(1000), desc="Training Progress"):
  460.     model.fit(np.array(X_train_standardized), np.array(y_train), epochs=1, verbose=0)
  461. print("Training Complete")
  462.  
  463. # Generate predictions from the model
  464. predictions = (model.predict(X_test_standardized) > 0.5).astype(int)
  465.  
  466. # Convert y_test to a numpy array and then to binary labels
  467. y_test_array = np.array(y_test)  # Convert y_test to a numpy array
  468. y_test_binary = (y_test_array > 0.5).astype(int)  # Convert to binary
  469.  
  470. # Compute the confusion matrix
  471. conf_matrix = confusion_matrix(y_test_binary, predictions)
  472.  
  473. # Evaluate the model's performance
  474. accuracy = accuracy_score(y_test_binary, predictions)
  475. precision = precision_score(y_test_binary, predictions)
  476. recall = recall_score(y_test_binary, predictions)
  477. f1 = f1_score(y_test_binary, predictions)
  478.  
  479. # Display the confusion matrix
  480. sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
  481. plt.xlabel('Predicted')
  482. plt.ylabel('Actual')
  483. plt.title('Confusion Matrix')
  484. plt.show()
  485.  
  486. print(f"Accuracy: {accuracy:.4f}")
  487. print(f"Precision: {precision:.4f}")
  488. print(f"Recall: {recall:.4f}")
  489. print(f"F1 Score: {f1:.4f}")
  490.  
  491. print(f"Confusion Matrix2122:\n{conf_matrix}")
  492.  
  493. import random
  494.  
  495. def find_best_pair(min_val, max_val, num_features, model, min_values, max_values):
  496.     best_pair = None
  497.     best_prediction = 1
  498.     for _ in range(1000):  # Number of iterations to find the best pair
  499.         new_data = np.random.uniform(min_val, max_val, num_features)
  500.         new_data_standardized = (new_data - mu) / sigma
  501.        
  502.         # Suppress model output
  503.         tf.get_logger().setLevel('ERROR')
  504.         with tf.device('/CPU:0'):  # Ensure to run on CPU to minimize unwanted logs
  505.             prediction = model.predict(np.array([new_data_standardized]), verbose=0)[0][0]
  506.         tf.get_logger().setLevel('INFO')
  507.        
  508.         if prediction < best_prediction:
  509.             best_prediction = prediction
  510.             best_pair = new_data
  511.     return best_pair, best_prediction
  512.  
  513. best_pair, best_prediction = find_best_pair(min_values, max_values, len(X_train[0]), model, min_values, max_values)
  514.  
  515. def find_worst_pair(min_val, max_val, num_features, model, min_values, max_values):
  516.     worst_pair = None
  517.     worst_prediction = 0
  518.     for _ in range(1000):  # Number of iterations to find the best pair
  519.         new_data = np.random.uniform(min_val, max_val, num_features)
  520.         new_data_standardized = (new_data - mu) / sigma
  521.        
  522.         # Suppress model output
  523.         tf.get_logger().setLevel('ERROR')
  524.         with tf.device('/CPU:0'):  # Ensure to run on CPU to minimize unwanted logs
  525.             prediction = model.predict(np.array([new_data_standardized]), verbose=0)[0][0]
  526.         tf.get_logger().setLevel('INFO')
  527.        
  528.         if prediction > worst_prediction:
  529.             worst_prediction = prediction
  530.             worst_pair = new_data
  531.     return worst_pair, worst_prediction
  532.  
  533. worst_pair, worst_prediction = find_worst_pair(min_values, max_values, len(X_train[0]), model, min_values, max_values)
  534.  
  535. print(f"Best Pair: {best_pair}, Best Prediction: {best_prediction}")
  536. print(f"Worst Pair: {worst_pair}, Worst Prediction: {worst_prediction}")
  537.  
  538.  
  539.  
  540. import numpy as np
  541. import matplotlib.pyplot as plt
  542. import tensorflow as tf
  543. from sklearn.metrics import recall_score, confusion_matrix, accuracy_score, precision_score, f1_score
  544. import seaborn as sns
  545. from tqdm.notebook import tqdm_notebook
  546. from sklearn.decomposition import PCA
  547. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
  548.  
  549.  
  550.  
  551. # Rozdělení na vstupní data (X) a cílové proměnné (y)
  552. X = A[:, :-1]
  553. y = A[:, -1]
  554.  
  555. # Rozdělení na trénovací a testovací sadu (v tomto příkladě použijeme celou sadu jako trénovací pro jednoduchost)
  556. X_train, y_train = X, y
  557. X_test, y_test = X, y
  558.  
  559. # Výpočet průměru a směrodatné odchylky pro každý sloupec
  560. mu = np.mean(X_train, axis=0)
  561. sigma = np.std(X_train, axis=0)
  562.  
  563. # Normalizace každého sloupce zvlášť
  564. X_train_standardized = (X_train - mu) / sigma
  565.  
  566. # Normalizace testovacích dat
  567. X_test_standardized = (X_test - mu) / sigma
  568.  
  569. # Definice modelu
  570. model = tf.keras.Sequential([
  571.     tf.keras.layers.Dense(256, activation='relu', input_shape=(X_train_standardized.shape[1],)),
  572.     tf.keras.layers.Dropout(0.3),
  573.     tf.keras.layers.Dense(128, activation='relu'),
  574.     tf.keras.layers.Dropout(0.3),
  575.     tf.keras.layers.Dense(64, activation='relu'),
  576.     tf.keras.layers.Dropout(0.3),
  577.     tf.keras.layers.Dense(1, activation='sigmoid')
  578. ])
  579.  
  580. # Použití Adam optimizer s learning rate schedulerem
  581. lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
  582.     initial_learning_rate=1e-3,
  583.     decay_steps=10000,
  584.     decay_rate=0.9
  585. )
  586. optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
  587.  
  588. # Kompilace modelu
  589. model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.Recall()])
  590.  
  591. # Trénování modelu
  592. history = model.fit(X_train_standardized, y_train, epochs=50, verbose=0, shuffle=False)
  593.  
  594. # Predikce
  595. y_pred_prob = model.predict(X_test_standardized)
  596. y_pred = (y_pred_prob > 0.5).astype(int)
  597.  
  598. # Výpočet metrik
  599. recall = recall_score(y_test, y_pred)
  600. conf_matrix = confusion_matrix(y_test, y_pred)
  601.  
  602. # Vyhodnocení výkonu modelu
  603. accuracy = accuracy_score(y_test, y_pred)
  604. precision = precision_score(y_test, y_pred)
  605. f1 = f1_score(y_test, y_pred)
  606.  
  607. # Výpis metrik
  608. print(f"Recall: {recall:.4f}")
  609. print(f"Accuracy: {accuracy:.4f}")
  610. print(f"Precision: {precision:.4f}")
  611. print(f"F1 Score: {f1:.4f}")
  612. print(f"Confusion Matrix:\n{conf_matrix}")
  613.  
  614. # Zobrazení confusion matrix
  615. sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
  616. plt.xlabel('Predicted')
  617. plt.ylabel('Actual')
  618. plt.title('Confusion Matrix')
  619. plt.show()
  620.  
  621. # Odstranění mezivýstupů
  622. num_iterations = 500
  623.  
  624. best_row = None
  625. best_prediction = None
  626. best_diff = float('inf')
  627.  
  628. for _ in range(num_iterations):
  629.     new_data = np.random.normal(mu, sigma)
  630.     new_data_standardized = (new_data - mu) / sigma
  631.     prediction_prob = model.predict(np.array([new_data_standardized]), verbose=0)[0][0]
  632.     diff = abs(prediction_prob - 0.67)
  633.    
  634.     if diff < best_diff:
  635.         best_diff = diff
  636.         best_row = new_data
  637.         best_prediction = prediction_prob
  638.  
  639. print(f"Nejlepší řádek: {best_row}")
  640. print(f"Predikovaná hodnota: {best_prediction}")
  641. print(f"Rozdíl: {best_diff}")
  642.  
  643. # Vizualizace výsledků pomocí PCA
  644. X_standardized = (X - mu) / sigma
  645. pca = PCA(n_components=2)  # Snížení na 2 komponenty
  646. X_pca = pca.fit_transform(X_standardized)
  647.  
  648. plt.figure()
  649. plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
  650. plt.xlabel('První hlavní komponenta')
  651. plt.ylabel('Druhá hlavní komponenta')
  652. plt.title('PCA na vašich datech')
  653. plt.show()
  654.  
  655. # Vizualizace výsledků pomocí LDA
  656. lda = LDA(n_components=1)
  657. X_lda = lda.fit_transform(X_standardized, y)
  658.  
  659. plt.figure()
  660. plt.scatter(X_lda[:, 0], np.zeros_like(X_lda), c=y)
  661. plt.xlabel('První diskriminační komponenta')
  662. plt.title('LDA s učitelem')
  663. plt.show()
  664.  
  665. # Vytvoření obrazu pro trénovací data
  666. min_pixel_value = -3
  667. max_pixel_value = 3
  668.  
  669. image_training = np.zeros((len(X_train_standardized), len(X_train_standardized[0]) + 1, 3), dtype=np.uint8)
  670.  
  671. for i, label in enumerate(y_train):
  672.     for j in range(len(X_train_standardized[0])):
  673.         pixel_value = int(np.interp(X_train_standardized[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
  674.         image_training[i, j] = np.array([pixel_value] * 3)
  675.     image_training[i, -1] = np.array([128, 128, 128])  # Šedý sloupec pro všechny řádky
  676.     if label == 0:
  677.         image_training[i, -1] = np.array([0, 128, 0])  # Zelený sloupec pro label 0
  678.     elif label == 1:
  679.         image_training[i, -1] = np.array([255, 0, 0])  # Červený sloupec pro label 1
  680.  
  681. # Zobrazení obrazu
  682. plt.imshow(image_training)
  683. plt.title("Training Data")
  684. plt.axis("off")
  685. plt.show()
Advertisement
Comments
Add Comment
Please, Sign In to add comment
Advertisement