#Usage instructions, Mgr. Hynek Mlčoušek, Brno, 2 May 2024
#Save data of the following shape into a local file on your PC. Every row must end with 0 or 1 (this is supervised learning: 1 = ill, 0 = survived/healthy; in the output green denotes 0 and red denotes 1). Omit the # characters and mind the commas. (An optional validation sketch follows these notes.)
# [ [23.657800719276743,18.859916797201468,0],
# [22.573729142097473,17.96922325097786,0],
# [32.55342396968757,29.463651408558803,0],
# [6.718035041529263,25.704665468161718,1],
# [14.401918566243225,16.770856492924658,0],
# [17.457907312962234,21.76521470574044,0],
# [20.02796946568093,73.45445954770891,1],
# [30.295138369778076,62.901112886193246,1],
# [15.128977804449633,32.40267702110393,0],
# [30.179457395820013,58.982492125646104,1],
# [28.01649701854089,63.92781357637711,1],
# [16.791838457871147,42.33482314089884,0],
# [10.583694293380976,19.61926728942497,0],
# [26.634447074406467,91.96624817360987,1],
# [26.217868623367643,36.400293587062976,0],
# [17.689396788624936,60.79797114006423,1],
# [33.17193822527976,66.75277364959176,1],
# [23.793952755709153,22.57501437360518,0]]
#Click the black button with the triangle in the top-left corner.
#Below the code an option to open a file dialog appears; click it.
#Select and upload the file you prepared in the step above.
#Find the following string in this code:
###INSERT DATA FROM NEW PATIENTS HERE
#Insert into the array
# new_persons_results = []
#data for a few new patients, without the terminating 0 or 1 but with the same number of feature columns as the file on your local disk (in the example above that would be 2).
#Right after training (whose progress you can follow on the blue progress bar), the code renders, for every row of new_persons_results, white-grey-black squares obtained by normalizing the supplied data, plus a final square that is red or green.
#Along with that, a real number between 0 and 1 is printed, indicating how healthy (close to 0) or ill (close to 1) the patient is.
#Values in the middle of that range correspond to the orange traffic-light state.
#It is up to the physicians to set the thresholds (probabilities, i.e. numbers between 0 and 1) that decide whether a patient is red, orange, or green; an illustrative thresholding sketch is appended at the end of this script.
#Please send comments and results on real data; ideally the matrix should have hundreds of rows (patients) and tens of columns.
#Possible applications: oncological diagnosis vs. a healthy control group, diabetes (present/absent), testing a new drug against a placebo, etc.
#The code also outputs a confusion matrix, i.e. the misclassified cases (False Negatives and False Positives) together with the correctly classified ones, plus accuracy, F1 score, recall, etc.
#Note on the code: this is an experimental version which, besides the code actually needed, contains debugging output, various duplicities, redundant commands, etc.
#At the start of the run the supplied matrix and its normalized version are printed for checking; use the scrollbar on the right to scroll down to the images and further outputs.
#I thank professor Petr Dostal for the idea behind this work and for the data provided, although real data are still needed.
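#Optional: a minimal validation sketch (an addition, not part of the original workflow) for checking
#the local data file before uploading it. The path 'example4.txt' is hypothetical; use your own file.
#It verifies that every row has the same number of columns and ends with a 0 or 1 label, as described above.
import ast

def validate_local_data(path):
    # Read a file containing a Python-style list of rows and check the expected format.
    with open(path, 'r') as f:
        rows = ast.literal_eval(f.read())
    n_cols = len(rows[0])
    for k, row in enumerate(rows, start=1):
        assert len(row) == n_cols, f"Row {k} has {len(row)} columns, expected {n_cols}."
        assert row[-1] in (0, 1), f"Row {k} does not end with 0 or 1."
    print(f"OK: {len(rows)} rows, {n_cols - 1} feature columns plus a 0/1 label.")

# Example call (hypothetical path, run on your own machine before uploading):
# validate_local_data('example4.txt')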
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tqdm import tqdm
from IPython.display import display, Javascript
display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))

label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}
label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}

%matplotlib inline
def create_imageN(data, predictions, label_colors=None):
    # Convert data to a NumPy array
    data = np.array(data)
    num_rows, num_columns = data.shape
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)

    # Normalize the first two columns independently
    for j in range(2):
        min_pixel_value = np.min(data[:, j])
        max_pixel_value = np.max(data[:, j])
        for i in range(num_rows):
            pixel_value = int(np.interp(data[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
            image[i, j] = np.array([pixel_value] * 3)

    # Normalize the last column separately to achieve grayscale
    min_pixel_value_last = np.min(data[:, -1])
    max_pixel_value_last = np.max(data[:, -1])
    for i in range(num_rows):
        pixel_value_last = int(np.interp(data[i][-1], [min_pixel_value_last, max_pixel_value_last], [0, 255]))
        image[i, -1] = np.array([pixel_value_last] * 3)

        # Use the specified color for the last column based on the label
        if label_colors is not None:
            image[i, -1] = label_colors[predictions[i]]

    return image
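# Optional usage sketch for create_imageN (added for illustration; the values below are made up,
# not taken from the data file): two rows with two normalized features each, labels 0 and 1.
# Uncomment to preview how the function renders grey feature squares plus a green/red label square.
# toy_data = [[0.1, 0.9], [0.8, 0.2]]
# toy_image = create_imageN(toy_data, predictions=[0, 1], label_colors=label_colors)
# plt.imshow(toy_image); plt.axis("off"); plt.show()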
# Load data from a file
#file_path = 'C:/Users/Hynek/Desktop/example4.txt'
from google.colab import files
uploaded = files.upload()
# This code opens a dialog window for selecting a file from your computer.

import io
import pandas as pd

# We assume a CSV-like file was uploaded
for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))
    path = io.BytesIO(uploaded[fn])  # For files that need to be read as binary objects
    df = pd.read_csv(path)
    print(df.head())  # Prints the first five rows of the DataFrame
all_results = []

import os
import shutil
import ast

for filename in uploaded.keys():
    original_path = f"/content/{filename}"
    destination_path = os.path.join("/content", "DATA2")
    shutil.move(original_path, destination_path)
    print(f"File {filename} was moved to {destination_path}")

file_path = '/content/DATA2'  # Path to the file
with open(file_path, 'r') as file:
    code = file.read()

A_list = ast.literal_eval(code)

# Convert to a NumPy array
A = np.array(A_list)
# Assign values to variables dynamically based on the rows of matrix A
for i, row in enumerate(A, start=1):
    globals()[f"person{i}_results"] = list(row)

# Collect the names of the assigned variables
for i in range(1, len(A) + 1):
    all_results.append(f"person{i}_results")

result_variables = []

# Loop through the variable names and get the corresponding variables using globals()
for var_name in all_results:
    result_variables.append(globals()[var_name])

# Now, result_variables contains the variables whose names were listed in all_results
all_results = result_variables
new_persons_results = result_variables

# Extract the last column (0 or 1) as labels
labels = [results[-1] for results in all_results]

# Remove the last column from the dataset
data = [results[:-1] for results in all_results]
# Split the data into training and testing sets with random shuffling
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42, shuffle=True)

# Normalize the training data
min_values = np.min(X_train, axis=0)
max_values = np.max(X_train, axis=0)
X_train_normalized = (X_train - min_values) / (max_values - min_values)

# Normalize the testing data using the min and max values of the training data
X_test_normalized = (X_test - min_values) / (max_values - min_values)

# Print normalized training data
print("Normalized Training Data:")
print(X_train_normalized)
print("Denormalized check:", X_train_normalized * (max_values - min_values) + min_values)
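# Equivalent alternative (a sketch only, not used further below): the same min-max scaling can be
# done with scikit-learn's MinMaxScaler fitted on the training data only. Kept commented out so the
# original computation above remains the one in use.
# from sklearn.preprocessing import MinMaxScaler
# scaler = MinMaxScaler().fit(X_train)
# X_train_scaled = scaler.transform(X_train)  # should match X_train_normalized
# X_test_scaled = scaler.transform(X_test)    # should match X_test_normalized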
# Define a simple neural network model
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(len(X_train[0]),)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Lists to store accuracy values
accuracy_history = []
# Create images for the training data
image_training = np.zeros((len(X_train), len(X_train[0]) + 1, 3), dtype=np.uint8)
min_pixel_value = np.min(X_train_normalized)
max_pixel_value = np.max(X_train_normalized)

# Populate image_training with consistent grey and red/green colors based on the labels in the last column
for i, label in enumerate(y_train):
    for j in range(len(X_train[0])):
        pixel_value = int(np.interp(X_train_normalized[i][j], [min_pixel_value, max_pixel_value], [0, 255]))
        image_training[i, j] = np.array([pixel_value] * 3)
    image_training[i, -1] = np.array([128, 128, 128])
    if label == 0:
        image_training[i, -1] = np.array([0, 128, 0])
    elif label == 1:
        image_training[i, -1] = np.array([255, 0, 0])

from tqdm.notebook import tqdm_notebook
###INSERT DATA FROM NEW PATIENTS HERE
# Number of training epochs
epochs = 1397

# Assuming 'new_persons_results' is a list of new persons, where each person is represented as a list of features
new_persons_results = [
    [23.65780072, 18.8599168 ],
    [22.57372914, 17.96922325],
    [32.55342397, 29.46365141],
    [ 6.71803504, 25.70466547],
    [14.40191857, 16.77085649],
    [17.45790731, 21.76521471],
    [20.02796947, 73.45445955],
]
import sys

for epoch in tqdm_notebook(range(epochs)):
    history = model.fit(X_train_normalized, np.array(y_train), epochs=1, verbose=0, shuffle=True)
    accuracy_history.append(history.history['accuracy'][0])

    if epoch == 1:
        # Normalize the testing data
        X_test_normalized = (X_test - min_values) / (max_values - min_values)
        y_pred_after_2nd_epoch = model.predict(X_test_normalized)
        y_pred_binary_after_2nd_epoch = [1 if pred >= 0.5 else 0 for pred in y_pred_after_2nd_epoch]
        image_testing_before_2nd_epoch = create_imageN(X_test_normalized, y_pred_binary_after_2nd_epoch, label_colors_testing)

    if epoch >= epochs - 1:
        print(f"HERE HERE Epoch: {epoch}, Epochs: {epochs}\n")
        sys.stdout.flush()

        # Iterate through new persons
        for idx, personNEW_results in enumerate(new_persons_results, start=1):
            # Ensure that personNEW_results has the same number of features as the model expects
            assert len(personNEW_results) == len(X_train[0]), "Mismatch in the number of features."

            personNEW_results_normalized = (np.array(personNEW_results) - min_values) / (max_values - min_values)
            personNEW_prediction = model.predict(np.array([personNEW_results_normalized]))
            personNEW_label = 1 if personNEW_prediction >= 0.5 else 0

            y_pred_after_50_epochs = model.predict(X_test_normalized)
            y_pred_binary_after_50_epochs = [1 if pred >= 0.5 else 0 for pred in y_pred_after_50_epochs]
            image_testing_after_50_epochs = create_imageN(X_test_normalized, y_pred_binary_after_50_epochs, label_colors_testing)

            # Create an image for the new person
            image_personNEW = create_imageN([personNEW_results_normalized], [personNEW_label], label_colors)

            # Display the images
            plt.figure(figsize=(5, 5))
            plt.imshow(image_personNEW)
            plt.title(f"New Person {idx}\nLabel: {personNEW_label}, Prediction: {personNEW_prediction}")
            plt.axis("off")
            plt.show()
# Display the images
plt.figure(figsize=(25, 15))

plt.subplot(2, 2, 1)
plt.imshow(image_training)
plt.title("Training Data")
plt.axis("off")

plt.subplot(2, 2, 2)
plt.imshow(image_testing_before_2nd_epoch)
plt.title("Testing Data (2nd Epoch)")
plt.axis("off")

plt.subplot(2, 2, 3)
plt.imshow(image_testing_after_50_epochs)
plt.title(f"Testing Data ({epochs} Epochs)")
plt.axis("off")

plt.subplot(2, 2, 4)
plt.imshow(image_personNEW)
plt.title(f"New Person\nLabel: {personNEW_label}, [{personNEW_prediction}]")
plt.axis("off")

# Plot accuracy history
plt.figure(figsize=(12, 5))
plt.plot(range(1, epochs + 1), accuracy_history, marker='o')
plt.title('Accuracy Over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.grid()

# Print normalized data
print("Normalized PersonNEW Data:")
print(personNEW_results_normalized)

plt.show()
print("X_train before normalization:")
print(X_train)
print("X_test before normalization:")
print(X_test)

import seaborn as sns

# Debugging output: raw test and train splits, printed again for checking
print("X_test (debug):")
print(X_test)
print("X_train (debug):")
print(X_train)
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.utils import to_categorical

np.set_printoptions(threshold=np.inf, precision=4, suppress=True)

# Generate predictions from the model
predictions = (model.predict(X_test_normalized) > 0.5).astype(int)

# Convert y_test to a numpy array and then to binary labels
y_test_array = np.array(y_test)  # Convert y_test to a numpy array
y_test_binary = (y_test_array > 0.5).astype(int)  # Convert to binary

# Compute the confusion matrix
conf_matrix = confusion_matrix(y_test_binary, predictions)

# Evaluate the model's performance
accuracy = accuracy_score(y_test_binary, predictions)
precision = precision_score(y_test_binary, predictions)
recall = recall_score(y_test_binary, predictions)
f1 = f1_score(y_test_binary, predictions)

# Display the confusion matrix
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")
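# A minimal sketch of the green/orange/red traffic-light thresholding described in the notes at the
# top of this script. The cut-offs 0.3 and 0.7 below are placeholders only; per those notes, the
# actual thresholds are to be chosen by the physicians.
def traffic_light(probability, low=0.3, high=0.7):
    # Map a predicted probability (close to 0 = healthy, close to 1 = ill) to a verdict.
    if probability < low:
        return "green"
    elif probability < high:
        return "orange"
    return "red"

# Illustrative use on the test-set probabilities already computed by the model above
test_probabilities = model.predict(X_test_normalized).ravel()
for p in test_probabilities:
    print(f"p = {p:.3f} -> {traffic_light(p)}")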