Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Navod na pouziti, Mgr. Hynek Mlčoušek, v Brne 2.5.2024
- # Ulozte do lokalniho souboru u sebe na PC data tohoto tvaru vzdy ukoncene 0 ci 1 (jde o uceni s ucitelem: 1 = nemocny, 0 = prezil/zdravy, ve vystupu bude zelena znacit 0, cervena 1) a bez znaku #; pozor na ","
- # [ [23.657800719276743,18.859916797201468,0],
- # [22.573729142097473,17.96922325097786,0],
- # [32.55342396968757,29.463651408558803,0],
- # [6.718035041529263,25.704665468161718,1],
- # [14.401918566243225,16.770856492924658,0],
- # [17.457907312962234,21.76521470574044,0],
- # [20.02796946568093,73.45445954770891,1],
- # [30.295138369778076,62.901112886193246,1],
- # [15.128977804449633,32.40267702110393,0],
- # [30.179457395820013,58.982492125646104,1],
- # [28.01649701854089,63.92781357637711,1],
- # [16.791838457871147,42.33482314089884,0],
- # [10.583694293380976,19.61926728942497,0],
- # [26.634447074406467,91.96624817360987,1],
- # [26.217868623367643,36.400293587062976,0],
- # [17.689396788624936,60.79797114006423,1],
- # [33.17193822527976,66.75277364959176,1],
- # [23.793952755709153,22.57501437360518,0]]
- # kliknete na cerne tlacitko s trojuhelnickem vlevo nahore
- # pod kodem se objevi moznost spustit dialogove okenko, kliknete na nej
- # soubor, ktery mate z bodu vyse vyberte a nahrajte
- # Najdete v tomto kodu retezec:
- ### ZDE VLOZTE DATA OD NOVYCH PACIENTU
- # Vlozte do pole
- # new_persons_results = []
- # data o nekolika malo novych pacientech bez ukoncovaci 0 a 1, ale se stejnym poctem sloupcu jako ma soubor z Vaseho lokalniho disku, vyse by tedy toto bylo rovno 2
- # kod vyhodi hned po natrenovani, (jehoz prubeh muzete sledovat na modre progres bare) pro kazdy radek z new_persons_results bilo-sedo-cerne ctverecky vznikle z normalizace poskytnutych dat a ukoncovaci ctverecek cerveny pripadne zeleny
- # zaroven s tim se vypise realne cislo mezi 0 a 1 znacici jak moc je pacient zdravy (blizke 0) ci nemocny (blizke 1)
- # cisla uprostred pak indikuji zadany oranzovy semafor.
- # je na lekarich nastavit thresholdy (tedy pravdepodobnosti: cisla mezi 0 a 1), ktere pak daji zaver, zda je pacient cerveny, oranzovy ci zeleny
- # prosim o komentare a vysledky na realnych datech, je zadouci aby radku v matici, tedy pacientu byly stovky a sloupcu desitky
- # Moznosti vyuziti: onkologicka diagnoza vs. zdrava kontrolni skupina, diabetes (pritomnost/nepritomnost), testovani noveho leku oproti placebu atd.
- # kod zaroven vyhodi confusion matici, tedy mozne True Negative a False Positive plus spravne zarazene hodnoty spolu s presnosti,F1 score recall atd.
- # poznamka ke kodu: jde o experimentalni verzi, ktera krome skutecne potrebneho kodu obsahuje ladici informace, ruzne duplicity, nadbytecne prikazy atd.
- # Na uvod behu programu se pro kontrolu vypise poskytnuta matice a jeji normalizovana verze, je treba sjet jezdcem napravo nize na obrazky a dalsi vystupy
- # Dekuji profesoru Petru Dostalovi za namet k teto praci a poskytnuta data, byt je potreba mit data realna
- import numpy as np
- import matplotlib.pyplot as plt
- import tensorflow as tf
- from tqdm import tqdm
- from sklearn.preprocessing import StandardScaler
- from IPython.display import display
- from IPython.display import Javascript
- display(Javascript('IPython.OutputArea.auto_scroll_threshold = 9999;'))  # runs a JS snippet in the notebook output that sets the output area's auto_scroll_threshold to 9999
- label_colors = {0: [0, 128, 0], 1: [255, 0, 0]}  # label -> RGB triple used for the last image column: 0 -> [0, 128, 0] (green), 1 -> [255, 0, 0] (red)
- label_colors_testing = {0: [0, 128, 0], 1: [255, 0, 0]}  # same mapping; this is the table passed to create_image for the test-set images
- %matplotlib inline
- # Function to create images based on predictions
def create_image(data, predictions):
    """Render feature rows as a grayscale image with a grayscale prediction column.

    NOTE: this two-argument variant is shadowed by the three-argument
    ``create_image(data, predictions, label_colors)`` defined right after it,
    so it is only reachable if that later definition is removed.

    Args:
        data: 2-D sequence/array of numeric features, one row per sample.
        predictions: per-row values in [0, 1]; each entry may be a scalar or a
            one-element array (as returned by ``model.predict``).

    Returns:
        ``uint8`` array of shape ``(rows, columns + 1, 3)``. Feature pixels are
        scaled linearly from the global min/max of ``data`` to 0..255; the
        extra last column maps the prediction from 0..1 to 0..255.
    """
    values = np.asarray(data, dtype=float)
    num_rows, num_columns = values.shape
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)

    # The value range is loop-invariant: compute it once instead of per pixel.
    lowest, highest = values.min(), values.max()
    gray = np.interp(values, [lowest, highest], [0, 255]).astype(np.uint8)
    # Replicate the single gray level across the three RGB channels.
    image[:, :num_columns] = gray[:, :, np.newaxis]

    for i in range(num_rows):
        # np.ravel(...)[0] accepts both plain scalars and 1-element arrays and
        # avoids the deprecated int(array) conversion.
        probability = float(np.ravel(predictions[i])[0])
        gradient_value = int(np.interp(probability, [0, 1], [0, 255]))
        image[i, -1] = gradient_value
    return image
def create_image(data, predictions, label_colors):
    """Render feature rows as a grayscale image with a coloured label column.

    Args:
        data: 2-D sequence/array of numeric features, one row per sample.
        predictions: per-row labels; each must be a key of ``label_colors``.
        label_colors: mapping from label to an RGB triple.

    Returns:
        ``uint8`` array of shape ``(rows, columns + 1, 3)``. Feature pixels are
        scaled linearly from the global min/max of ``data`` to 0..255; the
        extra last column holds ``label_colors[predictions[i]]``.

    Raises:
        KeyError: if a prediction is not a key of ``label_colors``.
    """
    values = np.asarray(data, dtype=float)
    num_rows, num_columns = values.shape
    image = np.zeros((num_rows, num_columns + 1, 3), dtype=np.uint8)

    # The value range is loop-invariant: compute it once instead of per pixel.
    lowest, highest = values.min(), values.max()
    gray = np.interp(values, [lowest, highest], [0, 255]).astype(np.uint8)
    # Replicate the single gray level across the three RGB channels.
    image[:, :num_columns] = gray[:, :, np.newaxis]

    for i in range(num_rows):
        # Use the specified colour for the last column based on the label.
        image[i, -1] = label_colors[predictions[i]]
    return image
def create_imageN(data, predictions, label_colors=None, value_range=None):
    """Render rows as a grayscale image scaled against a fixed value range.

    Unlike ``create_image``, pixel intensities are not scaled by the min/max
    of ``data`` itself but by an external range, so values outside that range
    are clipped to 0 or 255 by ``np.interp``.

    Args:
        data: 2-D sequence/array of numeric features, one row per sample.
        predictions: per-row labels; only read when ``label_colors`` is given.
        label_colors: optional mapping from label to an RGB triple. When
            omitted, the extra last column is a grayscale copy of each row's
            last feature.
        value_range: optional ``(min, max)`` pair used for intensity scaling.
            When omitted, the min/max of the module-level
            ``X_train_normalized`` is used (the original behaviour).

    Returns:
        ``uint8`` array of shape ``(rows, columns + 1, 3)``.
    """
    if value_range is None:
        # Original behaviour: scale against the normalised training data.
        min_pixel_value = np.min(X_train_normalized)
        max_pixel_value = np.max(X_train_normalized)
    else:
        min_pixel_value, max_pixel_value = value_range

    values = np.asarray(data, dtype=float)
    num_training_rows, num_columns = values.shape
    image_training = np.zeros((num_training_rows, num_columns + 1, 3), dtype=np.uint8)

    gray = np.interp(values, [min_pixel_value, max_pixel_value], [0, 255]).astype(np.uint8)
    image_training[:, :num_columns] = gray[:, :, np.newaxis]
    # Default for the extra column: gray level of the row's last feature.
    image_training[:, -1] = gray[:, -1, np.newaxis]

    if label_colors is not None:
        for i in range(num_training_rows):
            # Use the specified colour for the last column based on the label.
            image_training[i, -1] = label_colors[predictions[i]]
    return image_training
- # Load data from a file
- from google.colab import files
- import io
- import pandas as pd
- import os
- import shutil
- import ast
# Open Colab's file-upload dialog; `uploaded` maps file name -> raw bytes.
uploaded = files.upload()

for fn, content in uploaded.items():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(content)))
    # Debug preview only. The real parsing below uses ast.literal_eval because
    # the expected file is a Python-style nested list, not a CSV, so a failed
    # CSV preview must not abort the run.
    path = io.BytesIO(content)
    try:
        df = pd.read_csv(path)
        print(df.head())
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as err:
        print(f"CSV preview skipped: {err}")

# Every uploaded file is moved to the same fixed location, so when several
# files are uploaded the last one wins.
destination_path = "/content/DATA2"
for filename in uploaded.keys():
    original_path = f"/content/{filename}"
    shutil.move(original_path, destination_path)
    print(f"Soubor {filename} byl přesunut do {destination_path}")

file_path = '/content/DATA2'
with open(file_path, 'r', encoding='utf-8') as file:
    code = file.read()

# literal_eval only accepts Python literals, so the uploaded text cannot
# execute code.
A_list = ast.literal_eval(code)
A = np.array(A_list)

# One list per row of A. The person<i>_results globals are still published
# for notebook cells that may reference them, but the row lists are collected
# directly instead of being looked up again through globals().
all_results = []
for i, row in enumerate(A, start=1):
    person_row = list(row)
    globals()[f"person{i}_results"] = person_row
    all_results.append(person_row)

result_variables = all_results
# Placeholder: reassigned further down with the real new-patient rows.
new_persons_results = result_variables
# The last column of every row is the 0/1 label; the rest are features.
labels = [results[-1] for results in all_results]
data = [results[:-1] for results in all_results]

# Use at most 50 rows, but never claim more rows than were actually loaded:
# the training image built later is sized from num_training_rows and would
# otherwise be padded with black rows for small datasets.
num_training_rows = min(50, len(data))
num_testing_rows = min(50, len(data))

# NOTE(review): both slices start at row 0, so the "test" rows are the same
# rows the model is trained on and every reported metric is a training-set
# metric. Left unchanged because small uploads have no spare rows to hold
# out; confirm whether a real hold-out split is wanted.
X_train, y_train = data[:num_training_rows], labels[:num_training_rows]
X_test, y_test = data[:num_testing_rows], labels[:num_testing_rows]

# Z-score normalisation fitted on the training rows only.
scaler = StandardScaler()
X_train_normalized = scaler.fit_transform(X_train)

print("Normalized Training Data:")
print(X_train_normalized)
# Sanity check: undoing the scaling should print the original feature values.
print("Adenormalized (Z-score):", X_train_normalized * scaler.scale_ + scaler.mean_, "Bdenormalized")
# Small fully connected binary classifier: two ReLU hidden layers and one
# sigmoid output unit, so predictions are values between 0 and 1.
# The input width is declared with an explicit Input layer rather than the
# `input_shape` argument on the first Dense layer, which current Keras
# versions warn about.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(len(X_train[0]),)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# so it can be plotted per epoch.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Per-epoch training accuracy, filled by the training loop.
accuracy_history = []

# Build the training image: one row per training sample, one gray pixel per
# normalised feature and a final pixel coloured by the true label.
# The height comes from the actual number of training rows, not from a fixed
# constant, so small datasets do not produce trailing black rows.
num_features = len(X_train[0])
image_training = np.zeros((len(X_train_normalized), num_features + 1, 3), dtype=np.uint8)
min_pixel_value = np.min(X_train_normalized)
max_pixel_value = np.max(X_train_normalized)

# Scale all features at once from [min, max] to 0..255 and replicate the gray
# level across the three RGB channels.
gray_levels = np.interp(
    X_train_normalized, [min_pixel_value, max_pixel_value], [0, 255]
).astype(np.uint8)
image_training[:, :num_features] = gray_levels[:, :, np.newaxis]

for i, label in enumerate(y_train):
    if label == 0:
        image_training[i, -1] = np.array([0, 128, 0])
    elif label == 1:
        image_training[i, -1] = np.array([255, 0, 0])
    else:
        # Any label other than 0/1 is shown as neutral gray.
        image_training[i, -1] = np.array([128, 128, 128])
- from tqdm.notebook import tqdm_notebook
- # Number of training epochs; the training loop below calls model.fit once per epoch
- epochs = 1397
- new_persons_results = [  # ZDE VLOZTE DATA OD NOVYCH PACIENTU: feature rows of new patients without the trailing 0/1 label; each row must have as many values as a training row (checked in the training loop); replaces the placeholder assigned after loading
- [0.030391238492519845, 0.23021081913032299, 0.4743575198860915, 0.639395348276238],
- [0.19790381537769108, 0.37639843860181527, 0.5676528538456297, 0.716530820399044],
- [0.0035245462826666075, 0.23127629815305784, 0.4802171123709532, 0.6591272725083992],
- [0.059230621364548486, 0.24424510845680134, 0.442553808602372, 0.6891856336835676],
- [0.05536813173866345, 0.2538888869331579, 0.47861285542743165, 0.6200559751500355],
- [0.1300359168058454, 0.38443677757577344, 0.5957238735056223, 0.795823160451845],
- [0.1743368240338569, 0.3713129035302336, 0.5640350202165867, 0.7213527928848786],
- [0.09173335232875372, 0.2559096689549753, 0.49527436563146954, 0.6970388573439903],
- [0.015235204378572087, 0.2284904031445293, 0.46613902406934005, 0.6917336579549159],
- [0.0011416656054787145, 0.24567669307188245, 0.4388400949432476, 0.667323193441009],
- [0.06448448763849592, 0.2115323519931734, 0.43540989127902197, 0.6438994375658477],
- [0.1281083326647467, 0.319011666415554, 0.5081581898266203, 0.7238539046118706],
- [0.031400839963864634, 0.291826671945583, 0.44935681772218605, 0.6775565554946026],
- [0.06087306495870359, 0.23991257024083634, 0.4485025638007111, 0.680857926545652],
- [0.16944969856928027, 0.3433985701275623, 0.5739960718239413, 0.7587431345359652],
- [0.005679255126553562, 0.2703897890888177, 0.47083369294815347, 0.629981449029764],
- [0.1361411186548812, 0.3699350229482504, 0.5880045061520169, 0.709568518897945],
- [0.07538207440920129, 0.20062324901664458, 0.40581823748211543, 0.6337591072862666],
- [0.017969960867618918, 0.21435679119605028, 0.4881930298975361, 0.668393388428822],
- [0.08565125103289023, 0.29383944243424687, 0.4732898824502158, 0.6500725888934386],
- [0.08477898050514117, 0.21919257927575692, 0.49117946288913483, 0.6532321710710468],
- [0.18637585263771708, 0.30107373793178105, 0.5235238878093704, 0.7912391738401261],
- ]
- import sys
# NOTE(review): the original indentation of this section was lost; the nesting
# below is reconstructed from the data flow. The per-person figure is drawn
# inside the new-person loop and the 2x2 overview once afterwards. Confirm
# against the original notebook.

# Scale the evaluation rows once, up front, with the scaler fitted on the
# training data, so they exist regardless of how many epochs are run.
X_test_normalized = scaler.transform(X_test)

# The early snapshot is taken at the second epoch (index 1); with a single
# epoch it falls back to that epoch so the overview plot can still be drawn.
snapshot_epoch = min(1, epochs - 1)

for epoch in tqdm_notebook(range(epochs)):
    # One fit() call per epoch so the progress bar advances and the accuracy
    # of every epoch can be recorded.
    history = model.fit(X_train_normalized, np.array(y_train), epochs=1, verbose=0, shuffle=False)
    accuracy_history.append(history.history['accuracy'][0])

    if epoch == snapshot_epoch:
        y_pred_after_2nd_epoch = model.predict(X_test_normalized)
        y_pred_binary_after_2nd_epoch = [1 if pred >= 0.5 else 0 for pred in y_pred_after_2nd_epoch]
        image_testing_before_2nd_epoch = create_image(
            X_test_normalized, y_pred_binary_after_2nd_epoch, label_colors_testing)

    if epoch >= epochs - 1:
        print(f"HERE HERE Epoch: {epoch}, Epochs: {epochs}\n")
        sys.stdout.flush()

        # Final test-set predictions do not depend on the new person, so they
        # are computed once instead of once per new person.
        y_pred_after_50_epochs = model.predict(X_test_normalized)
        y_pred_binary_after_50_epochs = [1 if pred >= 0.5 else 0 for pred in y_pred_after_50_epochs]
        image_testing_after_50_epochs = create_image(
            X_test_normalized, y_pred_binary_after_50_epochs, label_colors_testing)

        for idx, personNEW_results in enumerate(new_persons_results, start=1):
            # Explicit check instead of `assert`, which is stripped under -O.
            if len(personNEW_results) != len(X_train[0]):
                raise ValueError("Mismatch in the number of features.")

            personNEW_results_normalized = scaler.transform([personNEW_results])
            # scaler.transform already returns a (1, n_features) batch, so it
            # is passed to the model as-is rather than wrapped once more.
            personNEW_prediction = model.predict(personNEW_results_normalized)
            personNEW_label = 1 if float(personNEW_prediction.ravel()[0]) >= 0.5 else 0

            # One-row image for the new person, scaled like the training image.
            image_personNEW = create_imageN(
                [personNEW_results_normalized[0]], [personNEW_label], label_colors)

            plt.figure(figsize=(5, 5))
            plt.imshow(image_personNEW)
            plt.title(f"New Person {idx}\nLabel: {personNEW_label}, Prediction: {personNEW_prediction}")
            plt.axis("off")
            plt.show()

        # Overview: training data, early and final test predictions, and the
        # last new person.
        plt.figure(figsize=(25, 15))
        plt.subplot(2, 2, 1)
        plt.imshow(image_training)
        plt.title("Training Data")
        plt.axis("off")
        plt.subplot(2, 2, 2)
        plt.imshow(image_testing_before_2nd_epoch)
        plt.title("Testing Data (2nd Epoch)")
        plt.axis("off")
        plt.subplot(2, 2, 3)
        plt.imshow(image_testing_after_50_epochs)
        plt.title(f"Testing Data ({epochs} Epochs)")
        plt.axis("off")
        if new_persons_results:
            plt.subplot(2, 2, 4)
            plt.imshow(image_personNEW)
            plt.title(f"New Person\nLabel: {personNEW_label},[{personNEW_prediction}]")
            plt.axis("off")

        # Accuracy curve; the x axis follows the recorded history so the two
        # sequences always have the same length.
        plt.figure(figsize=(12, 5))
        plt.plot(range(1, len(accuracy_history) + 1), accuracy_history, marker='o')
        plt.title('Accuracy Over Epochs')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.grid()

        if new_persons_results:
            print("Normalized PersonNEW Data:")
            print(personNEW_results_normalized)
        plt.show()
print("X_train before normalization:")
print(X_train)
print("X_test before normalization:")
print(X_test)

import seaborn as sns
# accuracy_score, precision_score, recall_score and f1_score are used below
# but were never imported, which raised NameError.
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from tensorflow.keras.utils import to_categorical

np.set_printoptions(threshold=np.inf, precision=4, suppress=True)

# Continue training the already-trained model for another 1000 epochs, one
# fit() call per epoch so the progress bar advances.
print("Training Start")
for _ in tqdm_notebook(range(1000), desc="Training Progress"):
    model.fit(np.array(X_train_normalized), np.array(y_train), epochs=1, verbose=0)
print("Training Complete")

# Threshold the sigmoid outputs at 0.5 and flatten the (n, 1) column to the
# 1-D shape the metric functions expect.
predictions = (model.predict(X_test_normalized) > 0.5).astype(int).ravel()

# Labels loaded from the file may be floats; convert them to 0/1 integers.
y_test_array = np.array(y_test)
y_test_binary = (y_test_array > 0.5).astype(int)

# labels=[0, 1] keeps the matrix 2x2 even if one class is absent.
conf_matrix = confusion_matrix(y_test_binary, predictions, labels=[0, 1])

accuracy = accuracy_score(y_test_binary, predictions)
precision = precision_score(y_test_binary, predictions)
recall = recall_score(y_test_binary, predictions)
f1 = f1_score(y_test_binary, predictions)

sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement