import copy
import gc

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm

# Assumed to be defined elsewhere in the full script (not shown in this paste):
#   d      - the torch.device to train on, e.g. torch.device("cuda")
#   scaler - a torch.cuda.amp.GradScaler() used by the mixed-precision path
#   Agent  - the nn.Module subclass being trained
# cycle() is written as a method (note the self parameter); its owning class
# is not included in the paste.


def cycle(self, X_train, y_train, X_val, y_val, best_score, l1_lambda=0.001, l2_lambda=0.001):
    model = Agent().to(d)
    X_train, y_train, X_val, y_val = (
        X_train.to(d), y_train.to(d), X_val.to(d), y_val.to(d)
    )
    # Weight initialization: resume from saved weights if they exist,
    # otherwise initialise the linear layers with Xavier uniform.
    try:
        weights_path = "./zlv7_full.pt"
        state_dict = torch.load(weights_path, map_location=d)
        model.load_state_dict(state_dict)
    except FileNotFoundError:
        for m in model.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    # Loss function and optimizer
    loss_fn = nn.MSELoss()  # mean squared error
    # weight_decay stays 0 because L2 regularization is added to the loss manually below
    optimizer = optim.AdamW(model.parameters(), lr=5e-6, weight_decay=0)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.98, patience=3, verbose=True
    )

    n_epochs = 300
    batch_size = 8192  # size of each batch
    batch_start = torch.arange(0, len(X_train), batch_size)

    # Hold the best model
    best_mse = np.inf  # initialise value as infinite
    best_weights = None
    history = []
    accumulation_steps = 2  # accumulate gradients over 2 batches
    for _ in tqdm.tqdm(range(n_epochs), desc="Epochs"):
        model.train()
        epoch_loss = 0.0
        for i, batch_idx in enumerate(batch_start):
            batch_X, batch_y = (
                X_train[batch_idx: batch_idx + batch_size],
                y_train[batch_idx: batch_idx + batch_size],
            )
            # Forward pass; gradients are cleared after each optimizer step
            # (below) so they can accumulate across accumulation_steps batches.
            y_pred = model(batch_X)
            loss = loss_fn(y_pred, batch_y.view(-1, 1))
            # L1 regularization on the weights
            l1_reg = torch.tensor(0., device=d)
            for name, param in model.named_parameters():
                if 'weight' in name:
                    l1_reg += torch.norm(param, 1)
            loss += l1_lambda * l1_reg

            # L2 regularization on the weights
            l2_reg = torch.tensor(0., device=d)
            for name, param in model.named_parameters():
                if 'weight' in name:
                    l2_reg += torch.norm(param, 2)
            loss += l2_lambda * l2_reg
            if d == torch.device("cuda"):
                # Mixed-precision path (needs GPU): scale the loss and
                # accumulate gradients over several batches before stepping.
                scaler.scale(loss).backward()
                if (i + 1) % accumulation_steps == 0 or (i + 1) == len(batch_start):
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()
            else:
                # CPU path: plain backward pass and optimizer step every batch
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
            epoch_loss += loss.item() * batch_X.shape[0]

        # End of epoch: average the loss, step the LR scheduler, track the best MSE
        epoch_loss /= len(X_train)
        scheduler.step(epoch_loss)
        history.append(epoch_loss)
        if epoch_loss < best_mse:
            best_mse = epoch_loss

    print("MSE: %.2f" % best_mse)
    print("RMSE: %.2f" % np.sqrt(best_mse))
    # Plot and save the training-loss curve
    plt.plot(history)
    plt.title("Epoch loss for ZL")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Epoch Loss")
    plt.draw()
    plt.savefig("ai-eval-losses.jpg")
    model.eval()
    with torch.no_grad():
        # Test out inference with 5 validation samples (smoke test only)
        for i in range(5):
            X_sample = X_val[i: i + 1].clone().detach().to(d)
            y_pred = model(X_sample)

    # Save the trained weights (and again if this cycle improved on best_score)
    best_weights = copy.deepcopy(model.state_dict())
    torch.save(best_weights, "zlv7_full.pt")
    if best_score > epoch_loss:
        best_weights = copy.deepcopy(model.state_dict())
        torch.save(best_weights, "zlv7_full.pt")
    # Release memory before returning
    torch.cuda.empty_cache()
    del X_train, X_val, y_train, y_val
    gc.enable()
    gc.collect()
    gc.disable()
    return epoch_loss
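
# --- Minimal usage sketch (illustrative only, not part of the original paste) ---
# Everything below is an assumption added for context: the placeholder Agent
# network, the feature width, the random tensors, and the Trainer wrapper are
# hypothetical stand-ins; only cycle() above comes from the paste itself.
if __name__ == "__main__":
    d = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    scaler = torch.cuda.amp.GradScaler(enabled=(d.type == "cuda"))
    n_features = 16  # placeholder width; must match the real Agent's input layer

    class Agent(nn.Module):  # stand-in for the real evaluation network
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(n_features, 64), nn.ReLU(), nn.Linear(64, 1)
            )

        def forward(self, x):
            return self.net(x)

    class Trainer:  # hypothetical owner of cycle(); binds the function as a method
        cycle = cycle

    X, y = torch.rand(10_000, n_features), torch.rand(10_000)
    X_train, X_val = X[:8_000], X[8_000:]
    y_train, y_val = y[:8_000], y[8_000:]
    final_loss = Trainer().cycle(X_train, y_train, X_val, y_val, best_score=float("inf"))
    print("final epoch loss: %.4f" % final_loss)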