pavelperc

ml_hw4

Mar 10th, 2019

import numpy as np
import sklearn.utils
from sklearn.base import BaseEstimator, ClassifierMixin


# Class for simple gradient descent
class MyVanillaGD(BaseEstimator, ClassifierMixin):
    def __init__(self, beta=1.0, gamma=1.0, tol=0.01, max_epoch=1000,
                 random_state=123, step=0.05, debug=False):
        self.beta = beta
        self.gamma = gamma
        self.tol = tol
        self.max_epoch = max_epoch
        self.random_state = random_state

        # Fill in
        self.step = step
        self.debug = debug

    def fit(self, X, y):
        X, y = sklearn.utils.shuffle(X, y, random_state=self.random_state)

        np.random.seed(self.random_state)

        # random initialization of the weights
        self.coef_ = np.random.normal(size=X.shape[1], scale=X.shape[1])
        self.intercept_ = np.random.normal(scale=X.shape[1])

#         self.coef_ = np.zeros(X.shape[1])
#         self.intercept_ = 0.0

        self.grad_descent(X, y)
        return self

    def grad_descent(self, X, y):
        # loss value after every epoch (for the learning curve)
        qual = np.zeros(self.max_epoch)
        self.steps = self.max_epoch

        for iterations in range(self.max_epoch):
            old_loss = self.loss(X, y)

            new_loss = self.gradient_step(X, y, self.step)

            qual[iterations] = new_loss
            # stop when the loss change falls below the tolerance
            if np.abs(old_loss - new_loss) < self.tol:
                self.steps = iterations + 1
                break
        self.qual = qual

    def gradient_step(self, X, y, step):
        # solution 1
#         grad_coef = -y / (1 + np.exp((X.dot(self.coef_) + self.intercept_) * y))
#         mean_grad = np.mean(X.T * grad_coef, axis=1)

#         penalty = np.sign(self.coef_) * self.beta + 2 * self.coef_ * self.gamma
#         grad = mean_grad + penalty

#         intercept_penalty = np.sign(self.intercept_) * self.beta + 2 * self.intercept_ * self.gamma
#         intercept_grad = np.mean(grad_coef) + intercept_penalty

#         self.coef_ -= step * mean_grad
#         self.intercept_ -= step * intercept_grad
#         new_loss = self.loss(X, y)

#         # solution 1.2 - exactly like in the formula, no vectors
#         n = X.shape[0]
#         w = X.shape[1]

#         grad = np.zeros(w)
#         intercept_grad = 0.0

#         for i in range(n):
#             grad += -y[i] * X[i] / (1 + np.exp(y[i] * (self.coef_.T @ X[i] + self.intercept_)))
#             intercept_grad += -y[i] / (1 + np.exp(y[i] * (self.coef_.T @ X[i] + self.intercept_)))
#         grad /= n
#         intercept_grad /= n

#         # penalty
#         grad += np.sign(self.coef_) * self.beta + 2 * self.coef_ * self.gamma
#         intercept_grad += np.sign(self.intercept_) * self.beta + 2 * self.intercept_ * self.gamma

#         old_loss = self.loss(X, y)
#         self.coef_ -= step * grad
#         self.intercept_ -= step * intercept_grad
#         new_loss = self.loss(X, y)

        # solution 2 - like in the seminar:
        # use the derived gradient of the log-loss before the fraction is simplified

        # gradient of the data term:
        # dL/dw = mean_i( -y_i * x_i * exp(-y_i * m_i) / (1 + exp(-y_i * m_i)) ), m_i = <w, x_i> + w_0
        decision_function = -(X.dot(self.coef_) + self.intercept_)
        grad_coef = -np.exp(decision_function * y) / (1 + np.exp(decision_function * y))
        grad_coef *= y
        mean_grad = np.mean(X.T * grad_coef, axis=1)

        w = np.copy(self.coef_)
        self.coef_ -= step * mean_grad

        w_0 = self.intercept_
        self.intercept_ -= step * np.mean(grad_coef)

        # add the penalty gradient: 2 * beta * w for the L2 term, gamma * sign(w) for the L1 term
        self.coef_ -= 2 * self.beta * step * w / X.shape[0]
        self.intercept_ -= 2 * self.beta * step * w_0 / X.shape[0]
        self.coef_ -= self.gamma * step * np.sign(w) / X.shape[0]
        self.intercept_ -= self.gamma * step * np.sign(w_0) / X.shape[0]

        new_loss = self.loss(X, y)

        if self.debug:
            print('coefs:', self.coef_, ' intercept:', self.intercept_, ' new_loss:', new_loss)

        return new_loss

    # labels of the classes are {1, -1}
    def loss(self, X, y):
        # mean log-loss plus L1 and L2 penalties on the weights
        return np.mean(np.log(1 + np.exp(-y * (X.dot(self.coef_) + self.intercept_)))) \
               + self.gamma * np.sum(np.abs(self.coef_)) \
               + self.beta * np.sum(self.coef_ ** 2)

    def predict(self, X, sensitivity=0.5):
        predict = (self.predict_proba(X)[1, :] > sensitivity).astype(int)

        # transform 0 to -1
        predict[predict == 0] = -1

        return predict

    def predict_proba(self, X):
        # row 0: P(y = -1), row 1: P(y = +1)
        return np.array([1 / (1 + np.exp(X.dot(self.coef_) + self.intercept_)),
                         1 / (1 + np.exp(-X.dot(self.coef_) - self.intercept_))])
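
# Quick sanity check for MyVanillaGD (an illustrative sketch, not part of the
# original homework): it assumes scikit-learn's make_classification and
# accuracy_score are available, and remaps labels from {0, 1} to {-1, +1},
# which is what the class expects. Hyperparameter values are arbitrary.
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

X_demo, y_demo = make_classification(n_samples=500, n_features=5, random_state=0)
y_demo = 2 * y_demo - 1  # map labels {0, 1} -> {-1, +1}

gd = MyVanillaGD(beta=0.01, gamma=0.01, step=0.05, max_epoch=500)
gd.fit(X_demo, y_demo)
print('MyVanillaGD train accuracy:', accuracy_score(y_demo, gd.predict(X_demo)))
print('epochs until convergence:', gd.steps)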

# Class for SGD or mini-batch SGD
class MySGD(MyVanillaGD):
    def __init__(self, beta=1.0, gamma=1.0, tol=0.01, max_epoch=1000,
                 random_state=123, step=0.05, batch_size=128, debug=False):
        self.beta = beta
        self.gamma = gamma
        self.tol = tol
        self.max_epoch = max_epoch
        self.random_state = random_state
        # Fill in
        self.step = step
        self.batch_size = batch_size
        self.debug = debug
  153.    
  154.     def grad_descent(self, X, y):
  155.         qual = []
  156.        
  157.         self.steps = self.max_epoch * (X.shape[0] / self.batch_size)
  158.         for iterations in range(self.max_epoch):
  159.            
  160.             batches = zip(np.array_split(X, X.shape[0] / self.batch_size),\
  161.                           np.array_split(y, X.shape[0] / self.batch_size))
  162.             stop = False
  163.             for xbatch, ybatch in batches:
  164.                 old_loss = self.loss(X, y)
  165.                 self.gradient_step(xbatch, ybatch, self.step / (iterations + 1)) # custom steps!!
  166.                 new_loss = self.loss(X, y)
  167.                
  168.                 qual.append(new_loss)
  169.                 if np.abs(old_loss - new_loss) < self.tol:
  170.                     stop = True
  171.                     break
  172.             if stop:
  173.                 self.steps = iterations + 1
  174.                 break
  175.                
  176.         self.qual = np.array(qual)
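
# Sketch of running the mini-batch variant (illustrative, not from the original
# paste): same synthetic setup as above; the losses recorded in sgd.qual can be
# plotted as a learning curve. Hyperparameter values below are arbitrary.
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score

X_demo, y_demo = make_classification(n_samples=1000, n_features=10, random_state=1)
y_demo = 2 * y_demo - 1  # map labels {0, 1} -> {-1, +1}

sgd = MySGD(beta=0.01, gamma=0.01, step=0.1, batch_size=64, max_epoch=50)
sgd.fit(X_demo, y_demo)
print('MySGD train accuracy:', accuracy_score(y_demo, sgd.predict(X_demo)))
print('number of recorded loss values:', len(sgd.qual))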