Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Class for simple gradient descent
class MyVanillaGD(BaseEstimator, ClassifierMixin):
    """Binary logistic regression trained by full-batch gradient descent.

    Labels are expected to be encoded as ``{-1, +1}``.  The minimised
    objective (see :meth:`loss`) is::

        mean(log(1 + exp(-y * (X @ w + b)))) + gamma * ||w||_1 + beta * ||w||_2^2

    The intercept ``b`` is not penalised.

    Parameters
    ----------
    beta : float
        Weight of the squared-L2 penalty on the coefficients.
    gamma : float
        Weight of the L1 penalty on the coefficients.
    tol : float
        Training stops once the absolute change of the loss between two
        consecutive updates drops below this value.
    max_epoch : int
        Maximum number of passes over the training data.
    random_state : int or None
        Seed used for shuffling the data and drawing the initial weights.
    step : float
        Learning rate.
    debug : bool
        When true, print the parameters and the loss after every update.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,)
    intercept_ : float
    steps : int
        Number of updates performed by the last ``fit``.
    qual : ndarray
        Loss recorded after each performed update.
    """

    def __init__(self, beta=1.0, gamma=1.0, tol=0.01, max_epoch=1000,
                 random_state=123, step=0.05, debug=False):
        self.beta = beta
        self.gamma = gamma
        self.tol = tol
        self.max_epoch = max_epoch
        self.random_state = random_state
        self.step = step
        self.debug = debug

    def fit(self, X, y):
        """Shuffle the data, draw random initial weights and run the optimiser.

        Returns
        -------
        self
            The fitted estimator (scikit-learn convention).
        """
        X, y = sklearn.utils.shuffle(X, y, random_state=self.random_state)
        # A private generator produces the same draws as seeding the global
        # one, but does not clobber the caller's NumPy random state.
        rng = np.random.RandomState(self.random_state)
        n_features = X.shape[1]
        # The standard deviation of the initial weights equals the number of
        # features.
        self.coef_ = rng.normal(size=n_features, scale=n_features)
        self.intercept_ = rng.normal(scale=n_features)
        self.grad_descent(X, y)
        return self

    def grad_descent(self, X, y):
        """Run full-batch updates until the loss change is below ``tol``.

        Sets ``self.steps`` to the number of updates performed and
        ``self.qual`` to the loss after each of those updates.
        """
        history = np.zeros(self.max_epoch)
        self.steps = self.max_epoch
        # The loss after one update is the "old" loss of the next one, so it
        # is carried over instead of being recomputed on every iteration.
        previous_loss = self.loss(X, y)
        for epoch in range(self.max_epoch):
            current_loss = self.gradient_step(X, y, self.step)
            history[epoch] = current_loss
            if np.abs(previous_loss - current_loss) < self.tol:
                self.steps = epoch + 1
                break
            previous_loss = current_loss
        # Keep only the entries that were actually written so the history
        # does not end in spurious zeros after an early stop.
        self.qual = history[:self.steps]

    def gradient_step(self, X, y, step):
        """Apply one (sub)gradient update of the objective and return the loss.

        Parameters
        ----------
        X, y : array-like
            Samples and ``{-1, +1}`` labels used to estimate the gradient.
        step : float
            Learning rate for this update.

        Returns
        -------
        float
            Value of :meth:`loss` on ``(X, y)`` after the update.
        """
        margin = y * (X.dot(self.coef_) + self.intercept_)
        # d/dz log(1 + exp(-z)) = -1 / (1 + exp(z)).  Going through
        # logaddexp avoids the inf / inf = NaN of the naive exp ratio when
        # the margin is large in magnitude.
        grad_coef = -y * np.exp(-np.logaddexp(0, margin))
        mean_grad = np.mean(X.T * grad_coef, axis=1)

        # (Sub)gradient of gamma * ||w||_1 + beta * ||w||_2^2, evaluated at
        # the current weights so the update descends the same objective that
        # `loss` reports and the stopping rule monitors.
        penalty_grad = (self.gamma * np.sign(self.coef_)
                        + 2 * self.beta * self.coef_)

        self.coef_ -= step * (mean_grad + penalty_grad)
        # The intercept is not part of the penalty in `loss`.
        self.intercept_ -= step * np.mean(grad_coef)

        new_loss = self.loss(X, y)
        if self.debug:
            print('coefs:',self.coef_, ' intercept:',self.intercept_, ' new_loss:', new_loss)
        return new_loss

    # labels of classes {1, -1}
    def loss(self, X, y):
        """Return the mean logistic loss plus the L1 and squared-L2 penalties."""
        margin = y * (X.dot(self.coef_) + self.intercept_)
        # logaddexp(0, -m) == log(1 + exp(-m)) without overflow.
        data_term = np.mean(np.logaddexp(0, -margin))
        l1_term = self.gamma * np.sum(np.abs(self.coef_))
        l2_term = self.beta * np.sum(self.coef_ ** 2)
        return data_term + l1_term + l2_term

    def predict(self, X, sensitivity=0.5):
        """Predict ``+1`` where P(y=+1) exceeds ``sensitivity``, else ``-1``."""
        positive_proba = self.predict_proba(X)[1, :]
        return np.where(positive_proba > sensitivity, 1, -1)

    def predict_proba(self, X):
        """Return class probabilities as an array of shape ``(2, n_samples)``.

        Row 0 holds P(y=-1) and row 1 holds P(y=+1).  Note that this is the
        transpose of the usual scikit-learn layout; it is kept as is because
        ``predict`` and existing callers index it this way.
        """
        score = X.dot(self.coef_) + self.intercept_
        # exp(-logaddexp(0, z)) == 1 / (1 + exp(z)), evaluated stably.
        return np.array([np.exp(-np.logaddexp(0, score)),
                         np.exp(-np.logaddexp(0, -score))])
# Class for SGD or mini-batch SGD
class MySGD(MyVanillaGD):
    """Mini-batch stochastic variant of :class:`MyVanillaGD`.

    Each epoch walks over consecutive mini-batches of the training data and
    applies one gradient update per batch, using a learning rate that decays
    as ``step / epoch_number``.

    Parameters
    ----------
    batch_size : int
        Target number of samples per mini-batch.  The data is cut into
        ``max(1, n_samples // batch_size)`` nearly equal parts.

    All other parameters are forwarded unchanged to :class:`MyVanillaGD`.
    """

    def __init__(self, beta=1.0, gamma=1.0, tol=0.01, max_epoch=1000,
                 random_state=123, step=0.05, batch_size=128, debug=False):
        super().__init__(beta=beta, gamma=gamma, tol=tol, max_epoch=max_epoch,
                         random_state=random_state, step=step, debug=debug)
        self.batch_size = batch_size

    def grad_descent(self, X, y):
        """Run mini-batch updates until the full-data loss change is below ``tol``.

        Sets ``self.qual`` to the full-data loss after every update and
        ``self.steps`` to the number of updates performed.
        """
        n_samples = X.shape[0]
        # At least one batch, so data sets smaller than ``batch_size`` do not
        # make ``np.array_split`` fail on a section count of zero.
        n_batches = max(1, n_samples // self.batch_size)
        # The split does not depend on the epoch, so it is done only once.
        X_batches = np.array_split(X, n_batches)
        y_batches = np.array_split(y, n_batches)

        history = []
        # The loss after one update is the "old" loss of the next one, so the
        # full-data loss is evaluated once per update instead of twice.
        previous_loss = self.loss(X, y)
        converged = False
        for epoch in range(self.max_epoch):
            # custom steps: the learning rate shrinks with the epoch number
            epoch_step = self.step / (epoch + 1)
            for X_batch, y_batch in zip(X_batches, y_batches):
                self.gradient_step(X_batch, y_batch, epoch_step)
                current_loss = self.loss(X, y)
                history.append(current_loss)
                if np.abs(previous_loss - current_loss) < self.tol:
                    converged = True
                    break
                previous_loss = current_loss
            if converged:
                break

        # Always report the number of updates actually performed, whether or
        # not training stopped early.
        self.steps = len(history)
        self.qual = np.array(history)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement