Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def _knn_feature_matrix(frame):
    """Return the numeric feature matrix used by ``knn`` for *frame*.

    The 'Sex' column is encoded as a 0/1 'Is female' indicator. The encoding
    is done on a copy, so the caller's DataFrame is left untouched.
    """
    columns = ['Age', 'Is female', 'Pclass', 'SibSp', 'Parch', 'Fare']
    is_female = (frame['Sex'] == 'female') * 1
    # ``assign`` returns a new DataFrame instead of adding a column in place.
    return frame.assign(**{'Is female': is_female})[columns].to_numpy()


def knn(features, train_features, train_target, k=1):
    """Predict the binary target for ``features`` with k-nearest neighbours.

    Distances are squared Euclidean distances over the columns
    'Age', 'Is female' (derived from 'Sex'), 'Pclass', 'SibSp', 'Parch'
    and 'Fare'.

    Args:
        features: pandas DataFrame with the rows to classify.
        train_features: pandas DataFrame with the training rows; must have
            the same columns as ``features``.
        train_target: pandas Series of 0/1 labels, positionally aligned with
            the rows of ``train_features``.
        k: number of neighbours that vote.

    Returns:
        pandas.Series of 0/1 predictions carrying the same index as
        ``features``. A row is predicted as 1 when at least ``k / 2`` of its
        ``k`` nearest training rows have label 1 (ties go to 1).
    """
    # Each table is encoded from its OWN 'Sex' column; neither input is mutated.
    train_matrix = _knn_feature_matrix(train_features)
    query_matrix = _knn_feature_matrix(features)

    # Broadcast to (n_query, n_train, n_columns) and reduce over the columns
    # to obtain the (n_query, n_train) matrix of squared distances.
    differences = query_matrix[:, None, :] - train_matrix[None, :, :]
    dist = (differences ** 2).sum(axis=2)

    # Positions (not labels) of the k closest training rows for every query row.
    nearest_people_indices = dist.argsort(axis=1)[:, :k]

    # Positional lookup of the neighbours' labels, then a vectorized vote.
    neighbour_labels = train_target.to_numpy()[nearest_people_indices]
    votes = neighbour_labels.sum(axis=1)
    predicted = (votes >= k / 2).astype(int)

    # Keep the caller's index so predictions line up with ``features``.
    return pd.Series(predicted, index=features.index)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement