Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
import numpy as np  # not used in the visible script; kept in case other code relies on it

# Raw observations, one record per line: an ISO-8601 UTC timestamp followed by
# two integer fields that are loaded below as the 'env' and 'user' columns.
# NOTE(review): the real-world meaning of 'env' and 'user' is not stated here.
text = '''
2018-05-27T00:12:43Z 20 21
2018-05-27T00:17:27Z 20 22
2018-05-27T00:17:59Z 20 24
2018-05-27T00:20:01Z 20 21
2018-05-27T00:23:14Z 20 24
2018-05-28T09:39:07Z 20 22
2018-05-28T10:40:17Z 20 23
2018-05-28T20:12:47Z 20 25
2018-05-28T20:14:16Z 23 25
2018-05-30T20:29:30Z 18 24
'''

# Tokenise every non-blank line. split() with no argument also tolerates tabs
# and repeated spaces, which split(' ') would turn into empty tokens.
data = [line.split() for line in text.splitlines() if line.strip()]

df = pd.DataFrame(data, columns=['datetime', 'env', 'user'])
df['datetime'] = pd.to_datetime(df['datetime'])

# Minutes elapsed since midnight, used later as a time-of-day feature.
# Computed with the vectorised .dt accessor instead of a per-row lambda; the
# explicit int64 cast keeps the dtype that the per-row version produced.
df['minutes'] = (
    df['datetime'].dt.hour * 60 + df['datetime'].dt.minute
).astype('int64')

# The fields were parsed as strings; convert the numeric ones to integers.
df['env'] = df['env'].astype('int')
df['user'] = df['user'].astype('int')

print(df.dtypes)
print(df)
import sklearn.linear_model
import sklearn.metrics  # imported explicitly; previously only available as a transitive import
import sklearn.model_selection
import sklearn.tree

# Features: time of day in minutes and the 'env' value; target: the 'user' value.
X = df[['minutes', 'env']]
y = df['user']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, test_size=0.2, random_state=12
)

# Each entry pairs the banner printed before the report with the estimator to fit.
# The tree is seeded as well, so repeated runs give the same result.
models = [
    ("--- LinearRegression ---", sklearn.linear_model.LinearRegression()),
    ("--- DecisionTreeRegressor ---", sklearn.tree.DecisionTreeRegressor(random_state=12)),
]

# Fit every model on the training rows, then report the per-row prediction
# error and the mean squared error on the held-out rows.
for banner, model in models:
    print(banner)
    model.fit(X_train, y_train)
    predict = model.predict(X_test)
    print('diff:\n', predict - y_test)
    print('MSE:', sklearn.metrics.mean_squared_error(y_test, predict))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement