Advertisement
RupeshAcharya60

text extraction

May 1st, 2023
691
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.04 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd
  3.  
# Load the property title dataset.
# Assumes a CSV with at least 'description' and 'title' columns — TODO confirm.
titles = pd.read_csv('property_titles.csv')

# Preprocess the data: bag-of-words features from the free-text description.
# NOTE(review): CountVectorizer is used here but never imported anywhere
# visible — the file needs
#   from sklearn.feature_extraction.text import CountVectorizer
# at the top or this line raises NameError.
corpus = titles['description']
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(corpus).toarray()  # dense (n_samples, n_vocab) matrix
y = titles['title']

# Split the dataset into training and testing sets (seeded 80/20 shuffle,
# so the split is reproducible across runs).
np.random.seed(42)
indices = np.random.permutation(len(X))
split = int(0.8 * len(X))
train_indices, test_indices = indices[:split], indices[split:]
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
  20.  
# Network dimensions: bag-of-words input -> one hidden layer -> one output
# unit per distinct title class.
input_size = X_train.shape[1]
hidden_size = 100
output_size = len(y_train.unique())

# Initialize both weight matrices with a fixed seed for reproducibility.
# NOTE(review): unscaled randn can saturate/explode when input_size is large;
# a scaled scheme (e.g. He init, randn * sqrt(2 / fan_in)) is typical for
# ReLU networks — confirm whether plain randn is intentional.
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)
  29.  
  30. def relu(x):
  31.     return np.maximum(x, 0)
  32.  
  33. def softmax(x):
  34.     exps = np.exp(x - np.max(x, axis=1, keepdims=True))
  35.     return exps / np.sum(exps, axis=1, keepdims=True)
  36.  
  37. def forward(X, W1, W2):
  38.     Z1 = X.dot(W1)
  39.     A1 = relu(Z1)
  40.     Z2 = A1.dot(W2)
  41.     A2 = softmax(Z2)
  42.     return Z1, A1, Z2, A2
  43.  
  44. def backward(X, y, Z1, A1, Z2, A2, W2, lr=0.1):
  45.     Y = pd.get_dummies(y).values
  46.     dZ2 = A2 - Y
  47.     dW2 = A1.T.dot(dZ2)
  48.     dA1 = dZ2.dot(W2.T)
  49.     dZ1 = dA1 * (Z1 > 0)
  50.     dW1 = X.T.dot(dZ1)
  51.     W1 -= lr * dW1
  52.     W2 -= lr * dW2
  53.  
  54. epochs = 1000
  55. for epoch in range(epochs):
  56.     Z1, A1, Z2, A2 = forward(X_train, W1, W2)
  57.     backward(X_train, y_train, Z1, A1, Z2, A2, W2)
  58.     if epoch % 100 == 0:
  59.         loss = np.sum(-np.log(A2[np.arange(len(X_train)), y_train])) / len(X_train)
  60.         print(f'Epoch {epoch}, loss = {loss}')
  61.  
  62. # Generate a property title using the trained model
  63. def create_title(area, location, property_type):
  64.     description = f"{property_type} in {location} - {area} sqft"
  65.     X_new = vectorizer.transform([description]).toarray()
  66.     _, _, _, A2 = forward(X_new, W1, W2)
  67.     title_idx = np.argmax(A2)
  68.     title = y.unique()[title_idx]
  69.     return title
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement