#
# I have a site I am working on that has the whole thing, including a webpage tutorial (well, you can cut/paste).
# You can find it here: https://buildofflineaiwithmemory.neocities.org/
# Updates: (contact rickscorpio64@gmail.com)
#
# Oct 7, 2024
# ** WARNING ** REWROTE PYTHON SCRIPT BELOW.
# Switched persistent memory to Annoy. ChromaDB seemed to have issues.
# The full script and website UI integration are now located here:
# https://buildofflineaiwithmemory.neocities.org/
#
# Oct 3, 2024
# The system prompt is now an external file (by request) called "personality.txt", for people who have a huge personality prompt.
# Date/time added.
# FIX: "updating vector database: Collection conversations does not exist."
import time
import ollama
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import PlainTextResponse, FileResponse
from pydantic import BaseModel
import uvicorn
import os
from annoy import AnnoyIndex
import numpy as np

# Declare constants for the model names
CHAT_MODEL = 'llama3'  # For generating text responses
EMBEDDING_MODEL = 'nomic-embed-text'  # Embedding model (not actually called in this simplified script; dummy vectors are used instead)
PERSONALITY_FILE = "personality.txt"  # The file containing the system prompt or personality
ANNOY_INDEX_FILE = "memory.ann"
VECTOR_DIM = 384  # Annoy index dimensionality
# Initialize the FastAPI app
app = FastAPI()

# CORS middleware setup
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins, or specify a domain.
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods.
    allow_headers=["*"],  # Allow all headers.
)

# Serve static files from the "static" directory
app.mount("/static", StaticFiles(directory="static"), name="static")
# Initialize the Annoy index
annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')

# Check if an Annoy index file exists on disk and load it if present
if os.path.exists(ANNOY_INDEX_FILE):
    annoy_index.load(ANNOY_INDEX_FILE)

# Define the PromptModel
class PromptModel(BaseModel):
    prompt: str  # Ensure the prompt field is a string
# Load the system prompt/personality from the file
def load_personality():
    try:
        with open(PERSONALITY_FILE, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return "Default system instructions or personality prompt."
# Dummy function to simulate the chat model interaction (using llama3).
# Note: this placeholder is not called by the /send_prompt/ route below, which calls ollama.chat directly.
def chat_with_model(system_prompt, user_prompt):
    # This is where you'd integrate with the actual AI model, such as llama3.
    # For now, it returns a dummy response.
    return f"System says: {system_prompt}. You said: {user_prompt}"
# Store a conversation and its embedding in the Annoy index
def store_conversation(prompt, response):
    # Simulate generating an embedding from the prompt/response
    embedding = np.random.rand(VECTOR_DIM).tolist()  # Dummy embedding generation

    # Add the new item to a fresh in-memory Annoy index (rebuilding from scratch,
    # since a built or loaded Annoy index cannot have new items appended to it)
    global annoy_index
    annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')  # Rebuild a fresh index
    annoy_index.add_item(annoy_index.get_n_items(), embedding)

    # Build and save the updated index
    annoy_index.build(10)  # Build the index with 10 trees
    annoy_index.save(ANNOY_INDEX_FILE)
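# A minimal sketch of how the dummy random vector above could be replaced with a real
# embedding, assuming the ollama Python client is installed and an embedding model
# (e.g. EMBEDDING_MODEL / nomic-embed-text) has been pulled locally. The helper name
# embed_text is an assumption, not part of the original script; note that the vector
# length the model returns may not match VECTOR_DIM, in which case the Annoy index
# would need to be created with len(vector) dimensions instead.
def embed_text(text):
    # Ask Ollama for an embedding of the given text and return it as a plain list of floats.
    result = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
    return result['embedding']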
# Retrieve similar conversations (dummy implementation)
def get_similar_conversations(prompt):
    # Dummy similar-conversations retrieval
    return []
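# A minimal sketch of what a real lookup could do with Annoy, assuming the embed_text()
# helper sketched above (or any embedding function whose dimensionality matches the index)
# and a separate mapping from Annoy item ids to the stored conversation text. Annoy itself
# only stores vectors, so conversation_texts below is a hypothetical dict the script would
# have to maintain alongside the index; it is not part of the original script.
def find_similar(prompt, conversation_texts, top_k=3):
    query_vector = embed_text(prompt)
    # get_nns_by_vector returns the ids of the top_k nearest stored vectors
    ids = annoy_index.get_nns_by_vector(query_vector, top_k)
    return [conversation_texts[i] for i in ids if i in conversation_texts]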
# FastAPI route to handle prompt input
@app.post("/send_prompt/")
def send_prompt(data: PromptModel):
    prompt = data.prompt

    # Load the personality/system prompt from the file
    system_prompt = load_personality()

    # Prepare the conversation with the system prompt and the user's input
    convo = [
        {'role': 'system', 'content': system_prompt},  # System instructions, only for the AI's context
        {'role': 'user', 'content': prompt}  # User input
    ]

    # Get the AI response (using a proper model integration)
    response = ollama.chat(CHAT_MODEL, convo)['message']['content']

    # Store the new conversation (user prompt and response)
    store_conversation(prompt, response)

    # Return only the response, not the system prompt
    return PlainTextResponse(response)
# Serve the index.html file at the root URL
@app.get("/")
def read_root():
    return FileResponse('static/index.html')
# Start the FastAPI app
if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)
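# A quick way to exercise the server once it is running (the filename below is an
# assumption -- use whatever name you saved this script under):
#
#   python main.py
#   curl -X POST http://127.0.0.1:8000/send_prompt/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello, do you remember me?"}'
#
# The response body is the plain-text reply from the chat model.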