#
# I have a site I am working on that has the whole thing, including a webpage tutorial (well, you can cut/paste).
# You can find it here: https://buildofflineaiwithmemory.neocities.org/
# Updates: (contact rickscorpio64@gmail.com)
#
# Oct 7, 2024
# ** WARNING ** REWROTE PYTHON SCRIPT BELOW.
# Switched persistent memory to Annoy. ChromaDB seemed to have issues.
# The full script and website UI integration are now located here:
# https://buildofflineaiwithmemory.neocities.org/
#
# Oct 3, 2024
# The system prompt is now an external file (by request) called "personality.txt", for people who have a huge personality prompt.
# Date/time added.
# FIX: "updating vector database: Collection conversations does not exist."
import time
import ollama
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from fastapi.responses import PlainTextResponse, FileResponse
from pydantic import BaseModel
import uvicorn
import os
from annoy import AnnoyIndex
import numpy as np

# Declare constants for the model names
CHAT_MODEL = 'llama3'  # For generating text responses
EMBEDDING_MODEL = 'nomic-embed-text'  # Embedding model (not actually called in this simplified script; dummy vectors are used instead)
PERSONALITY_FILE = "personality.txt"  # The file containing the system prompt or personality
ANNOY_INDEX_FILE = "memory.ann"
VECTOR_DIM = 384  # Annoy index dimensionality
# Initialize the FastAPI app
app = FastAPI()

# CORS middleware setup
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins, or specify a domain.
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods.
    allow_headers=["*"],  # Allow all headers.
)

# Serve static files from the "static" directory
app.mount("/static", StaticFiles(directory="static"), name="static")
# Initialize the Annoy index
annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')

# Check if an Annoy index file exists on disk and load it if present
if os.path.exists(ANNOY_INDEX_FILE):
    annoy_index.load(ANNOY_INDEX_FILE)

# Define the PromptModel
class PromptModel(BaseModel):
    prompt: str  # Ensure the prompt field is a string
# Load the system prompt/personality from the file
def load_personality():
    try:
        with open(PERSONALITY_FILE, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return "Default system instructions or personality prompt."
# Dummy function to simulate the chat model interaction (using llama3).
# Note: this placeholder is not called by the /send_prompt/ route below, which calls ollama.chat directly.
def chat_with_model(system_prompt, user_prompt):
    # This is where you'd integrate with the actual AI model, such as llama3.
    # For now, it returns a dummy response.
    return f"System says: {system_prompt}. You said: {user_prompt}"
# Store a conversation and its embedding in the Annoy index
def store_conversation(prompt, response):
    # Simulate generating an embedding from the prompt/response
    embedding = np.random.rand(VECTOR_DIM).tolist()  # Dummy embedding generation

    # Add the new item to a fresh in-memory Annoy index (rebuilding from scratch,
    # since a built or loaded Annoy index cannot have new items appended to it)
    global annoy_index
    annoy_index = AnnoyIndex(VECTOR_DIM, 'angular')  # Rebuild a fresh index
    annoy_index.add_item(annoy_index.get_n_items(), embedding)

    # Build and save the updated index
    annoy_index.build(10)  # Build the index with 10 trees
    annoy_index.save(ANNOY_INDEX_FILE)
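# A minimal sketch of how the dummy random vector above could be replaced with a real
# embedding, assuming the ollama Python client is installed and an embedding model
# (e.g. EMBEDDING_MODEL / nomic-embed-text) has been pulled locally. The helper name
# embed_text is an assumption, not part of the original script; note that the vector
# length the model returns may not match VECTOR_DIM, in which case the Annoy index
# would need to be created with len(vector) dimensions instead.
def embed_text(text):
    # Ask Ollama for an embedding of the given text and return it as a plain list of floats.
    result = ollama.embeddings(model=EMBEDDING_MODEL, prompt=text)
    return result['embedding']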
# Retrieve similar conversations (dummy implementation)
def get_similar_conversations(prompt):
    # Dummy similar-conversations retrieval
    return []
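# A minimal sketch of what a real lookup could do with Annoy, assuming the embed_text()
# helper sketched above (or any embedding function whose dimensionality matches the index)
# and a separate mapping from Annoy item ids to the stored conversation text. Annoy itself
# only stores vectors, so conversation_texts below is a hypothetical dict the script would
# have to maintain alongside the index; it is not part of the original script.
def find_similar(prompt, conversation_texts, top_k=3):
    query_vector = embed_text(prompt)
    # get_nns_by_vector returns the ids of the top_k nearest stored vectors
    ids = annoy_index.get_nns_by_vector(query_vector, top_k)
    return [conversation_texts[i] for i in ids if i in conversation_texts]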
# FastAPI route to handle prompt input
@app.post("/send_prompt/")
def send_prompt(data: PromptModel):
    prompt = data.prompt

    # Load the personality/system prompt from the file
    system_prompt = load_personality()

    # Prepare the conversation with the system prompt and the user's input
    convo = [
        {'role': 'system', 'content': system_prompt},  # System instructions, only for the AI's context
        {'role': 'user', 'content': prompt}  # User input
    ]

    # Get the AI response (using a proper model integration)
    response = ollama.chat(CHAT_MODEL, convo)['message']['content']

    # Store the new conversation (user prompt and response)
    store_conversation(prompt, response)

    # Return only the response, not the system prompt
    return PlainTextResponse(response)
# Serve the index.html file at the root URL
@app.get("/")
def read_root():
    return FileResponse('static/index.html')
# Start the FastAPI app
if __name__ == "__main__":
    uvicorn.run(app, host="127.0.0.1", port=8000)
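# A quick way to exercise the server once it is running (the filename below is an
# assumption -- use whatever name you saved this script under):
#
#   python main.py
#   curl -X POST http://127.0.0.1:8000/send_prompt/ \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello, do you remember me?"}'
#
# The response body is the plain-text reply from the chat model.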