diff --git a/RAG-Demo.py b/RAG-Demo.py index 58413ee..bfe3503 100644 --- a/RAG-Demo.py +++ b/RAG-Demo.py @@ -2,13 +2,14 @@ File: RAG-Demo.py Author: Martin Rattensberger Description: A GUI application for interacting with a local Llama vision model. - Users can select a directory with PDF files and ask questions about them. + Users can select a directory with PDF files, load them into a vector database, + and ask questions about them. Date: 11.11.2024 # Replace with actual date -Version: 1.1 +Version: 1.2 Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5) This script creates a tkinter-based GUI for selecting a directory with PDFs, -sending them to a local Llama 3.2 vision model, and displaying the results. +loading them into a LanceDB vector database, and querying them using a local Llama 3.2 vision model. """ import tkinter as tk @@ -21,11 +22,30 @@ import base64 import threading import time import os +import lancedb +import numpy as np +import pyarrow as pa +from sentence_transformers import SentenceTransformer + class LlamaVisionApp: def __init__(self, master): self.master = master - master.title("Llama Vision Interface") + master.title("Llama Vision Interface RAG") + + # Initialize LanceDB and sentence transformer + self.db = lancedb.connect("./lancedb") + self.db.drop_table("pdf_embeddings") + schema = pa.schema([ + ('id', pa.int64()), + ('filename', pa.string()), + ('page', pa.int64()), + ('text', pa.string()), + ("embedding", pa.list_(pa.float32(), 384)) + ]) + + self.table = self.db.create_table("pdf_embeddings", schema=schema) + self.model = SentenceTransformer('all-MiniLM-L6-v2') # Directory selection button self.select_dir_button = tk.Button(master, text="Select PDF Directory", command=self.select_directory) @@ -35,14 +55,14 @@ class LlamaVisionApp: self.dir_label = tk.Label(master, text="No directory selected") self.dir_label.pack() - # PDF file listbox - self.pdf_listbox = tk.Listbox(master, width=50, height=5) - self.pdf_listbox.pack(pady=10) + # Load PDFs button + self.load_pdfs_button = tk.Button(master, text="Load PDFs into Database", command=self.load_pdfs_to_db) + self.load_pdfs_button.pack(pady=10) # Question input self.question_entry = tk.Text(master, width=50, height=3) self.question_entry.pack(pady=10) - self.question_entry.insert(tk.END, "What is in this PDF?") + self.question_entry.insert(tk.END, "What is in these PDFs?") # Submit button self.submit_button = tk.Button(master, text="Submit", command=self.submit_question) @@ -54,51 +74,79 @@ class LlamaVisionApp: self.directory_path = None self.pdf_files = [] - self.image_data = None self.processing = False def select_directory(self): self.directory_path = filedialog.askdirectory() if self.directory_path: self.dir_label.config(text=f"Selected directory: {self.directory_path}") - self.load_pdf_files() + self.pdf_files = [f for f in os.listdir(self.directory_path) if f.lower().endswith('.pdf')] - def load_pdf_files(self): - self.pdf_files = [f for f in os.listdir(self.directory_path) if f.lower().endswith('.pdf')] - self.pdf_listbox.delete(0, tk.END) - for pdf in self.pdf_files: - self.pdf_listbox.insert(tk.END, pdf) - - def load_selected_pdf(self): - selected_indices = self.pdf_listbox.curselection() - if not selected_indices: - return None - selected_pdf = self.pdf_files[selected_indices[0]] - pdf_path = os.path.join(self.directory_path, selected_pdf) - - pdf_document = fitz.open(pdf_path) - first_page = pdf_document[0] - image = first_page.get_pixmap() - img = Image.frombytes("RGB", [image.width, image.height], image.samples) - buffer = io.BytesIO() - img.save(buffer, format="PNG") - image_data = base64.b64encode(buffer.getvalue()).decode('utf-8') - pdf_document.close() - return image_data - - def submit_question(self): - self.image_data = self.load_selected_pdf() - if not self.image_data: + def load_pdfs_to_db(self): + if not self.directory_path: self.response_text.delete('1.0', tk.END) - self.response_text.insert(tk.END, "Please select a PDF file first.\n") + self.response_text.insert(tk.END, "Please select a directory first.\n") return + self.processing = True + threading.Thread(target=self.processing_animation).start() + threading.Thread(target=self.process_pdfs).start() + + def process_pdfs(self): + data = [] + id_counter = 0 + for pdf_file in self.pdf_files: + pdf_path = os.path.join(self.directory_path, pdf_file) + doc = fitz.open(pdf_path) + for page_num in range(len(doc)): + page = doc[page_num] + text = page.get_text() + embedding = self.model.encode(text) + data.append({ + "id": id_counter, + "filename": pdf_file, + "page": page_num, + "text": text, + "embedding": embedding.tolist() + }) + id_counter += 1 + doc.close() + + self.table.add(data) + self.processing = False + self.master.after(0, self.update_response, "Load Complete", f"Loaded {len(data)} pages from {len(self.pdf_files)} PDFs into the database.") + + def submit_question(self): question = self.question_entry.get('1.0', tk.END).strip() self.response_text.delete('1.0', tk.END) self.processing = True threading.Thread(target=self.processing_animation).start() - threading.Thread(target=self.run_llama_model, args=(question,)).start() + threading.Thread(target=self.query_database, args=(question,)).start() + + def query_database(self, question): + try: + question_embedding = self.model.encode(question) + results = self.table.search(question_embedding).limit(5).to_list() + + context = "\n".join([f"From {r['filename']} (Page {r['page']+1}):\n{r['text'][:500]}..." for r in results]) + + response = ollama.chat( + model='llama3.2-vision', + messages=[{ + 'role': 'system', + 'content': f"You are an AI assistant that answers questions based on the following context:\n\n{context}" + }, + { + 'role': 'user', + 'content': question + }] + ) + self.processing = False + self.master.after(0, self.update_response, question, response['message']['content']) + except Exception as e: + self.processing = False + self.master.after(0, self.update_response, question, f"Error: {str(e)}") def processing_animation(self): animation = "|/-\\" @@ -110,26 +158,10 @@ class LlamaVisionApp: time.sleep(0.1) i += 1 - def run_llama_model(self, question): - try: - response = ollama.chat( - model='llama3.2-vision', - messages=[{ - 'role': 'user', - 'content': question, - 'images': [self.image_data] - }] - ) - self.processing = False - self.master.after(0, self.update_response, question, response['message']['content']) - except Exception as e: - self.processing = False - self.master.after(0, self.update_response, question, f"Error: {str(e)}") - def update_response(self, question, answer): self.response_text.delete('1.0', tk.END) self.response_text.insert(tk.END, f"Q: {question}\nA: {answer}\n\n") root = tk.Tk() app = LlamaVisionApp(root) -root.mainloop() +root.mainloop() \ No newline at end of file diff --git a/dateien/Steuerdokumente/Der nationale und der europäische Teilbetriebsbegriff im Vergleich (Teil I) - beck-online.pdf b/dateien/Dokumente/Der nationale und der europäische Teilbetriebsbegriff im Vergleich (Teil I) - beck-online.pdf similarity index 100% rename from dateien/Steuerdokumente/Der nationale und der europäische Teilbetriebsbegriff im Vergleich (Teil I) - beck-online.pdf rename to dateien/Dokumente/Der nationale und der europäische Teilbetriebsbegriff im Vergleich (Teil I) - beck-online.pdf diff --git a/dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock (D_P_M), Die Körperschaftsteuer, UmwStG § 15 (SEStEG) Aufspaltung, Abspaltung und Teilübertragung auf andere Körperschaften - DruckenAM.pdf b/dateien/Dokumente/Dötsch_Pung_Möhlenbrock (D_P_M), Die Körperschaftsteuer, UmwStG § 15 (SEStEG) Aufspaltung, Abspaltung und Teilübertragung auf andere Körperschaften - DruckenAM.pdf similarity index 100% rename from dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock (D_P_M), Die Körperschaftsteuer, UmwStG § 15 (SEStEG) Aufspaltung, Abspaltung und Teilübertragung auf andere Körperschaften - DruckenAM.pdf rename to dateien/Dokumente/Dötsch_Pung_Möhlenbrock (D_P_M), Die Körperschaftsteuer, UmwStG § 15 (SEStEG) Aufspaltung, Abspaltung und Teilübertragung auf andere Körperschaften - DruckenAM.pdf diff --git a/dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock_1.pdf b/dateien/Dokumente/Dötsch_Pung_Möhlenbrock_1.pdf similarity index 100% rename from dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock_1.pdf rename to dateien/Dokumente/Dötsch_Pung_Möhlenbrock_1.pdf diff --git a/dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock_2.pdf b/dateien/Dokumente/Dötsch_Pung_Möhlenbrock_2.pdf similarity index 100% rename from dateien/Steuerdokumente/Dötsch_Pung_Möhlenbrock_2.pdf rename to dateien/Dokumente/Dötsch_Pung_Möhlenbrock_2.pdf diff --git a/lancedb/pdf_embeddings.lance/_transactions/0-67ee0130-5203-440d-8ec2-ca8cd9316cdf.txn b/lancedb/pdf_embeddings.lance/_transactions/0-67ee0130-5203-440d-8ec2-ca8cd9316cdf.txn new file mode 100644 index 0000000..d5cb4d8 --- /dev/null +++ b/lancedb/pdf_embeddings.lance/_transactions/0-67ee0130-5203-440d-8ec2-ca8cd9316cdf.txn @@ -0,0 +1 @@ +$67ee0130-5203-440d-8ec2-ca8cd9316cdf²è#id ÿÿÿÿÿÿÿÿÿ*int6408Zdefault,filename ÿÿÿÿÿÿÿÿÿ*string08Zdefault'page ÿÿÿÿÿÿÿÿÿ*int6408Zdefault(text ÿÿÿÿÿÿÿÿÿ*string08Zdefault@ embedding ÿÿÿÿÿÿÿÿÿ*fixed_size_list:float:38408Zdefault \ No newline at end of file diff --git a/lancedb/pdf_embeddings.lance/_transactions/1-8d587c71-015a-41f9-8503-e6e8568abf89.txn b/lancedb/pdf_embeddings.lance/_transactions/1-8d587c71-015a-41f9-8503-e6e8568abf89.txn new file mode 100644 index 0000000..a980920 Binary files /dev/null and b/lancedb/pdf_embeddings.lance/_transactions/1-8d587c71-015a-41f9-8503-e6e8568abf89.txn differ diff --git a/lancedb/pdf_embeddings.lance/_transactions/2-ed119f5c-2bb6-458d-af90-d8d523b8187f.txn b/lancedb/pdf_embeddings.lance/_transactions/2-ed119f5c-2bb6-458d-af90-d8d523b8187f.txn new file mode 100644 index 0000000..058c475 Binary files /dev/null and b/lancedb/pdf_embeddings.lance/_transactions/2-ed119f5c-2bb6-458d-af90-d8d523b8187f.txn differ diff --git a/lancedb/pdf_embeddings.lance/_versions/1.manifest b/lancedb/pdf_embeddings.lance/_versions/1.manifest new file mode 100644 index 0000000..a946f5d Binary files /dev/null and b/lancedb/pdf_embeddings.lance/_versions/1.manifest differ diff --git a/lancedb/pdf_embeddings.lance/_versions/2.manifest b/lancedb/pdf_embeddings.lance/_versions/2.manifest new file mode 100644 index 0000000..9d5d9c3 Binary files /dev/null and b/lancedb/pdf_embeddings.lance/_versions/2.manifest differ diff --git a/lancedb/pdf_embeddings.lance/_versions/3.manifest b/lancedb/pdf_embeddings.lance/_versions/3.manifest new file mode 100644 index 0000000..b8880f5 Binary files /dev/null and b/lancedb/pdf_embeddings.lance/_versions/3.manifest differ diff --git a/lancedb/pdf_embeddings.lance/data/da484e8c-ffdf-4684-ab37-13c30efbd25a.lance b/lancedb/pdf_embeddings.lance/data/da484e8c-ffdf-4684-ab37-13c30efbd25a.lance new file mode 100644 index 0000000..b79a368 Binary files /dev/null and b/lancedb/pdf_embeddings.lance/data/da484e8c-ffdf-4684-ab37-13c30efbd25a.lance differ