diff --git a/RAG-Demo.py b/RAG-Demo.py index 1581a49..58413ee 100644 --- a/RAG-Demo.py +++ b/RAG-Demo.py @@ -1,13 +1,13 @@ """ -File: llamaVisionApp.py +File: RAG-Demo.py Author: Martin Rattensberger Description: A GUI application for interacting with a local Llama vision model. - Users can upload images or PDFs and ask questions about them. + Users can select a directory with PDF files and ask questions about them. Date: 11.11.2024 # Replace with actual date -Version: 1.0 +Version: 1.1 Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5) -This script creates a tkinter-based GUI for uploading images or PDFs, +This script creates a tkinter-based GUI for selecting a directory with PDFs, sending them to a local Llama 3.2 vision model, and displaying the results. """ @@ -20,24 +20,29 @@ import io import base64 import threading import time +import os class LlamaVisionApp: def __init__(self, master): self.master = master master.title("Llama Vision Interface") - # File upload button - self.upload_button = tk.Button(master, text="Upload File", command=self.upload_file) - self.upload_button.pack(pady=10) + # Directory selection button + self.select_dir_button = tk.Button(master, text="Select PDF Directory", command=self.select_directory) + self.select_dir_button.pack(pady=10) - # Display selected filename - self.filename_label = tk.Label(master, text="No file selected") - self.filename_label.pack() + # Display selected directory + self.dir_label = tk.Label(master, text="No directory selected") + self.dir_label.pack() - # Question input - now larger - self.question_entry = tk.Text(master, width=50, height=3) # Changed from Entry to Text + # PDF file listbox + self.pdf_listbox = tk.Listbox(master, width=50, height=5) + self.pdf_listbox.pack(pady=10) + + # Question input + self.question_entry = tk.Text(master, width=50, height=3) self.question_entry.pack(pady=10) - self.question_entry.insert(tk.END, "What is in this image?") + self.question_entry.insert(tk.END, "What is in this PDF?") # Submit button self.submit_button = tk.Button(master, text="Submit", command=self.submit_question) @@ -47,45 +52,52 @@ class LlamaVisionApp: self.response_text = scrolledtext.ScrolledText(master, width=60, height=30) self.response_text.pack(pady=10) - self.file_path = None + self.directory_path = None + self.pdf_files = [] self.image_data = None self.processing = False - def upload_file(self): - self.file_path = filedialog.askopenfilename(filetypes=[("Image files", "*.png *.jpg *.jpeg *.gif"), ("PDF files", "*.pdf")]) - if self.file_path: - self.filename_label.config(text=f"Selected file: {self.file_path}") - self.load_file() + def select_directory(self): + self.directory_path = filedialog.askdirectory() + if self.directory_path: + self.dir_label.config(text=f"Selected directory: {self.directory_path}") + self.load_pdf_files() - def load_file(self): - if self.file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')): - with open(self.file_path, "rb") as image_file: - self.image_data = base64.b64encode(image_file.read()).decode('utf-8') - elif self.file_path.lower().endswith('.pdf'): - pdf_document = fitz.open(self.file_path) - first_page = pdf_document[0] - image = first_page.get_pixmap() - img = Image.frombytes("RGB", [image.width, image.height], image.samples) - buffer = io.BytesIO() - img.save(buffer, format="PNG") - self.image_data = base64.b64encode(buffer.getvalue()).decode('utf-8') - pdf_document.close() + def load_pdf_files(self): + self.pdf_files = [f for f in os.listdir(self.directory_path) if f.lower().endswith('.pdf')] + self.pdf_listbox.delete(0, tk.END) + for pdf in self.pdf_files: + self.pdf_listbox.insert(tk.END, pdf) + + def load_selected_pdf(self): + selected_indices = self.pdf_listbox.curselection() + if not selected_indices: + return None + selected_pdf = self.pdf_files[selected_indices[0]] + pdf_path = os.path.join(self.directory_path, selected_pdf) + + pdf_document = fitz.open(pdf_path) + first_page = pdf_document[0] + image = first_page.get_pixmap() + img = Image.frombytes("RGB", [image.width, image.height], image.samples) + buffer = io.BytesIO() + img.save(buffer, format="PNG") + image_data = base64.b64encode(buffer.getvalue()).decode('utf-8') + pdf_document.close() + return image_data def submit_question(self): + self.image_data = self.load_selected_pdf() if not self.image_data: - self.response_text.delete('1.0', tk.END) # Clear previous response - self.response_text.insert(tk.END, "Please upload an image or PDF first.\n") + self.response_text.delete('1.0', tk.END) + self.response_text.insert(tk.END, "Please select a PDF file first.\n") return - question = self.question_entry.get('1.0', tk.END).strip() # Get text from Text widget - # Clear previous response + question = self.question_entry.get('1.0', tk.END).strip() self.response_text.delete('1.0', tk.END) - # Start processing animation self.processing = True threading.Thread(target=self.processing_animation).start() - - # Run the Llama model in a separate thread threading.Thread(target=self.run_llama_model, args=(question,)).start() def processing_animation(self):