Umbau der Basis zu Verzeichniswahl mit PDFs

2024-12-04 18:50:34 +01:00
parent 9e395f5332
commit 9507403c46
1 changed file with 51 additions and 39 deletions
--- a/RAG-Demo.py
+++ b/RAG-Demo.py
@@ -1,13 +1,13 @@
 """
-File: llamaVisionApp.py
+File: RAG-Demo.py
 Author: Martin Rattensberger
 Description: A GUI application for interacting with a local Llama vision model.
-             Users can upload images or PDFs and ask questions about them.
+             Users can select a directory with PDF files and ask questions about them.
 Date: 11.11.2024 # Replace with actual date
-Version: 1.0
+Version: 1.1
 Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5)

-This script creates a tkinter-based GUI for uploading images or PDFs,
+This script creates a tkinter-based GUI for selecting a directory with PDFs,
 sending them to a local Llama 3.2 vision model, and displaying the results.
 """

@@ -20,24 +20,29 @@ import io
 import base64
 import threading
 import time
+import os

 class LlamaVisionApp:
    def __init__(self, master):
        self.master = master
        master.title("Llama Vision Interface")

-        # File upload button
-        self.upload_button = tk.Button(master, text="Upload File", command=self.upload_file)
-        self.upload_button.pack(pady=10)
+        # Directory selection button
+        self.select_dir_button = tk.Button(master, text="Select PDF Directory", command=self.select_directory)
+        self.select_dir_button.pack(pady=10)

-        # Display selected filename
-        self.filename_label = tk.Label(master, text="No file selected")
-        self.filename_label.pack()
+        # Display selected directory
+        self.dir_label = tk.Label(master, text="No directory selected")
+        self.dir_label.pack()

-        # Question input - now larger
-        self.question_entry = tk.Text(master, width=50, height=3)  # Changed from Entry to Text
+        # PDF file listbox
+        self.pdf_listbox = tk.Listbox(master, width=50, height=5)
+        self.pdf_listbox.pack(pady=10)
+
+        # Question input
+        self.question_entry = tk.Text(master, width=50, height=3)
        self.question_entry.pack(pady=10)
-        self.question_entry.insert(tk.END, "What is in this image?")
+        self.question_entry.insert(tk.END, "What is in this PDF?")

        # Submit button
        self.submit_button = tk.Button(master, text="Submit", command=self.submit_question)
@@ -47,45 +52,52 @@ class LlamaVisionApp:
        self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)
        self.response_text.pack(pady=10)

-        self.file_path = None
+        self.directory_path = None
+        self.pdf_files = []
        self.image_data = None
        self.processing = False

-    def upload_file(self):
-        self.file_path = filedialog.askopenfilename(filetypes=[("Image files", "*.png *.jpg *.jpeg *.gif"), ("PDF files", "*.pdf")])
-        if self.file_path:
-            self.filename_label.config(text=f"Selected file: {self.file_path}")
-            self.load_file()
+    def select_directory(self):
+        self.directory_path = filedialog.askdirectory()
+        if self.directory_path:
+            self.dir_label.config(text=f"Selected directory: {self.directory_path}")
+            self.load_pdf_files()

-    def load_file(self):
-        if self.file_path.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
-            with open(self.file_path, "rb") as image_file:
-                self.image_data = base64.b64encode(image_file.read()).decode('utf-8')
-        elif self.file_path.lower().endswith('.pdf'):
-            pdf_document = fitz.open(self.file_path)
-            first_page = pdf_document[0]
-            image = first_page.get_pixmap()
-            img = Image.frombytes("RGB", [image.width, image.height], image.samples)
-            buffer = io.BytesIO()
-            img.save(buffer, format="PNG")
-            self.image_data = base64.b64encode(buffer.getvalue()).decode('utf-8')
-            pdf_document.close()
+    def load_pdf_files(self):
+        self.pdf_files = [f for f in os.listdir(self.directory_path) if f.lower().endswith('.pdf')]
+        self.pdf_listbox.delete(0, tk.END)
+        for pdf in self.pdf_files:
+            self.pdf_listbox.insert(tk.END, pdf)
+
+    def load_selected_pdf(self):
+        selected_indices = self.pdf_listbox.curselection()
+        if not selected_indices:
+            return None
+        selected_pdf = self.pdf_files[selected_indices[0]]
+        pdf_path = os.path.join(self.directory_path, selected_pdf)
+        
+        pdf_document = fitz.open(pdf_path)
+        first_page = pdf_document[0]
+        image = first_page.get_pixmap()
+        img = Image.frombytes("RGB", [image.width, image.height], image.samples)
+        buffer = io.BytesIO()
+        img.save(buffer, format="PNG")
+        image_data = base64.b64encode(buffer.getvalue()).decode('utf-8')
+        pdf_document.close()
+        return image_data

    def submit_question(self):
+        self.image_data = self.load_selected_pdf()
        if not self.image_data:
-            self.response_text.delete('1.0', tk.END)  # Clear previous response
-            self.response_text.insert(tk.END, "Please upload an image or PDF first.\n")
+            self.response_text.delete('1.0', tk.END)
+            self.response_text.insert(tk.END, "Please select a PDF file first.\n")
            return

-        question = self.question_entry.get('1.0', tk.END).strip()  # Get text from Text widget
-        # Clear previous response
+        question = self.question_entry.get('1.0', tk.END).strip()
        self.response_text.delete('1.0', tk.END)
            
-        # Start processing animation
        self.processing = True
        threading.Thread(target=self.processing_animation).start()
-
-        # Run the Llama model in a separate thread
        threading.Thread(target=self.run_llama_model, args=(question,)).start()

    def processing_animation(self):