# Lokales_RAG/RAG-Demo.py

"""
File: RAG-Demo.py
Author: Martin Rattensberger
Description: A GUI application for interacting with a local Llama vision model.
             Users can select a directory with PDF files and ask questions about them.
Date: 11.11.2024 # Replace with actual date
Version: 1.1
Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5)

This script creates a tkinter-based GUI for selecting a directory with PDFs,
rendering the first page of the chosen PDF as an image, sending that image to a
local Llama 3.2 vision model via Ollama, and displaying the model's answer.
"""

import tkinter as tk
from tkinter import filedialog, scrolledtext
import ollama
from PIL import Image
import fitz  # PyMuPDF library for handling PDFs
import io
import base64
import threading
import time
import os

class LlamaVisionApp:
    """Tkinter front end for asking a local Llama vision model about a PDF.

    The user picks a directory, selects one of the PDF files found in it and
    types a question. The first page of the selected PDF is rendered to a PNG
    image and sent, together with the question, to the model through
    ``ollama.chat``. The answer is shown in the response area.

    Threading model: the blocking model call runs in a daemon worker thread
    that never touches Tk. All widget updates (progress animation and final
    answer) happen on the Tk main loop via ``master.after``.
    """

    MODEL_NAME = 'llama3.2-vision'
    ANIMATION_FRAMES = "|/-\\"
    ANIMATION_INTERVAL_MS = 100

    def __init__(self, master):
        """Build all widgets inside ``master`` and initialise the app state.

        Args:
            master: The Tk root (or toplevel) window hosting the interface.
        """
        self.master = master
        master.title("Llama Vision Interface")

        # Directory selection button
        self.select_dir_button = tk.Button(master, text="Select PDF Directory", command=self.select_directory)
        self.select_dir_button.pack(pady=10)

        # Display selected directory
        self.dir_label = tk.Label(master, text="No directory selected")
        self.dir_label.pack()

        # PDF file listbox.
        # exportselection=False keeps the chosen PDF selected when the user
        # later selects text in the question box; with the Tk default the
        # listbox selection is dropped as soon as another widget owns the
        # selection.
        self.pdf_listbox = tk.Listbox(master, width=50, height=5, exportselection=False)
        self.pdf_listbox.pack(pady=10)

        # Question input
        self.question_entry = tk.Text(master, width=50, height=3)
        self.question_entry.pack(pady=10)
        self.question_entry.insert(tk.END, "What is in this PDF?")

        # Submit button
        self.submit_button = tk.Button(master, text="Submit", command=self.submit_question)
        self.submit_button.pack()

        # Response display
        self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)
        self.response_text.pack(pady=10)

        self.directory_path = None
        self.pdf_files = []
        self.image_data = None
        # True while a model request is running in the worker thread.
        self.processing = False
        # Index of the next spinner frame to draw.
        self._animation_step = 0
        # (question, answer) handed over from the worker thread, or None.
        self._pending_result = None

    def select_directory(self):
        """Ask the user for a directory and list the PDF files it contains."""
        self.directory_path = filedialog.askdirectory()
        if self.directory_path:
            self.dir_label.config(text=f"Selected directory: {self.directory_path}")
            self.load_pdf_files()

    @staticmethod
    def _list_pdf_names(directory):
        """Return the names of the PDF files directly inside ``directory``.

        Only regular files whose name ends in ``.pdf`` (case-insensitive) are
        returned, sorted case-insensitively so the listing is stable.

        Raises:
            OSError: If the directory cannot be read.
        """
        return sorted(
            (
                name
                for name in os.listdir(directory)
                if name.lower().endswith('.pdf')
                and os.path.isfile(os.path.join(directory, name))
            ),
            key=str.lower,
        )

    def load_pdf_files(self):
        """Refresh ``pdf_files`` and the listbox from ``directory_path``.

        If the directory cannot be read, the list is emptied and the error is
        shown in the response area instead of being raised.
        """
        self.pdf_listbox.delete(0, tk.END)
        try:
            self.pdf_files = self._list_pdf_names(self.directory_path)
        except OSError as e:
            self.pdf_files = []
            self._show_message(f"Error: could not read directory: {e}\n")
            return
        for pdf in self.pdf_files:
            self.pdf_listbox.insert(tk.END, pdf)

    def load_selected_pdf(self):
        """Render the first page of the selected PDF as a base64 PNG string.

        Returns:
            The base64-encoded PNG (as ``str``) of the first page, or ``None``
            when no PDF is selected in the listbox.

        Raises:
            ValueError: If the selected PDF has no pages.
            Exception: Whatever PyMuPDF or Pillow raise for unreadable files.
        """
        selected_indices = self.pdf_listbox.curselection()
        if not selected_indices:
            return None
        selected_pdf = self.pdf_files[selected_indices[0]]
        pdf_path = os.path.join(self.directory_path, selected_pdf)

        pdf_document = fitz.open(pdf_path)
        try:
            if len(pdf_document) == 0:
                raise ValueError(f"'{selected_pdf}' contains no pages")
            # alpha=False guarantees 3 bytes per pixel, matching the "RGB"
            # mode handed to Pillow below.
            pixmap = pdf_document[0].get_pixmap(alpha=False)
            img = Image.frombytes("RGB", (pixmap.width, pixmap.height), pixmap.samples)
            buffer = io.BytesIO()
            img.save(buffer, format="PNG")
        finally:
            # Always release the document, even if rendering failed.
            pdf_document.close()
        return base64.b64encode(buffer.getvalue()).decode('utf-8')

    def submit_question(self):
        """Validate the input and start a model request in the background.

        Does nothing while a previous request is still running. Problems
        (no PDF selected, unreadable PDF, empty question) are reported in the
        response area.
        """
        if self.processing:
            # A request is already in flight; ignore repeated submissions.
            return

        try:
            self.image_data = self.load_selected_pdf()
        except Exception as e:  # UI boundary: report instead of losing it in stderr
            self.image_data = None
            self._show_message(f"Error: {e}\n")
            return
        if not self.image_data:
            self._show_message("Please select a PDF file first.\n")
            return

        question = self.question_entry.get('1.0', tk.END).strip()
        if not question:
            self._show_message("Please enter a question.\n")
            return

        self.processing = True
        self._pending_result = None
        self._animation_step = 0
        self.submit_button.config(state=tk.DISABLED)
        # Daemon thread: a pending model call must not keep the process alive
        # after the window has been closed.
        threading.Thread(target=self.run_llama_model, args=(question,), daemon=True).start()
        self.processing_animation()

    def processing_animation(self):
        """Draw one spinner frame, or deliver the result once the worker is done.

        Runs on the Tk main loop and re-schedules itself with ``master.after``
        while ``processing`` is true. When the worker has finished, the submit
        button is re-enabled and the pending result is displayed.
        """
        if self.processing:
            frame = self.ANIMATION_FRAMES[self._animation_step % len(self.ANIMATION_FRAMES)]
            self._show_message(f"Processing {frame}")
            self._animation_step += 1
            self.master.after(self.ANIMATION_INTERVAL_MS, self.processing_animation)
            return

        self.submit_button.config(state=tk.NORMAL)
        result = self._pending_result
        self._pending_result = None
        if result is not None:
            self.update_response(*result)

    def run_llama_model(self, question):
        """Send the question and current page image to the model (worker thread).

        The answer, or an ``Error: ...`` text if the call fails, is stored for
        the main loop to pick up in ``processing_animation``. No Tk calls are
        made here.

        Args:
            question: The user's question text.
        """
        try:
            response = ollama.chat(
                model=self.MODEL_NAME,
                messages=[{
                    'role': 'user',
                    'content': question,
                    'images': [self.image_data]
                }]
            )
            answer = response['message']['content']
        except Exception as e:  # thread boundary: surface every failure to the user
            answer = f"Error: {str(e)}"
        # Publish the result before clearing the flag so the main loop never
        # observes "finished" without a result.
        self._pending_result = (question, answer)
        self.processing = False

    def update_response(self, question, answer):
        """Replace the response area content with the question and its answer."""
        self._show_message(f"Q: {question}\nA: {answer}\n\n")

    def _show_message(self, text):
        """Replace the whole content of the response area with ``text``."""
        self.response_text.delete('1.0', tk.END)
        self.response_text.insert(tk.END, text)

if __name__ == "__main__":
    # Only launch the GUI when run as a script, so importing this module
    # does not open a window and block in the Tk event loop.
    root = tk.Tk()
    app = LlamaVisionApp(root)
    root.mainloop()