Files
Lokales_RAG/RAG-Demo.py

124 lines
4.6 KiB
Python

"""
File: llamaVisionApp.py
Author: Martin Rattensberger
Description: A GUI application for interacting with a local Llama vision model.
Users can upload images or PDFs and ask questions about them.
Date: 11.11.2024 # Replace with actual date
Version: 1.0
Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5)
This script creates a tkinter-based GUI for uploading an image or a PDF
(only the first page of a PDF is rendered and used), sending it to a local
Llama 3.2 vision model through Ollama, and displaying the model's answer.
"""
import tkinter as tk
from tkinter import filedialog, scrolledtext
import ollama
from PIL import Image
import fitz # PyMuPDF library for handling PDFs
import io
import base64
import threading
import time
class LlamaVisionApp:
    """Tkinter front end for asking a local Llama vision model about a file.

    The user picks an image or a PDF, types a question and presses "Submit".
    The file is converted to a base64 string (PDFs are rendered to a PNG of
    their first page) and sent together with the question to the
    ``llama3.2-vision`` model through ``ollama.chat``. The model call runs in
    a worker thread; every widget update happens on the Tk event loop.
    """

    # Extensions whose raw file bytes are sent to the model unchanged.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif')
    # Frames of the "Processing" spinner and the delay between two frames.
    SPINNER_FRAMES = "|/-\\"
    SPINNER_INTERVAL_MS = 100

    def __init__(self, master):
        """Build all widgets inside *master* and initialise the app state.

        Args:
            master: The Tk root (or other container) that hosts the widgets.
        """
        self.master = master
        master.title("Llama Vision Interface")

        # File upload button
        self.upload_button = tk.Button(master, text="Upload File", command=self.upload_file)
        self.upload_button.pack(pady=10)

        # Display selected filename
        self.filename_label = tk.Label(master, text="No file selected")
        self.filename_label.pack()

        # Question input: a multi-line Text widget pre-filled with a default
        self.question_entry = tk.Text(master, width=50, height=3)
        self.question_entry.pack(pady=10)
        self.question_entry.insert(tk.END, "What is in this image?")

        # Submit button
        self.submit_button = tk.Button(master, text="Submit", command=self.submit_question)
        self.submit_button.pack()

        # Response display
        self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)
        self.response_text.pack(pady=10)

        self.file_path = None      # path of the currently loaded file
        self.image_data = None     # base64 string sent to the model
        self.processing = False    # True while a model request is in flight
        self._spinner_job = None   # id of the pending spinner `after` callback

    def upload_file(self):
        """Ask the user for a file and load it.

        Cancelling the dialog keeps the previously loaded file. If loading
        fails, the error is shown in the response box and the selection is
        cleared.
        """
        selected = filedialog.askopenfilename(
            filetypes=[("Image files", "*.png *.jpg *.jpeg *.gif"), ("PDF files", "*.pdf")]
        )
        if not selected:
            # Dialog cancelled: do not clobber the current selection.
            return

        self.file_path = selected
        try:
            self.load_file()
        except Exception as exc:
            # GUI callback boundary: show the problem to the user instead of
            # letting Tk print a traceback to stderr.
            self.file_path = None
            self.filename_label.config(text="No file selected")
            self.response_text.delete('1.0', tk.END)
            self.response_text.insert(tk.END, f"Error loading file: {exc}\n")
            return
        self.filename_label.config(text=f"Selected file: {self.file_path}")

    def load_file(self):
        """Read ``self.file_path`` into ``self.image_data`` as base64 text.

        Image files are encoded as-is; a PDF is rendered to a PNG of its
        first page. ``self.image_data`` is reset to ``None`` before loading,
        so a failure never leaves the previous file's data behind.

        Raises:
            ValueError: If the file extension is neither a supported image
                type nor ``.pdf``.
        """
        path = self.file_path
        self.image_data = None

        lowered = path.lower()
        if lowered.endswith(self.IMAGE_EXTENSIONS):
            with open(path, "rb") as image_file:
                raw = image_file.read()
        elif lowered.endswith('.pdf'):
            raw = self._render_pdf_first_page(path)
        else:
            raise ValueError(f"Unsupported file type: {path}")

        self.image_data = base64.b64encode(raw).decode('utf-8')

    @staticmethod
    def _render_pdf_first_page(path):
        """Return the first page of the PDF at *path* as PNG bytes."""
        pdf_document = fitz.open(path)
        try:
            pixmap = pdf_document[0].get_pixmap()
            img = Image.frombytes("RGB", (pixmap.width, pixmap.height), pixmap.samples)
        finally:
            # Close the document even when rendering the page fails.
            pdf_document.close()

        buffer = io.BytesIO()
        img.save(buffer, format="PNG")
        return buffer.getvalue()

    def submit_question(self):
        """Send the typed question and the loaded file to the model.

        Does nothing while a previous request is still running. If no file
        has been loaded, a hint is shown instead.
        """
        if self.processing:
            # A request is already in flight; a second worker would compete
            # with it for the response box.
            return

        if not self.image_data:
            self.response_text.delete('1.0', tk.END)  # Clear previous response
            self.response_text.insert(tk.END, "Please upload an image or PDF first.\n")
            return

        question = self.question_entry.get('1.0', tk.END).strip()

        # Clear previous response
        self.response_text.delete('1.0', tk.END)

        # Start the spinner on the event loop
        self.processing = True
        self.processing_animation()

        # Run the model in a daemon worker thread so that closing the window
        # is not blocked by a pending request. The image is captured now so a
        # later upload cannot change what this request sends.
        threading.Thread(
            target=self.run_llama_model,
            args=(question, self.image_data),
            daemon=True,
        ).start()

    def processing_animation(self, frame=0):
        """Draw one spinner frame and schedule the next one.

        Runs on the Tk event loop through ``after`` rather than in its own
        thread, so it can never overwrite the answer written by
        ``update_response``.

        Args:
            frame: Index of the spinner frame to draw (wraps around).
        """
        if not self.processing:
            return

        spinner = self.SPINNER_FRAMES[frame % len(self.SPINNER_FRAMES)]
        self.response_text.delete('1.0', tk.END)
        self.response_text.insert(tk.END, f"Processing {spinner}")
        self._spinner_job = self.master.after(
            self.SPINNER_INTERVAL_MS, self.processing_animation, frame + 1
        )

    def run_llama_model(self, question, image_data=None):
        """Query the model and hand the answer to the event loop.

        Intended to run in a worker thread. The result (or an error text) is
        passed to ``update_response`` through ``master.after``.

        Args:
            question: The user's question.
            image_data: Base64 image to send; defaults to ``self.image_data``.
        """
        if image_data is None:
            image_data = self.image_data

        try:
            response = ollama.chat(
                model='llama3.2-vision',
                messages=[{
                    'role': 'user',
                    'content': question,
                    'images': [image_data]
                }]
            )
            answer = response['message']['content']
        except Exception as e:
            # Worker-thread boundary: report any failure in the response box.
            answer = f"Error: {str(e)}"

        self.master.after(0, self.update_response, question, answer)

    def update_response(self, question, answer):
        """Stop the spinner and show the question with its answer.

        Args:
            question: The question that was asked.
            answer: The model's answer or an error message.
        """
        self.processing = False
        if self._spinner_job is not None:
            # Drop the pending spinner tick so a quick resubmit cannot end up
            # with two animation chains.
            self.master.after_cancel(self._spinner_job)
            self._spinner_job = None

        self.response_text.delete('1.0', tk.END)
        self.response_text.insert(tk.END, f"Q: {question}\nA: {answer}\n\n")
if __name__ == "__main__":
    # Launch the GUI only when the file is executed as a script, so that
    # importing the module does not open a window and block in mainloop().
    root = tk.Tk()
    app = LlamaVisionApp(root)
    root.mainloop()