diff --git a/llamaVisionApp.py b/llamaVisionApp.py
index b849f5d..1581a49 100644
--- a/llamaVisionApp.py
+++ b/llamaVisionApp.py
@@ -1,3 +1,16 @@
+"""
+File: llamaVisionApp.py
+Author: Martin Rattensberger
+Description: A GUI application for interacting with a local Llama vision model.
+             Users can upload images or PDFs and ask questions about them.
+Date: 11.11.2024
+Version: 1.0
+Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5)
+
+This script creates a tkinter-based GUI for uploading images or PDFs,
+sending them to a local Llama 3.2 vision model, and displaying the results.
+"""
+
 import tkinter as tk
 from tkinter import filedialog, scrolledtext
 import ollama
@@ -5,6 +18,8 @@ from PIL import Image
 import fitz  # PyMuPDF library for handling PDFs
 import io
 import base64
+import threading
+import time
 
 class LlamaVisionApp:
     def __init__(self, master):
@@ -19,21 +34,22 @@ class LlamaVisionApp:
         self.filename_label = tk.Label(master, text="No file selected")
         self.filename_label.pack()
 
-        # Question input
-        self.question_entry = tk.Entry(master, width=50)
+        # Question input - now larger
+        self.question_entry = tk.Text(master, width=50, height=3)  # Changed from Entry to Text
         self.question_entry.pack(pady=10)
-        self.question_entry.insert(0, "What is in this image?")
+        self.question_entry.insert(tk.END, "What is in this image?")
 
         # Submit button
         self.submit_button = tk.Button(master, text="Submit", command=self.submit_question)
         self.submit_button.pack()
 
-        # Response display - now 3 times larger
-        self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)  # Height increased from 10 to 30
+        # Response display
+        self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)
         self.response_text.pack(pady=10)
 
         self.file_path = None
         self.image_data = None
+        self.processing = False
 
     def upload_file(self):
         self.file_path = filedialog.askopenfilename(filetypes=[("Image files", "*.png *.jpg *.jpeg *.gif"), ("PDF files", "*.pdf")])
@@ -57,10 +73,45 @@ class LlamaVisionApp:
 
     def submit_question(self):
+        """Send the current question and the uploaded file to the model.
+
+        The request runs on a worker thread so the GUI stays responsive;
+        a spinner is shown in the response area until the answer arrives.
+        """
+        if self.processing:
+            # A request is already in flight; ignore repeated clicks so two
+            # workers never compete for the response area.
+            return
+
         if not self.image_data:
+            self.response_text.delete('1.0', tk.END)  # Clear previous response
             self.response_text.insert(tk.END, "Please upload an image or PDF first.\n")
             return
 
-        question = self.question_entry.get()
+        question = self.question_entry.get('1.0', tk.END).strip()  # Get text from Text widget
+
+        # Start the spinner on the Tk main thread; it re-arms itself via after().
+        self.processing = True
+        self.processing_animation()
+
+        # Run the Llama model in a separate thread. daemon=True lets the
+        # process exit if the window is closed while a request is pending.
+        threading.Thread(target=self.run_llama_model, args=(question,), daemon=True).start()
+
+    def processing_animation(self, frame=0):
+        """Draw one spinner frame and schedule the next while a request is pending.
+
+        Driven by ``after`` so that every widget update happens on the Tk
+        main thread instead of in a sleeping background loop.
+        """
+        if not self.processing:
+            return
+        animation = "|/-\\"
+        self.response_text.delete('1.0', tk.END)
+        self.response_text.insert(tk.END, f"Processing {animation[frame % len(animation)]}")
+        self.master.after(100, self.processing_animation, frame + 1)
+
+    def run_llama_model(self, question):
+        """Query the vision model (worker thread) and hand the answer to the GUI."""
         try:
             response = ollama.chat(
                 model='llama3.2-vision',
@@ -70,9 +121,18 @@ class LlamaVisionApp:
                     'images': [self.image_data]
                 }]
             )
-            self.response_text.insert(tk.END, f"Q: {question}\nA: {response['message']['content']}\n\n")
+            answer = response['message']['content']
         except Exception as e:
-            self.response_text.insert(tk.END, f"Error: {str(e)}\n")
+            answer = f"Error: {str(e)}"
+        # Deliver the result through the Tk event loop; update_response is the
+        # only place that stops the spinner, so a late frame cannot overwrite it.
+        self.master.after(0, self.update_response, question, answer)
+
+    def update_response(self, question, answer):
+        """Stop the spinner and show the question together with its answer."""
+        self.processing = False
+        self.response_text.delete('1.0', tk.END)
+        self.response_text.insert(tk.END, f"Q: {question}\nA: {answer}\n\n")
 
 root = tk.Tk()
 app = LlamaVisionApp(root)