"""Tkinter front end for asking a local Llama 3.2 Vision model about a file.

The user picks an image or a PDF, types a question, and the answer returned by
``ollama.chat`` is appended to a scrolling text area. For PDFs only the first
page is rendered and sent to the model.
"""
# Provenance: reconstructed from the git patch "1. Testversion"
# (commit 1151a1169a287c337780efc695d7138d178850f6, Martin Rattensberger,
# 2024-11-11), which created llamaVisionApp.py and added a trailing newline
# to README.md.

import base64
import io
import queue
import threading
import tkinter as tk
from tkinter import filedialog, scrolledtext
from typing import Optional

import fitz  # PyMuPDF library for handling PDFs
import ollama
from PIL import Image

# File name suffixes (compared case-insensitively) that are sent to the model
# unchanged.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif')
# File name suffix that is rendered to a PNG before being sent.
PDF_EXTENSION = '.pdf'
# Model name passed to ``ollama.chat``.
MODEL_NAME = 'llama3.2-vision'
# How often the Tk main loop checks whether the worker thread has finished.
POLL_INTERVAL_MS = 100


class LlamaVisionApp:
    """Main window: file picker, question entry, submit button, answer log."""

    def __init__(self, master: tk.Tk) -> None:
        """Build the widgets inside *master* and initialise the empty state.

        Args:
            master: The Tk root (or toplevel) window that hosts the widgets.
        """
        self.master = master
        master.title("Llama Vision Interface")

        # File upload button
        self.upload_button = tk.Button(
            master, text="Upload File", command=self.upload_file
        )
        self.upload_button.pack(pady=10)

        # Display selected filename
        self.filename_label = tk.Label(master, text="No file selected")
        self.filename_label.pack()

        # Question input, pre-filled with a default question
        self.question_entry = tk.Entry(master, width=50)
        self.question_entry.pack(pady=10)
        self.question_entry.insert(0, "What is in this image?")

        # Submit button
        self.submit_button = tk.Button(
            master, text="Submit", command=self.submit_question
        )
        self.submit_button.pack()

        # Response display
        self.response_text = scrolledtext.ScrolledText(
            master, width=60, height=30
        )
        self.response_text.pack(pady=10)

        # Path of the most recently selected file, or None before any upload.
        self.file_path: Optional[str] = None
        # Base64 text of the image sent to the model, or None if nothing is
        # loaded (also reset to None whenever a load attempt fails).
        self.image_data: Optional[str] = None
        # Hand-over point between the worker thread and the Tk main loop:
        # the worker puts the finished text here, the main loop displays it.
        self._results: "queue.Queue[str]" = queue.Queue()

    def upload_file(self) -> None:
        """Ask the user for an image or PDF and load it.

        Cancelling the dialog leaves the current selection untouched.
        """
        selected = filedialog.askopenfilename(
            filetypes=[
                ("Image files", "*.png *.jpg *.jpeg *.gif"),
                ("PDF files", "*.pdf"),
            ]
        )
        # The dialog returns an empty value on cancel; only commit a real
        # choice so file_path, the label and image_data stay in sync.
        if not selected:
            return
        self.file_path = selected
        self.filename_label.config(text=f"Selected file: {self.file_path}")
        self.load_file()

    def load_file(self) -> None:
        """Encode ``self.file_path`` into ``self.image_data``.

        ``self.image_data`` is cleared first, so a failed load can never leave
        the previous file's image behind. Failures are reported in the
        response area instead of being raised.
        """
        self.image_data = None
        if not self.file_path:
            return
        try:
            self.image_data = self._encode_file(self.file_path)
        except Exception as exc:  # UI boundary: report, never crash callback
            self.response_text.insert(tk.END, f"Error loading file: {exc}\n")

    @staticmethod
    def _encode_file(path: str) -> str:
        """Return the base64 text for the image or PDF at *path*.

        Raises:
            ValueError: If the file name has an unsupported extension.
            OSError: If the file cannot be read.
        """
        lowered = path.lower()
        if lowered.endswith(IMAGE_EXTENSIONS):
            with open(path, "rb") as image_file:
                raw = image_file.read()
        elif lowered.endswith(PDF_EXTENSION):
            raw = LlamaVisionApp._render_first_pdf_page(path)
        else:
            raise ValueError(f"Unsupported file type: {path}")
        return base64.b64encode(raw).decode('utf-8')

    @staticmethod
    def _render_first_pdf_page(path: str) -> bytes:
        """Render page 0 of the PDF at *path* and return it as PNG bytes.

        Raises:
            ValueError: If the document contains no pages.
        """
        # The context manager closes the document even if rendering fails.
        with fitz.open(path) as pdf_document:
            if len(pdf_document) == 0:
                raise ValueError("PDF has no pages")
            pixmap = pdf_document[0].get_pixmap()
            # Build the PIL image while the document is still open.
            img = Image.frombytes(
                "RGB", (pixmap.width, pixmap.height), pixmap.samples
            )
        buffer = io.BytesIO()
        img.save(buffer, format="PNG")
        return buffer.getvalue()

    def submit_question(self) -> None:
        """Send the current question and loaded image to the model.

        The request runs on a background thread so the window stays
        responsive; the answer (or an error message) is appended to the
        response area once it is available.
        """
        if not self.image_data:
            self.response_text.insert(
                tk.END, "Please upload an image or PDF first.\n"
            )
            return

        question = self.question_entry.get()
        # Block a second submission until this one has been answered.
        self.submit_button.config(state=tk.DISABLED)
        threading.Thread(
            target=self._query_model,
            args=(question, self.image_data),
            daemon=True,  # do not keep the process alive after window close
        ).start()
        self.master.after(POLL_INTERVAL_MS, self._poll_result)

    def _query_model(self, question: str, image_data: str) -> None:
        """Worker thread: call the model and queue the text to display.

        Touches no Tk widget; the result is handed over via ``self._results``.
        """
        try:
            response = ollama.chat(
                model=MODEL_NAME,
                messages=[{
                    'role': 'user',
                    'content': question,
                    'images': [image_data],
                }],
            )
            text = f"Q: {question}\nA: {response['message']['content']}\n\n"
        except Exception as exc:  # thread boundary: surface error to the UI
            text = f"Error: {exc}\n"
        self._results.put(text)

    def _poll_result(self) -> None:
        """Main thread: display the worker's result, or check again later."""
        try:
            text = self._results.get_nowait()
        except queue.Empty:
            self.master.after(POLL_INTERVAL_MS, self._poll_result)
            return
        self.response_text.insert(tk.END, text)
        self.response_text.see(tk.END)
        self.submit_button.config(state=tk.NORMAL)


if __name__ == "__main__":
    root = tk.Tk()
    app = LlamaVisionApp(root)
    root.mainloop()