datenbasis aus test mit vision-Modell
This commit is contained in:
206
RAG-Demo.py
206
RAG-Demo.py
@@ -1,99 +1,123 @@
|
||||
import os
|
||||
import glob
|
||||
import lancedb
|
||||
from PyPDF2 import PdfReader
|
||||
import openai
|
||||
"""
|
||||
File: RAG-Demo.py
|
||||
Author: Martin Rattensberger
|
||||
Description: A GUI application for interacting with a local Llama vision model.
|
||||
Users can upload images or PDFs and ask questions about them.
|
||||
Date: 11.11.2024 # Replace with actual date
|
||||
Version: 1.0
|
||||
Development Environment: Visual Studio Code with Continue.ai (Claude Sonnet 3.5)
|
||||
|
||||
This script creates a tkinter-based GUI for uploading images or PDFs,
|
||||
sending them to a local Llama 3.2 vision model, and displaying the results.
|
||||
"""
|
||||
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from config import OPENAI_API_KEY
|
||||
from tkinter import filedialog, scrolledtext
|
||||
import ollama
|
||||
from PIL import Image
|
||||
import fitz # PyMuPDF library for handling PDFs
|
||||
import io
|
||||
import base64
|
||||
import threading
|
||||
import time
|
||||
|
||||
# Set your OpenAI API key (imported from config as OPENAI_API_KEY)
|
||||
openai.api_key = OPENAI_API_KEY
|
||||
class LlamaVisionApp:
|
||||
def __init__(self, master):
|
||||
self.master = master
|
||||
master.title("Llama Vision Interface")
|
||||
|
||||
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of the PDF at ``file_path``.

    Each page is asked for its text once via ``extract_text()``; pages that
    yield a falsy result (``None`` or ``""``) are skipped. The result is an
    empty string when no page produced any text.
    """
    extracted = (page.extract_text() for page in PdfReader(file_path).pages)
    return "".join(chunk for chunk in extracted if chunk)
|
||||
# File upload button
|
||||
self.upload_button = tk.Button(master, text="Upload File", command=self.upload_file)
|
||||
self.upload_button.pack(pady=10)
|
||||
|
||||
def get_embedding(text):
|
||||
# OpenAI Embeddings API verwenden
|
||||
response = openai.Embedding.create(
|
||||
input=text,
|
||||
model="text-embedding-ada-002"
|
||||
# Display selected filename
|
||||
self.filename_label = tk.Label(master, text="No file selected")
|
||||
self.filename_label.pack()
|
||||
|
||||
# Question input - now larger
|
||||
self.question_entry = tk.Text(master, width=50, height=3) # Changed from Entry to Text
|
||||
self.question_entry.pack(pady=10)
|
||||
self.question_entry.insert(tk.END, "What is in this image?")
|
||||
|
||||
# Submit button
|
||||
self.submit_button = tk.Button(master, text="Submit", command=self.submit_question)
|
||||
self.submit_button.pack()
|
||||
|
||||
# Response display
|
||||
self.response_text = scrolledtext.ScrolledText(master, width=60, height=30)
|
||||
self.response_text.pack(pady=10)
|
||||
|
||||
self.file_path = None
|
||||
self.image_data = None
|
||||
self.processing = False
|
||||
|
||||
def upload_file(self):
    """Ask the user for an image or PDF and load it.

    The chosen path is stored in ``self.file_path``. When the dialog returns
    a non-empty path, the filename label is updated and ``load_file`` is
    called; otherwise nothing else happens.
    """
    supported_types = [
        ("Image files", "*.png *.jpg *.jpeg *.gif"),
        ("PDF files", "*.pdf"),
    ]
    self.file_path = filedialog.askopenfilename(filetypes=supported_types)
    if not self.file_path:
        return

    self.filename_label.config(text=f"Selected file: {self.file_path}")
    self.load_file()
|
||||
|
||||
def load_file(self):
    """Load ``self.file_path`` into ``self.image_data`` as base64 text.

    Image files (.png/.jpg/.jpeg/.gif, case-insensitive) are base64-encoded
    unchanged. For a PDF only the first page is rendered and stored as a
    base64-encoded PNG. Any other extension leaves ``self.image_data`` as
    ``None``.
    """
    # Forget the previous upload first, so a failed or unsupported load can
    # never leave an old image behind to be sent with the next question.
    self.image_data = None
    lowered_path = self.file_path.lower()

    if lowered_path.endswith(('.png', '.jpg', '.jpeg', '.gif')):
        with open(self.file_path, "rb") as image_file:
            raw_bytes = image_file.read()
    elif lowered_path.endswith('.pdf'):
        pdf_document = fitz.open(self.file_path)
        try:
            pixmap = pdf_document[0].get_pixmap()
            page_image = Image.frombytes(
                "RGB", [pixmap.width, pixmap.height], pixmap.samples
            )
            buffer = io.BytesIO()
            page_image.save(buffer, format="PNG")
            raw_bytes = buffer.getvalue()
        finally:
            # Close the document even when rendering or encoding raises.
            pdf_document.close()
    else:
        return

    self.image_data = base64.b64encode(raw_bytes).decode('utf-8')
|
||||
|
||||
def submit_question(self):
    """Send the typed question about the loaded image to the model.

    Without loaded image data, the response box is cleared and a hint is
    shown instead. Otherwise the question text is read, the response box is
    cleared, ``self.processing`` is set, and two threads are started: one
    for the progress animation and one for the model call.
    """
    output = self.response_text

    if not self.image_data:
        output.delete('1.0', tk.END)
        output.insert(tk.END, "Please upload an image or PDF first.\n")
        return

    # The question lives in a multi-line Text widget, hence the index range.
    question = self.question_entry.get('1.0', tk.END).strip()
    output.delete('1.0', tk.END)

    self.processing = True
    workers = (
        threading.Thread(target=self.processing_animation),
        threading.Thread(target=self.run_llama_model, args=(question,)),
    )
    for worker in workers:
        worker.start()
|
||||
|
||||
def processing_animation(self, frame=None):
    """Show a spinner in the response box while ``self.processing`` is set.

    Called without ``frame`` (for example as a thread target), this only
    schedules the first animation step on the Tk event loop and returns.
    Each later step redraws the spinner and reschedules itself every 100 ms
    until ``self.processing`` becomes false.

    Running the steps through ``master.after`` keeps widget updates on the
    Tk event loop rather than in a worker thread. It also means a spinner
    frame can no longer be drawn after the final answer was displayed,
    because every step re-checks ``self.processing`` right before drawing.

    Args:
        frame: Index of the animation step; ``None`` requests a fresh start.
    """
    if frame is None:
        self.master.after(0, self.processing_animation, 0)
        return

    if not self.processing:
        return

    spinner = "|/-\\"
    self.response_text.delete('1.0', tk.END)
    self.response_text.insert(tk.END, f"Processing {spinner[frame % len(spinner)]}")
    self.master.after(100, self.processing_animation, frame + 1)
|
||||
|
||||
def run_llama_model(self, question):
    """Ask the local ``llama3.2-vision`` model about the loaded image.

    Sends ``question`` together with ``self.image_data`` through
    ``ollama.chat``. Afterwards ``self.processing`` is cleared and
    ``update_response`` is scheduled via ``master.after`` with either the
    model's answer or an ``"Error: ..."`` message.

    Args:
        question: The user's question text.
    """
    try:
        response = ollama.chat(
            model='llama3.2-vision',
            messages=[{
                'role': 'user',
                'content': question,
                'images': [self.image_data],
            }],
        )
        # The chat reply is read from 'message' -> 'content'. The former
        # response['data'][0]['embedding'] lookup and early return are gone:
        # they made this success path unreachable.
        answer = response['message']['content']
    except Exception as e:
        # Thread boundary: report every failure in the UI instead of letting
        # the worker thread die silently.
        answer = f"Error: {str(e)}"
    finally:
        # Stop the spinner before the result is displayed.
        self.processing = False

    self.master.after(0, self.update_response, question, answer)
|
||||
|
||||
def load_pdfs_to_lancedb(directory, db_path):
    """Embed every PDF in ``directory`` and store the rows in LanceDB.

    For each ``*.pdf`` file, the text is extracted with
    ``extract_text_from_pdf`` and embedded with ``get_embedding``. One row
    per PDF (``vector``, ``file_link``, ``text``) is written to the
    ``pdf_embeddings`` table, which is created if missing and appended to
    otherwise. PDFs without extractable text are skipped.

    Args:
        directory: Folder that is searched (non-recursively) for PDFs.
        db_path: Location passed to ``lancedb.connect``.
    """
    db = lancedb.connect(db_path)
    table_name = 'pdf_embeddings'

    data = []
    for pdf_file in glob.glob(os.path.join(directory, "*.pdf")):
        text = extract_text_from_pdf(pdf_file)
        if not text:
            continue  # nothing to embed for this file
        data.append({
            'vector': get_embedding(text),
            'file_link': pdf_file,
            'text': text,  # kept alongside the vector for later inspection
        })

    if not data:
        # Nothing to insert, and no rows from which a new table's columns
        # could be derived, so leave the database untouched.
        return

    if table_name in db.table_names():
        db.open_table(table_name).add(data)
    else:
        db.create_table(table_name, data=data)
|
||||
def update_response(self, question, answer):
    """Replace the response box content with the question/answer pair."""
    transcript = f"Q: {question}\nA: {answer}\n\n"
    box = self.response_text
    box.delete('1.0', tk.END)
    box.insert(tk.END, transcript)
|
||||
|
||||
def semantic_search(query, db_path, top_k=5):
    """Return the ``file_link`` values of the rows closest to ``query``.

    Opens the ``pdf_embeddings`` table in the LanceDB at ``db_path``, embeds
    ``query`` with ``get_embedding`` and searches the table with that
    vector, limited to ``top_k`` results.

    Returns:
        A list holding the ``file_link`` column of the search results.
    """
    table = lancedb.connect(db_path).open_table('pdf_embeddings')
    hits = table.search(get_embedding(query)).limit(top_k).to_df()
    return hits['file_link'].tolist()
|
||||
|
||||
if __name__ == "__main__":
    import sys
    import argparse

    # Command-line options: only the LanceDB directory is configurable.
    parser = argparse.ArgumentParser(description='PDF-Semantische Suche mit LanceDB')
    parser.add_argument('--db_dir', type=str, help='Pfad zum LanceDB-Verzeichnis', default='lancedb_data')
    args = parser.parse_args()

    lancedb_path = args.db_dir

    # Create the Tk root, hidden for now so that only the directory dialog
    # is visible.
    root = tk.Tk()
    root.withdraw()

    # Let the user pick the folder containing the PDFs.
    pdf_directory = filedialog.askdirectory(title="Bitte wählen Sie das PDF-Verzeichnis aus")

    # Abort when the dialog was cancelled or the path is not a directory.
    if not pdf_directory or not os.path.isdir(pdf_directory):
        print("Kein gültiges Verzeichnis ausgewählt. Das Programm wird beendet.")
        sys.exit(1)

    # Index the PDFs in LanceDB.
    load_pdfs_to_lancedb(pdf_directory, lancedb_path)

    # Interactive semantic search on the console until the user types 'exit'.
    while True:
        query = input("Gib deine Suchanfrage ein (oder 'exit' zum Beenden): ")
        if query.lower() == 'exit':
            break
        results = semantic_search(query, lancedb_path)
        print("Passende Dateien:")
        for file_link in results:
            print(file_link)

    # The root window was withdrawn above; show it again, otherwise the GUI
    # built on it would run its main loop without ever becoming visible.
    root.deiconify()
    app = LlamaVisionApp(root)
    root.mainloop()
|
||||
|
||||
Reference in New Issue
Block a user