Review notes (text before the first "diff --git" header is skipped by git apply and patch):
- Line structure restored to one diff line per physical line; hunk line counts match the @@ headers.
  Indentation and blank context lines are reconstructed and should be checked against RAG-Demo.py.
- The OpenAI key that was hard-coded in config.py is replaced by the placeholder previously used in
  RAG-Demo.py. The key that was published must be revoked; supply the real one from an environment
  variable or an untracked file instead of committing it.
- __pycache__/config.cpython-312.pyc is a build artifact: remove it from the commit and add
  __pycache__/ to .gitignore.

diff --git a/RAG-Demo.py b/RAG-Demo.py
index b2aba8f..f2e016b 100644
--- a/RAG-Demo.py
+++ b/RAG-Demo.py
@@ -3,9 +3,12 @@ import glob
 import lancedb
 from PyPDF2 import PdfReader
 import openai
+import tkinter as tk
+from tkinter import filedialog
+from config import OPENAI_API_KEY
 
 # Setze deinen OpenAI API-Schlüssel
-openai.api_key = 'DEIN_OPENAI_API_KEY'
+openai.api_key = OPENAI_API_KEY
 
 def extract_text_from_pdf(file_path):
     reader = PdfReader(file_path)
@@ -19,14 +22,14 @@ def extract_text_from_pdf(file_path):
 def get_embedding(text):
     # OpenAI Embeddings API verwenden
     response = openai.Embedding.create(
-        input=[text],
+        input=text,
         model="text-embedding-ada-002"
     )
     embedding = response['data'][0]['embedding']
     return embedding
 
 def load_pdfs_to_lancedb(directory, db_path):
-    # Verbindung zur LanceDB herstellen
+    # Verbindung zur LanceDB herstellen (synchroner Client)
     db = lancedb.connect(db_path)
     table_name = 'pdf_embeddings'
     data = []
@@ -47,7 +50,7 @@ def load_pdfs_to_lancedb(directory, db_path):
         table = db.open_table(table_name)
         table.add(data)
     else:
-        table = db.create_table(table_name, data=data, mode='overwrite')
+        table = db.create_table(table_name, data=data)
 
 def semantic_search(query, db_path, top_k=5):
     db = lancedb.connect(db_path)
@@ -60,14 +63,37 @@ def semantic_search(query, db_path, top_k=5):
     return file_links
 
 if __name__ == "__main__":
-    # Ersetze durch deine tatsächlichen Pfade
-    pdf_directory = 'pfad_zum_pdf_verzeichnis'
-    lancedb_path = 'pfad_zum_lancedb_verzeichnis'
+    import sys
+    import argparse
+
+    # Argument Parser für Kommandozeilenargumente
+    parser = argparse.ArgumentParser(description='PDF-Semantische Suche mit LanceDB')
+    parser.add_argument('--db_dir', type=str, help='Pfad zum LanceDB-Verzeichnis', default='lancedb_data')
+    args = parser.parse_args()
+
+    lancedb_path = args.db_dir
+
+    # Tkinter GUI initialisieren
+    root = tk.Tk()
+    root.withdraw() # Hauptfenster ausblenden
+
+    # Ordnerauswahl-Dialog öffnen
+    pdf_directory = filedialog.askdirectory(title="Bitte wählen Sie das PDF-Verzeichnis aus")
+
+    # Überprüfen, ob das Verzeichnis existiert
+    if not pdf_directory or not os.path.isdir(pdf_directory):
+        print("Kein gültiges Verzeichnis ausgewählt. Das Programm wird beendet.")
+        sys.exit(1)
+
     # PDFs in LanceDB laden
     load_pdfs_to_lancedb(pdf_directory, lancedb_path)
+
     # Semantische Suche durchführen
-    query = input("Gib deine Suchanfrage ein: ")
-    results = semantic_search(query, lancedb_path)
-    print("Passende Dateien:")
-    for file_link in results:
-        print(file_link)
+    while True:
+        query = input("Gib deine Suchanfrage ein (oder 'exit' zum Beenden): ")
+        if query.lower() == 'exit':
+            break
+        results = semantic_search(query, lancedb_path)
+        print("Passende Dateien:")
+        for file_link in results:
+            print(file_link)
diff --git a/__pycache__/config.cpython-312.pyc b/__pycache__/config.cpython-312.pyc
new file mode 100644
index 0000000..58501c5
Binary files /dev/null and b/__pycache__/config.cpython-312.pyc differ
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..4420924
--- /dev/null
+++ b/config.py
@@ -0,0 +1,2 @@
+# OpenAI API Key
+OPENAI_API_KEY = 'DEIN_OPENAI_API_KEY'