From f905f5649c9e27dd365e84d95ffe0aef172a8be2 Mon Sep 17 00:00:00 2001 From: Martin Rattensberger Date: Wed, 4 Dec 2024 18:21:55 +0100 Subject: [PATCH] nicht funktionierende version --- RAG-Demo.py | 50 ++++++++++++++++++++++------- __pycache__/config.cpython-312.pyc | Bin 0 -> 319 bytes config.py | 2 ++ 3 files changed, 40 insertions(+), 12 deletions(-) create mode 100644 __pycache__/config.cpython-312.pyc create mode 100644 config.py diff --git a/RAG-Demo.py b/RAG-Demo.py index b2aba8f..f2e016b 100644 --- a/RAG-Demo.py +++ b/RAG-Demo.py @@ -3,9 +3,12 @@ import glob import lancedb from PyPDF2 import PdfReader import openai +import tkinter as tk +from tkinter import filedialog +from config import OPENAI_API_KEY # Setze deinen OpenAI API-Schlüssel -openai.api_key = 'DEIN_OPENAI_API_KEY' +openai.api_key = OPENAI_API_KEY def extract_text_from_pdf(file_path): reader = PdfReader(file_path) @@ -19,14 +22,14 @@ def extract_text_from_pdf(file_path): def get_embedding(text): # OpenAI Embeddings API verwenden response = openai.Embedding.create( - input=[text], + input=text, model="text-embedding-ada-002" ) embedding = response['data'][0]['embedding'] return embedding def load_pdfs_to_lancedb(directory, db_path): - # Verbindung zur LanceDB herstellen + # Verbindung zur LanceDB herstellen (synchroner Client) db = lancedb.connect(db_path) table_name = 'pdf_embeddings' data = [] @@ -47,7 +50,7 @@ def load_pdfs_to_lancedb(directory, db_path): table = db.open_table(table_name) table.add(data) else: - table = db.create_table(table_name, data=data, mode='overwrite') + table = db.create_table(table_name, data=data) def semantic_search(query, db_path, top_k=5): db = lancedb.connect(db_path) @@ -60,14 +63,37 @@ def semantic_search(query, db_path, top_k=5): return file_links if __name__ == "__main__": - # Ersetze durch deine tatsächlichen Pfade - pdf_directory = 'pfad_zum_pdf_verzeichnis' - lancedb_path = 'pfad_zum_lancedb_verzeichnis' + import sys + import argparse + + # Argument Parser für Kommandozeilenargumente + parser = argparse.ArgumentParser(description='PDF-Semantische Suche mit LanceDB') + parser.add_argument('--db_dir', type=str, help='Pfad zum LanceDB-Verzeichnis', default='lancedb_data') + args = parser.parse_args() + + lancedb_path = args.db_dir + + # Tkinter GUI initialisieren + root = tk.Tk() + root.withdraw() # Hauptfenster ausblenden + + # Ordnerauswahl-Dialog öffnen + pdf_directory = filedialog.askdirectory(title="Bitte wählen Sie das PDF-Verzeichnis aus") + + # Überprüfen, ob das Verzeichnis existiert + if not pdf_directory or not os.path.isdir(pdf_directory): + print("Kein gültiges Verzeichnis ausgewählt. Das Programm wird beendet.") + sys.exit(1) + # PDFs in LanceDB laden load_pdfs_to_lancedb(pdf_directory, lancedb_path) + # Semantische Suche durchführen - query = input("Gib deine Suchanfrage ein: ") - results = semantic_search(query, lancedb_path) - print("Passende Dateien:") - for file_link in results: - print(file_link) + while True: + query = input("Gib deine Suchanfrage ein (oder 'exit' zum Beenden): ") + if query.lower() == 'exit': + break + results = semantic_search(query, lancedb_path) + print("Passende Dateien:") + for file_link in results: + print(file_link) diff --git a/__pycache__/config.cpython-312.pyc b/__pycache__/config.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58501c5fa156713e0faf8ce8fc7e4650e8b86eb3 GIT binary patch literal 319 zcmX@j%ge<81h0z&(oX~F#~=<2FhUuhIe?7m3@Hpz43&(UOjS#YvvmuK^0RcEeZxY7 zi*qA9i*#KQjT0@B3LKp(-3`i&+*}MZDqIsY+>6bf3qz{ROOjL59mCxN3`?_8e5)KQ zLPLuS;>*GvBTaQ3gEE3j3PX&Ya+0#$yh;)+LVVmzobp}W0}PD)GfNyDT}*uf!_wlb zOrt_elU>523=Il`JtDlqs`A65Qr(?BN>huovz<*neSMOXLvvDc%0hCA(?T8nG#PL4 z`3JcAIeNxB26)DMyGE{L_zd#VFZpDv7`M!#+{74f&lsQlF@s$~3 literal 0 HcmV?d00001 diff --git a/config.py b/config.py new file mode 100644 index 0000000..4420924 --- /dev/null +++ b/config.py @@ -0,0 +1,2 @@ +# OpenAI API Key +OPENAI_API_KEY = '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'