Nicht funktionierende Version
This commit is contained in:
42
RAG-Demo.py
42
RAG-Demo.py
@@ -3,9 +3,12 @@ import glob
|
||||
import lancedb
|
||||
from PyPDF2 import PdfReader
|
||||
import openai
|
||||
import tkinter as tk
|
||||
from tkinter import filedialog
|
||||
from config import OPENAI_API_KEY
|
||||
|
||||
# Setze deinen OpenAI API-Schlüssel
|
||||
openai.api_key = 'DEIN_OPENAI_API_KEY'
|
||||
openai.api_key = OPENAI_API_KEY
|
||||
|
||||
def extract_text_from_pdf(file_path):
|
||||
reader = PdfReader(file_path)
|
||||
@@ -19,14 +22,14 @@ def extract_text_from_pdf(file_path):
|
||||
def get_embedding(text):
|
||||
# OpenAI Embeddings API verwenden
|
||||
response = openai.Embedding.create(
|
||||
input=[text],
|
||||
input=text,
|
||||
model="text-embedding-ada-002"
|
||||
)
|
||||
embedding = response['data'][0]['embedding']
|
||||
return embedding
|
||||
|
||||
def load_pdfs_to_lancedb(directory, db_path):
|
||||
# Verbindung zur LanceDB herstellen
|
||||
# Verbindung zur LanceDB herstellen (synchroner Client)
|
||||
db = lancedb.connect(db_path)
|
||||
table_name = 'pdf_embeddings'
|
||||
data = []
|
||||
@@ -47,7 +50,7 @@ def load_pdfs_to_lancedb(directory, db_path):
|
||||
table = db.open_table(table_name)
|
||||
table.add(data)
|
||||
else:
|
||||
table = db.create_table(table_name, data=data, mode='overwrite')
|
||||
table = db.create_table(table_name, data=data)
|
||||
|
||||
def semantic_search(query, db_path, top_k=5):
|
||||
db = lancedb.connect(db_path)
|
||||
@@ -60,13 +63,36 @@ def semantic_search(query, db_path, top_k=5):
|
||||
return file_links
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Ersetze durch deine tatsächlichen Pfade
|
||||
pdf_directory = 'pfad_zum_pdf_verzeichnis'
|
||||
lancedb_path = 'pfad_zum_lancedb_verzeichnis'
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
# Argument Parser für Kommandozeilenargumente
|
||||
parser = argparse.ArgumentParser(description='PDF-Semantische Suche mit LanceDB')
|
||||
parser.add_argument('--db_dir', type=str, help='Pfad zum LanceDB-Verzeichnis', default='lancedb_data')
|
||||
args = parser.parse_args()
|
||||
|
||||
lancedb_path = args.db_dir
|
||||
|
||||
# Tkinter GUI initialisieren
|
||||
root = tk.Tk()
|
||||
root.withdraw() # Hauptfenster ausblenden
|
||||
|
||||
# Ordnerauswahl-Dialog öffnen
|
||||
pdf_directory = filedialog.askdirectory(title="Bitte wählen Sie das PDF-Verzeichnis aus")
|
||||
|
||||
# Überprüfen, ob das Verzeichnis existiert
|
||||
if not pdf_directory or not os.path.isdir(pdf_directory):
|
||||
print("Kein gültiges Verzeichnis ausgewählt. Das Programm wird beendet.")
|
||||
sys.exit(1)
|
||||
|
||||
# PDFs in LanceDB laden
|
||||
load_pdfs_to_lancedb(pdf_directory, lancedb_path)
|
||||
|
||||
# Semantische Suche durchführen
|
||||
query = input("Gib deine Suchanfrage ein: ")
|
||||
while True:
|
||||
query = input("Gib deine Suchanfrage ein (oder 'exit' zum Beenden): ")
|
||||
if query.lower() == 'exit':
|
||||
break
|
||||
results = semantic_search(query, lancedb_path)
|
||||
print("Passende Dateien:")
|
||||
for file_link in results:
|
||||
|
||||
BIN
__pycache__/config.cpython-312.pyc
Normal file
BIN
__pycache__/config.cpython-312.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user