Files
PHIS/filesearch.py

123 lines
3.7 KiB
Python

import os
from openai import OpenAI
from tkinter import Tk, Label, Entry, Button, Text, filedialog
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) into the process
# environment before anything reads os.environ, e.g. OPENAI_API_KEY in
# compare_with_openai.
load_dotenv()
def list_files(directory):
    """Recursively collect the paths of all Word and PDF documents below *directory*.

    Hidden directories (names starting with a dot) are not descended into.
    The extension check is case-insensitive, so files such as ``REPORT.PDF``
    or ``Brief.Docx`` are found as well.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of paths (walked directory joined with the file name) whose
        name ends in ``.docx`` or ``.pdf``. The list is empty when nothing
        matches or when *directory* does not exist.
    """
    extensions = ('.docx', '.pdf')
    files = []
    for root, dirs, filenames in os.walk(directory):
        # Prune hidden directories in place so os.walk does not descend into them.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        files.extend(
            os.path.join(root, filename)
            for filename in filenames
            # Lowercase before matching: extensions are often upper-case on
            # files coming from Windows or scanners.
            if filename.lower().endswith(extensions)
        )
    return files
def compare_with_openai(content, search_query):
    """Ask the OpenAI chat API whether *content* is relevant to *search_query*.

    Only the first 1000 characters of *content* are sent. The prompt (in
    German) asks the model to answer with 'Ja' or 'Nein'.

    Args:
        content: Text extracted from a document.
        search_query: The search phrase entered by the user.

    Returns:
        True if the model's (lower-cased) answer contains "ja". False
        otherwise, including when the client cannot be created or the
        request fails; in that case the error is printed.
    """
    try:
        # Create the client inside the try block: constructing it can itself
        # fail (e.g. when no API key is configured), and that failure should
        # be reported like any other API error instead of aborting the search.
        client = OpenAI(
            api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
        )
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": f"Vergleiche den folgenden Text mit der Suchanfrage '{search_query}':\n\n{content[:1000]}...\n\nIst der Text relevant für die Suchanfrage? Antworte mit 'Ja' oder 'Nein'."
                },
            ],
            # A yes/no answer needs only a few tokens; temperature 0 keeps
            # the verdict as repeatable as possible.
            max_tokens=5,
            temperature=0.0,
        )
        # The message content may be None; treat that as "no answer".
        answer = (completion.choices[0].message.content or "").strip().lower()
        return "ja" in answer
    except Exception as e:
        print(f"Fehler bei der OpenAI-API-Anfrage: {e}")
        return False
def read_file_content(file_path):
    """Extract the plain text of a ``.docx`` or ``.pdf`` file.

    The file type is chosen from the file extension, compared
    case-insensitively so that ``.PDF`` / ``.DOCX`` are handled too.

    Args:
        file_path: Path of the document to read.

    Returns:
        The extracted text: DOCX paragraphs joined by newlines, or the
        concatenated text of all PDF pages. An empty string is returned for
        any other extension and when reading fails (the error is printed).
    """
    lowered = file_path.lower()
    if lowered.endswith('.docx'):
        try:
            # Imported lazily inside the try block so a missing package is
            # reported through the same error path as an unreadable file.
            import docx
            doc = docx.Document(file_path)
            return '\n'.join(para.text for para in doc.paragraphs)
        except Exception as e:
            print(f"Fehler beim Lesen der DOCX-Datei {file_path}: {e}")
            return ''
    elif lowered.endswith('.pdf'):
        try:
            import PyPDF2
            with open(file_path, 'rb') as pdf_file:
                reader = PyPDF2.PdfReader(pdf_file)
                # Extract while the file is still open; pages without
                # extractable text contribute nothing.
                return ''.join(page.extract_text() or '' for page in reader.pages)
        except Exception as e:
            print(f"Fehler beim Lesen der PDF-Datei {file_path}: {e}")
            return ''
    else:
        return ''
def search_files():
    """Run a search from the GUI and display the matching files.

    Reads the query from ``query_entry``, asks the user to pick a directory,
    checks every document returned by ``list_files`` with
    ``compare_with_openai`` and writes the outcome into ``result_text``.
    Returns early, after printing a message, when no query was entered, no
    directory was chosen, or the chosen directory does not exist.
    """
    query = query_entry.get()
    if not query:
        print("Keine Suchanfrage eingegeben.")
        return

    folder = filedialog.askdirectory()
    if not folder:
        print("Kein Verzeichnis ausgewählt.")
        return
    if not os.path.exists(folder):
        print("Verzeichnis existiert nicht.")
        return

    matches = []
    for candidate in list_files(folder):
        text = read_file_content(candidate)
        # Documents without extractable text are skipped before any API call.
        if not text:
            continue
        if compare_with_openai(text, query):
            matches.append(candidate)

    # Replace whatever the previous search left in the result box.
    result_text.delete(1.0, 'end')
    if not matches:
        result_text.insert('end', "Keine relevanten Dateien gefunden.")
        return
    result_text.insert('end', "Dateien relevant für die Suchanfrage:\n")
    for match in matches:
        result_text.insert('end', match + '\n')
# Build the GUI: main window first.
root = Tk()
root.title("Dateisuche mit OpenAI")
root.geometry("600x400")

# Create the widgets: query label and input field, search button, result box.
query_label = Label(root, text="Suchanfrage:")
query_entry = Entry(root, width=50)
search_button = Button(root, text="Suchen", command=search_files)
result_text = Text(root, height=15, width=70)

# Lay the widgets out top to bottom in the order they were created.
query_label.pack(pady=5)
query_entry.pack(pady=5)
search_button.pack(pady=10)
result_text.pack(pady=10)

# Hand control to Tk's event loop until the window is closed.
root.mainloop()