From cde37492af61ac57709684361e5024b0f67752cc Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Wed, 20 Nov 2024 15:51:21 -0500 Subject: [PATCH] i forgot to stage these for the last commit rip --- app/main.py | 186 +++++++++++++++++++-------------------------- docker-compose.yml | 2 - 2 files changed, 80 insertions(+), 108 deletions(-) diff --git a/app/main.py b/app/main.py index edce716..c1f5d58 100644 --- a/app/main.py +++ b/app/main.py @@ -1,128 +1,102 @@ -# To parse video ids +import os import re - -# Youtube Transcript stuff import -import youtube_transcript_api._errors -from youtube_transcript_api import YouTubeTranscriptApi +import threading +import pytz +from datetime import datetime +from dotenv import load_dotenv +from youtube_transcript_api import YouTubeTranscriptApi, _errors from youtube_transcript_api.formatters import TextFormatter - -# OpenAI API stuff import -from openai import AssistantEventHandler -from openai import OpenAI - -### For streaming +from openai import AssistantEventHandler, OpenAI from typing_extensions import override import asyncio -awaiter = asyncio.run - -# The StreamOutput class to handle streaming -class StreamOutput: - - def __init__(self): - self.delta: str = "" - self.response: str = "" - self.done: bool = False - self.buffer: list = [] - - def reset(self): - self.delta = "" - self.response = "" - self.done = False - self.buffer: list = [] - - async def send_delta(self, delta): - self.delta = delta - self.response += delta - def get_index(list): - if len(list) == 0: - return 0 - else: - return len(list)-1 - if self.buffer != []: - try: - if self.delta != self.buffer[get_index(self.buffer)]: - self.buffer.append(delta) - except IndexError as index_error: - log(f"\nCaught IndexError: {str(index_error)}") - self.buffer.append(delta) - else: self.buffer.append(delta) - -# To get the env var -from dotenv import load_dotenv -import os +# Load environment variables load_dotenv() # For logging -import pytz -from datetime import datetime +def log(message: str): + timestamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S') + with open("logs/log.md", "a") as file: + file.write(f"{timestamp} - {message}\n") -def log(str): - with open("logs/log.md", "at") as file: - file.write(str) +# StreamOutput class to handle streaming +class StreamOutput: + def __init__(self): + self.response = "" + self.done = False + self.buffer = [] + self.lock = threading.Lock() -### OpenAI Config + def reset(self): + with self.lock: + self.response = "" + self.done = False + self.buffer = [] -# Setting up OpenAI Client with API Key + def add_to_buffer(self, delta: str): + with self.lock: + self.response += delta + self.buffer.append(delta) + +output_stream = StreamOutput() + +# OpenAI Client Configuration client = OpenAI( - organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', - project="proj_NGz8Kux8CSka7DRJucAlDCz6", - api_key=os.getenv("OPENAI_API_KEY") + organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', + project="proj_NGz8Kux8CSka7DRJucAlDCz6", + api_key=os.getenv("OPENAI_API_KEY") ) -# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" -# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) - - @override - def on_text_delta(self, delta, snapshot): - awaiter(output_stream.send_delta(delta.value)) +# Async helper +def awaiter(coro): + asyncio.run(coro) - def on_tool_call_created(self, tool_call): - raise Exception("Assistant shouldn't be calling tools.") +# EventHandler for OpenAI Assistant +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text) -> None: + awaiter(output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n")) -def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - output_stream.done = True + @override + def on_text_delta(self, delta, snapshot): + awaiter(output_stream.send_delta(delta.value)) -def get_video_id(url): - youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' - youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' - - id = re.search(youtu_be, url) - if not id: - id = re.search(youtube_com, url) - - if not id: - # Couldn't parse video ID from URL - return None - - return id.group(1) + def on_tool_call_created(self, tool_call): + raise Exception("Assistant shouldn't be calling tools.") -# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt -def get_auto_transcript(video_id): - trans_api_errors = youtube_transcript_api._errors - try: - transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) - except trans_api_errors.TranscriptsDisabled as e: - log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') +def create_and_stream(transcript: str): + try: + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + except Exception as e: + log(f"Error in create_and_stream: {e}") + output_stream.done = True + +def get_video_id(url: str) -> str: + youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' + youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' + + match = re.search(youtu_be, url) or re.search(youtube_com, url) + if match: + return match.group(1) return None - - formatter = TextFormatter() # Ensure that you create an instance of TextFormatter - txt_transcript = formatter.format_transcript(transcript) - return txt_transcript - -output_stream = StreamOutput() \ No newline at end of file +def get_auto_transcript(video_id: str) -> str: + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en']) + formatter = TextFormatter() + return formatter.format_transcript(transcript) + except _errors.TranscriptsDisabled as e: + log(f'Exception while fetching transcript: {e}') + except Exception as e: + log(f'Unexpected error while fetching transcript: {e}') + return None \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 403d03b..83defbb 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3.8' - services: app: build: .