"""Fetch a YouTube transcript and stream an OpenAI assistant's reply to it.

The assistant's streamed text is accumulated in the module-level
``output_stream`` object so that other code can read it.
"""

# Standard library
import asyncio
import os
import re  # To parse video ids
from datetime import datetime
from typing import Optional

# Third party
import pytz  # For timestamps in the log
import youtube_transcript_api._errors
from dotenv import load_dotenv  # To get the env var
from openai import AssistantEventHandler
from openai import OpenAI
from typing_extensions import override  # For streaming
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

load_dotenv()

# The event-handler callbacks below are synchronous, so each coroutine is
# driven to completion with asyncio.run.
awaiter = asyncio.run

# Where log() appends its messages.
_LOG_PATH = "logs/log.md"

# Patterns that capture the 11-character video id from the two supported
# URL shapes. They are tried in this order by get_video_id().
_YOUTU_BE_ID = re.compile(r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})')
_YOUTUBE_COM_ID = re.compile(r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})')


# The StreamOutput class to handle streaming
class StreamOutput:
    """Accumulates streamed response text.

    Attributes:
        delta: The most recent fragment passed to ``send_delta``.
        response: Every fragment received so far, concatenated.
        done: Flag set to True by ``create_and_stream`` when streaming ends.
        buffer: Fragments in arrival order; a fragment equal to the one
            immediately before it is not appended again.
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Return every attribute to its initial, empty state."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer: list = []

    async def send_delta(self, delta):
        """Record one streamed fragment.

        Args:
            delta: The text fragment to record.
        """
        self.delta = delta
        self.response += delta
        # NOTE(review): consecutive identical fragments are kept in
        # ``response`` but only stored once in ``buffer`` -- confirm that
        # this de-duplication is intended.
        if not self.buffer or self.buffer[-1] != delta:
            self.buffer.append(delta)


# The parameter keeps its original name (it shadows the builtin ``str``) so
# that any caller passing it by keyword keeps working.
def log(str):
    """Append a message to ``logs/log.md``.

    If the file cannot be opened because its directory does not exist, the
    directory is created, a notice is written, and then the message itself
    is written so that it is not lost.

    Args:
        str: The text to append. It is written verbatim, with no newline
            added.
    """
    try:
        with open(_LOG_PATH, "a") as file:
            file.write(str)
    except FileNotFoundError:
        # Append mode creates a missing file by itself, so this error means
        # the directory is missing. Create it and retry exactly once rather
        # than recursing.
        os.makedirs(os.path.dirname(_LOG_PATH), exist_ok=True)
        with open(_LOG_PATH, "a") as file:
            file.write("#### log.md was not found, so it was just touched.")
            file.write(str)


### OpenAI Config

# Setting up OpenAI Client with API Key
client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=os.getenv("OPENAI_API_KEY")
)

# Screw-Bardo assistant, configured to make notes and 5 Q&A based on any
# given YouTube transcript.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"


# Adapted from the OpenAI quickstart guide: instead of printing, each piece
# of text is appended to the shared output buffer.
class EventHandler(AssistantEventHandler):
    """Forwards streamed assistant text into ``output_stream``."""

    @override
    def on_text_created(self, text) -> None:
        # Emit a fixed header before the first fragment of the reply.
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot):
        awaiter(output_stream.send_delta(delta.value))

    def on_tool_call_created(self, tool_call):
        raise Exception("Assistant shouldn't be calling tools.")


def create_and_stream(transcript):
    """Send ``transcript`` to the assistant and stream the reply.

    Creates a new thread containing one user message, runs the assistant on
    it, and blocks until the stream has finished. Text arrives in
    ``output_stream`` through ``EventHandler``.

    Args:
        transcript: The text sent as the user message.

    Raises:
        Whatever the OpenAI client or ``EventHandler`` raises; errors are
        not swallowed here.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler()
        ) as stream:
            stream.until_done()
    finally:
        # Mark the stream finished even when it fails, so code waiting on
        # ``done`` is not left waiting for a flag that never flips.
        output_stream.done = True


def get_video_id(url) -> Optional[str]:
    """Extract the 11-character video id from a YouTube URL.

    Recognises ``youtu.be/<id>`` and ``youtube.com/watch?v=<id>``.

    Args:
        url: The URL text to search.

    Returns:
        The video id, or None when neither pattern matches.
    """
    for pattern in (_YOUTU_BE_ID, _YOUTUBE_COM_ID):
        match = pattern.search(url)
        if match:
            return match.group(1)
    # Couldn't parse video ID from URL
    return None


# Fetches the English transcript and returns it formatted as basic text.
def get_auto_transcript(video_id) -> Optional[str]:
    """Return the English transcript of a video as plain text.

    Args:
        video_id: The YouTube video id to fetch the transcript for.

    Returns:
        The formatted transcript text, or None when transcripts are
        disabled for the video (the exception is written to the log).
    """
    trans_api_errors = youtube_transcript_api._errors
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False
        )
    except trans_api_errors.TranscriptsDisabled as e:
        # NOTE(review): only TranscriptsDisabled is handled here; any other
        # error raised by the transcript library reaches the caller --
        # confirm that callers expect this.
        log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
        return None

    formatter = TextFormatter()  # Ensure that you create an instance of TextFormatter
    txt_transcript = formatter.format_transcript(transcript)
    return txt_transcript


output_stream = StreamOutput()

log(f"# Main initilized at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}. Presumeably application starting.")