# To parse video ids
import re

# YouTube transcript stuff
import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# OpenAI API stuff
from openai import AssistantEventHandler
from openai import OpenAI

# For streaming
from typing_extensions import override

# To get the env var
from dotenv import load_dotenv
import os

load_dotenv()


### OpenAI Config

# This is copied (almost) verbatim from the OpenAI quickstart guide.
class EventHandler(AssistantEventHandler):
    """Stream handler that echoes assistant output to stdout as it arrives."""

    @override
    def on_text_created(self, text) -> None:
        """Print the prompt prefix when the assistant starts a text message."""
        print("\nassistant > ", end="", flush=True)

    @override
    def on_text_delta(self, delta, snapshot):
        """Print each streamed text fragment without a trailing newline."""
        print(delta.value, end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        """Announce the type of tool call the assistant has started."""
        print(f"\nassistant > {tool_call.type}\n", flush=True)

    @override
    def on_tool_call_delta(self, delta, snapshot):
        """Print code-interpreter input and any log outputs as they stream."""
        # Only code-interpreter tool calls produce anything to print.
        if delta.type != 'code_interpreter':
            return

        if delta.code_interpreter.input:
            print(delta.code_interpreter.input, end="", flush=True)

        if delta.code_interpreter.outputs:
            print("\n\noutput >", flush=True)
            for output in delta.code_interpreter.outputs:
                if output.type == "logs":
                    print(f"\n{output.logs}", flush=True)


# Setting up the OpenAI client with the API key from the environment
# (populated from .env by load_dotenv() above).
api_key = os.getenv("OPENAI_API_KEY")

client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=api_key
)

# "screw bardo" assistant, configured to make notes and 5 Q&A based on any
# given YouTube transcript.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"


def create_and_stream(transcript):
    """Run the assistant on *transcript* in a fresh thread and stream the reply.

    A new thread is created on every call, so only call this once the
    transcript has been fetched successfully; otherwise a thread is created
    for nothing.

    Args:
        transcript: Text sent as the single user message of the new thread.

    Returns:
        The text value of the first content part of the first message
        returned by ``stream.get_final_messages()``.
    """
    with client.beta.threads.create_and_run_stream(
        assistant_id=asst_screw_bardo_id,
        thread={
            "messages": [
                {"role": "user", "content": transcript}
            ]
        },
        event_handler=EventHandler()
    ) as stream:
        # Block until the run finishes; EventHandler prints output meanwhile.
        stream.until_done()
        messages = stream.get_final_messages()
        return messages[0].content[0].text.value


# Patterns are tried in order; group 1 of each captures the 11-character id.
_VIDEO_ID_PATTERNS = (
    # Short links: youtu.be/<id>  (dot escaped so it only matches a literal '.')
    re.compile(r'youtu\.be/([A-Za-z0-9_-]{11})'),
    # Watch links: youtube.com/watch?v=<id>, with `v` anywhere in the query
    # string (e.g. watch?feature=share&v=<id>).
    re.compile(r'youtube\.com/watch\?(?:[^#]*?&)?v=([A-Za-z0-9_-]{11})'),
)


def get_video_id(url):
    """Extract the 11-character YouTube video id from *url*.

    Supports ``youtu.be/<id>`` short links and ``youtube.com/watch?...v=<id>``
    watch links.

    Args:
        url: The URL string to parse.

    Returns:
        The video id, or ``None`` (after printing a message) when *url* is
        not a string or no id can be found in it.
    """
    if isinstance(url, str):
        for pattern in _VIDEO_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

    print("Couldn't parse video ID from URL")
    return None


def get_auto_transcript(video_id):
    """Fetch the English transcript of a video and return it as plain text.

    Args:
        video_id: YouTube video id, as returned by ``get_video_id``.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when
        transcripts are disabled for the video. Any other exception raised
        while fetching the transcript propagates to the caller.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except youtube_transcript_api._errors.TranscriptsDisabled:
        return None

    # format_transcript is an instance method, so a TextFormatter instance
    # is required.
    formatter = TextFormatter()
    return formatter.format_transcript(transcript)


# Example usage (kept disabled), where `url` is the link entered by the user:
#
#     video_id = get_video_id(url)
#     transcript = get_auto_transcript(video_id)
#     create_and_stream(transcript)