"""Fetch a YouTube video's transcript and stream an OpenAI Assistant's reply.

The assistant referenced by ``asst_screw_bardo_id`` is configured to make
notes and 5 Q&A based on any given YouTube transcript. Streamed output is
appended to the module-level ``output_buffer`` (guarded by ``output_lock``)
instead of being printed.
"""

import os
# To parse video ids
import re
import threading
from typing import Optional

# To get the env var
from dotenv import load_dotenv

# OpenAI API stuff import
from openai import AssistantEventHandler
from openai import OpenAI

# For streaming
from typing_extensions import override

# Youtube Transcript stuff import
import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Output buffer and thread lock. EventHandler appends streamed chunks to
# output_buffer; every append happens while holding output_lock.
output_buffer = []
output_lock = threading.Lock()

load_dotenv()

### OpenAI Config

# Setting up OpenAI Client with API Key
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=api_key,
)

# screw bardo assistant that is configured to make notes and 5Q&A based on
# any given YouTube Transcript
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"

# A YouTube video id is 11 characters from [A-Za-z0-9_-].
_VIDEO_ID = r'([A-Za-z0-9_-]{11})'

# Tried in order; group(1) of the first match is the video id.
_VIDEO_ID_PATTERNS = (
    # Short links: youtu.be/<id> (the dot is escaped so it only matches ".").
    re.compile(r'youtu\.be/' + _VIDEO_ID),
    # Watch links: youtube.com/watch?...v=<id>. The lazy group skips any
    # query parameters that precede "v=", so the first "v" parameter wins.
    re.compile(r'youtube\.com/watch\?(?:[^&#\s]*&)*?v=' + _VIDEO_ID),
)


# Based on the OpenAI Assistants streaming quickstart guide, but appending
# to an output buffer instead of directly printing.
class EventHandler(AssistantEventHandler):
    """Collect streamed assistant events into ``output_buffer``."""

    @override
    def on_text_created(self, text) -> None:
        """Append the assistant prompt prefix when a text block starts.

        Only the prefix is appended: ``text`` is the SDK's text object, not a
        plain string, and its content is delivered through ``on_text_delta``.
        """
        with output_lock:
            output_buffer.append("\nassistant > ")

    @override
    def on_text_delta(self, delta, snapshot):
        """Append the newly streamed text fragment (``delta.value``)."""
        with output_lock:
            output_buffer.append(delta.value)

    @override
    def on_tool_call_created(self, tool_call):
        """Append a marker line naming the type of the tool call."""
        with output_lock:
            output_buffer.append(f"\nassistant > {tool_call.type}\n")


def create_and_stream(transcript):
    """Run the assistant on ``transcript`` and stream its reply.

    Creates a new thread whose only message is the user-supplied transcript,
    runs the assistant ``asst_screw_bardo_id`` on it, and blocks until the
    stream is done. Output is delivered through ``EventHandler`` into
    ``output_buffer``; nothing is returned.

    Args:
        transcript: Transcript text sent as the user message content.
    """
    with client.beta.threads.create_and_run_stream(
        assistant_id=asst_screw_bardo_id,
        thread={
            "messages": [{"role": "user", "content": transcript}]
        },
        event_handler=EventHandler(),
    ) as stream:
        stream.until_done()


def get_video_id(url: Optional[str]) -> Optional[str]:
    """Extract the 11-character video id from a YouTube URL.

    Recognises ``youtu.be/<id>`` links and ``youtube.com/watch`` links with a
    ``v=<id>`` query parameter in any position.

    Args:
        url: The URL text to inspect.

    Returns:
        The video id, or ``None`` (after printing a message) when ``url`` is
        not a string or no id can be found in it.
    """
    if isinstance(url, str):
        for pattern in _VIDEO_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)

    print("Couldn't parse video ID from URL")
    return None


def get_auto_transcript(video_id):
    """Fetch the English transcript of a video as plain text.

    Args:
        video_id: YouTube video id, e.g. the result of ``get_video_id``.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when no
        video id was given or transcripts are disabled for the video. Any
        other error raised by the transcript API propagates to the caller.
    """
    # get_video_id returns None on failure; don't send that to the API.
    if not video_id:
        return None

    trans_api_errors = youtube_transcript_api._errors
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except trans_api_errors.TranscriptsDisabled:
        return None

    # format_transcript is an instance method, so a TextFormatter instance
    # is required.
    formatter = TextFormatter()
    txt_transcript = formatter.format_transcript(transcript)
    return txt_transcript


# Example usage (disabled), where `url` is the video URL entered by the user:
#
#     video_id = get_video_id(url)
#     transcript = get_auto_transcript(video_id)
#     create_and_stream(transcript)