# screw-bardo/main.py


# To parse video ids
import re

# Youtube Transcript stuff import
import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# OpenAI API stuff import
from openai import AssistantEventHandler
from openai import OpenAI
# For streaming
from typing_extensions import override

# To get the env var
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment so that
# OPENAI_API_KEY can be read with os.getenv further down.
load_dotenv()

### OpenAI Config

# Adapted from the OpenAI Assistants quickstart guide.
class EventHandler(AssistantEventHandler):
  """Print assistant output to stdout as streaming events arrive."""

  @override
  def on_text_created(self, text) -> None:
    """Print the "assistant >" prompt when a new text message starts."""
    print("\nassistant > ", end="", flush=True)

  @override
  def on_text_delta(self, delta, snapshot):
    """Print each text fragment as it arrives, without a trailing newline."""
    print(delta.value, end="", flush=True)

  @override
  def on_tool_call_created(self, tool_call):
    """Announce the type of a tool call that has just been created."""
    print(f"\nassistant > {tool_call.type}\n", flush=True)

  @override
  def on_tool_call_delta(self, delta, snapshot):
    """Echo code-interpreter input and log outputs; ignore other tool types."""
    if delta.type != 'code_interpreter':
      return

    interpreter = delta.code_interpreter
    # NOTE(review): assumes the SDK may deliver a delta whose code_interpreter
    # payload is absent; the guard avoids an AttributeError in that case — confirm.
    if interpreter is None:
      return

    if interpreter.input:
      print(interpreter.input, end="", flush=True)
    if interpreter.outputs:
      print("\n\noutput >", flush=True)
      for output in interpreter.outputs:
        # Only log outputs are printed; other output types are skipped.
        if output.type == "logs":
          print(f"\n{output.logs}", flush=True)


# Build the shared OpenAI client. The API key is taken from the environment;
# the organization and project IDs are fixed for this app.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(
  api_key=api_key,
  organization="org-7ANUFsqOVIXLLNju8Rvmxu3h",
  project="proj_NGz8Kux8CSka7DRJucAlDCz6",
)

# ID of the "screw bardo" assistant, which is configured to produce notes and
# a 5-question Q&A from any given YouTube transcript.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"

# Every call creates a brand-new thread, so only call this once the transcript
# has been confirmed to be usable; creating a thread any earlier is wasted work.
def create_and_stream(transcript):
  """Send a transcript to the assistant on a new thread and stream the reply.

  The run is streamed through EventHandler, which prints the reply to stdout
  as it arrives.

  Args:
    transcript: Transcript text, sent as the single user message of the thread.

  Returns:
    The text of the first content block of the first final message, or None
    when the run produced no message, no content, or a non-text first block.
  """
  with client.beta.threads.create_and_run_stream(
    assistant_id=asst_screw_bardo_id,
    thread={
      "messages": [
        {"role": "user", "content": transcript},
      ],
    },
    event_handler=EventHandler(),
  ) as stream:
    stream.until_done()
    # Collect the result before leaving the `with` block, i.e. while the
    # stream is still open, instead of querying it after it has been closed.
    messages = stream.get_final_messages()

  # Guard the indexing: an empty run would otherwise raise IndexError.
  if not messages or not messages[0].content:
    return None

  # A block without a `.text` attribute (not a text block) yields None
  # rather than an AttributeError.
  text = getattr(messages[0].content[0], "text", None)
  return text.value if text is not None else None

# Patterns are tried in order; group 1 is always the 11-character video ID.
_VIDEO_ID_PATTERNS = (
  # Short links: youtu.be/<id> (the dot is escaped so it only matches a literal ".")
  re.compile(r'youtu\.be/([A-Za-z0-9_-]{11})'),
  # Watch pages, with v= either first or after other query parameters
  re.compile(r'youtube\.com/watch\?(?:[^#\s]*?&)?v=([A-Za-z0-9_-]{11})'),
  # Shorts, embeds and live pages: the ID is a path segment
  re.compile(r'youtube(?:-nocookie)?\.com/(?:shorts|embed|live)/([A-Za-z0-9_-]{11})'),
)


def get_video_id(url):
  """Extract the 11-character video ID from a YouTube URL.

  Args:
    url: A YouTube URL (youtu.be short link, watch page, shorts, embed or live).

  Returns:
    The video ID string, or None (after printing a message) when `url` is not
    a string or no ID could be found in it.
  """
  if isinstance(url, str):
    for pattern in _VIDEO_ID_PATTERNS:
      match = pattern.search(url)
      if match:
        return match.group(1)

  print("Couldn't parse video ID from URL")
  return None

# Fetches the English transcript of a video and returns it as plain text
def get_auto_transcript(video_id):
  """Fetch the English transcript for a video and format it as plain text.

  Args:
    video_id: YouTube video ID, e.g. the value returned by get_video_id().

  Returns:
    The transcript as plain text, or None when no video ID was given or the
    transcript cannot be retrieved (transcripts disabled, no English
    transcript found, or video unavailable). Other errors propagate.
  """
  # get_video_id() returns None when parsing fails; don't call the API with it.
  if not video_id:
    return None

  trans_api_errors = youtube_transcript_api._errors
  try:
    transcript = YouTubeTranscriptApi.get_transcript(
      video_id,
      languages=['en'],
      proxies=None,
      cookies=None,
      preserve_formatting=False,
    )
  except (
    trans_api_errors.TranscriptsDisabled,
    trans_api_errors.NoTranscriptFound,
    trans_api_errors.VideoUnavailable,
  ):
    # All three mean "this video has no usable transcript", which callers
    # already have to handle as None.
    return None

  # TextFormatter must be instantiated before format_transcript can be called.
  return TextFormatter().format_transcript(transcript)


# Disabled example of the end-to-end flow, kept inside a bare string literal so
# it never runs: parse the video id entered by the user, fetch its transcript,
# then stream the assistant's response.
# NOTE(review): get_video_id() is called here without the `url` argument its
# signature requires, and neither possible None return value is checked —
# fix both before re-enabling this.
"""
video_id = get_video_id()

transcript = get_auto_transcript(video_id)

create_and_stream(transcript)
"""