Files
screw-bardo/main.py

102 lines
2.9 KiB
Python

# To parse video ids
import re
# Youtube Transcript stuff import
import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
# OpenAI API stuff import
from openai import AssistantEventHandler
from openai import OpenAI
# For streaming
from typing_extensions import override
import threading
# Shared output buffer: the EventHandler callbacks below append streamed text to it.
# output_lock is held around every append those callbacks make.
output_buffer = []
output_lock = threading.Lock()
# To get the env vars; load_dotenv() runs at import time, before the API key is read below
from dotenv import load_dotenv
import os
load_dotenv()
### OpenAI Config
# The API key is read from the OPENAI_API_KEY environment variable (None when unset)
api_key = os.environ.get("OPENAI_API_KEY")
# Client bound to the fixed organization and project used by this app
client = OpenAI(
    api_key=api_key,
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
)
# ID of the screw bardo assistant, which is configured to make notes and 5Q&A
# based on any given YouTube transcript
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
class EventHandler(AssistantEventHandler):
    """Stream handler that collects assistant output in ``output_buffer``.

    Each callback appends to the module-level ``output_buffer`` while holding
    ``output_lock`` instead of printing the text directly.
    """

    @override
    def on_text_created(self, text) -> None:
        """Append the assistant marker when a new text message starts."""
        # NOTE(review): ``text`` is interpolated as-is, so its string form ends
        # up in the buffer right after the marker -- confirm that is intended.
        with output_lock:
            output_buffer.append(f"\nassistant > {text}")

    @override
    def on_text_delta(self, delta, snapshot):
        """Append the newly streamed text fragment, if the delta carries one."""
        fragment = delta.value
        # The SDK types ``value`` as optional; keeping ``None`` out of the
        # buffer means it only ever holds strings.
        if fragment is None:
            return
        with output_lock:
            output_buffer.append(fragment)

    @override
    def on_tool_call_created(self, tool_call):
        """Append a marker naming the type of the tool call that was started."""
        with output_lock:
            output_buffer.append(f"\nassistant > {tool_call.type}\n")
def create_and_stream(transcript):
    """Send ``transcript`` to the assistant in a new thread and stream the run.

    The transcript becomes the single user message of the thread. Streamed
    events are delivered to a fresh ``EventHandler``, and the call blocks
    until the stream is done.
    """
    user_message = {"role": "user", "content": transcript}
    stream_manager = client.beta.threads.create_and_run_stream(
        assistant_id=asst_screw_bardo_id,
        thread={"messages": [user_message]},
        event_handler=EventHandler(),
    )
    with stream_manager as run_stream:
        run_stream.until_done()
# URL shapes that carry a video ID, tried in order. Each pattern captures the
# 11-character ID in group 1.
_VIDEO_ID_PATTERNS = (
    # Short links: https://youtu.be/<id>
    re.compile(r'youtu\.be/([A-Za-z0-9_-]{11})'),
    # Watch links with v as the first query parameter
    re.compile(r'youtube\.com/watch\?v=([A-Za-z0-9_-]{11})'),
    # Watch links where other query parameters precede v
    re.compile(r'youtube\.com/watch\?[^#\s]*?&v=([A-Za-z0-9_-]{11})'),
    # Embed and Shorts links: https://www.youtube.com/embed/<id>
    re.compile(r'youtube\.com/(?:embed|shorts)/([A-Za-z0-9_-]{11})'),
)


def get_video_id(url):
    """Extract the 11-character YouTube video ID from ``url``.

    Supports ``youtu.be/<id>``, ``youtube.com/watch?v=<id>`` (with ``v`` at
    any position in the query string), ``youtube.com/embed/<id>`` and
    ``youtube.com/shorts/<id>``.

    Args:
        url: The URL text to search. Non-string values are treated as
            unparseable.

    Returns:
        The video ID, or ``None`` (after printing a message) when no
        supported URL shape is found.
    """
    if isinstance(url, str):
        for pattern in _VIDEO_ID_PATTERNS:
            match = pattern.search(url)
            if match:
                return match.group(1)
    print("Couldn't parse video ID from URL")
    return None
# Fetches the English transcript for a video and returns it formatted as plain text
def get_auto_transcript(video_id):
    """Return the English transcript of ``video_id`` formatted as plain text.

    Returns ``None`` when the transcript API raises ``TranscriptsDisabled``.
    Any other error raised by the API call propagates to the caller.
    """
    try:
        entries = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except youtube_transcript_api._errors.TranscriptsDisabled:
        return None
    # TextFormatter must be instantiated before format_transcript is called
    return TextFormatter().format_transcript(entries)
# Example flow, kept disabled inside a bare string literal (it is never executed):
# parse the video id from the URL entered by the user, fetch its transcript,
# then stream the assistant's response.
# NOTE(review): get_video_id takes a url argument, which the call below omits.
"""
video_id = get_video_id()
transcript = get_auto_transcript(video_id)
create_and_stream(transcript)
"""