I forgot to stage these for the last commit, RIP.

This commit is contained in:
ForeverPyrite
2024-11-20 15:51:21 -05:00
parent c363f4c246
commit cde37492af
2 changed files with 80 additions and 108 deletions

View File

@@ -1,128 +1,102 @@
# To parse video ids
import os
import re
# Youtube Transcript stuff import
import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi
import threading
import pytz
from datetime import datetime
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi, _errors
from youtube_transcript_api.formatters import TextFormatter
# OpenAI API stuff import
from openai import AssistantEventHandler
from openai import OpenAI
### For streaming
from openai import AssistantEventHandler, OpenAI
from typing_extensions import override
import asyncio
awaiter = asyncio.run
# The StreamOutput class to handle streaming
class StreamOutput:
    """Accumulates streamed text chunks for a consumer to read.

    Fields:
        delta:    the most recent chunk passed to ``send_delta``.
        response: every chunk received so far, concatenated in order.
        done:     completion flag; this class only ever resets it to False.
        buffer:   chunks queued for the consumer; a chunk equal to the one
                  queued immediately before it is not queued again.
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Restore every field to its initial, empty state."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record ``delta`` as the latest chunk and queue it for the consumer.

        ``response`` always grows by ``delta``. ``buffer`` grows only when
        ``delta`` differs from the chunk queued last (or the buffer is empty).
        """
        self.delta = delta
        self.response += delta
        # ``buffer[-1]`` is only evaluated when the buffer is non-empty, so no
        # IndexError handling is needed here.
        if not self.buffer or self.buffer[-1] != delta:
            self.buffer.append(delta)
# To get the env var
from dotenv import load_dotenv
import os
# Load environment variables
load_dotenv()
# For logging
import pytz
from datetime import datetime
def log(message: str):
    """Append one timestamped line to ``logs/log.md``.

    The line has the form ``<timestamp> - <message>``, where the timestamp is
    the current time in the America/New_York zone formatted as
    ``YYYY-MM-DD HH:MM:SS``.
    """
    log_path = "logs/log.md"
    timestamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')
    # log() is called from exception handlers elsewhere in this file; a missing
    # directory must not turn an already-handled error into a crash.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    # Explicit UTF-8 so non-ASCII text in a message cannot fail on a platform
    # whose default encoding is narrower.
    with open(log_path, "a", encoding="utf-8") as file:
        file.write(f"{timestamp} - {message}\n")
def log(str):
    """Append the given text, unmodified, to ``logs/log.md``."""
    # NOTE: the parameter name shadows the builtin ``str``; it is left as-is
    # because it is part of the signature callers see.
    with open("logs/log.md", mode="at") as handle:
        handle.write(str)
# StreamOutput class to handle streaming
class StreamOutput:
    """Holder for streamed response text, with a lock alongside the data."""

    def __init__(self):
        # Created first so the lock exists before the fields stored next to it.
        self.lock = threading.Lock()
        self.buffer = []
        self.done = False
        self.response = ""
### OpenAI Config
def reset(self):
    """Clear the response text, the done flag and the buffer under the lock."""
    with self.lock:
        self.response, self.done, self.buffer = "", False, []
# Setting up OpenAI Client with API Key
# The organization and project IDs are hard-coded; only the API key is read
# from the environment (os.getenv returns None when OPENAI_API_KEY is unset).
client = OpenAI(
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
api_key=os.getenv("OPENAI_API_KEY")
)
# ID of the "screw bardo" assistant, which is configured to produce notes and a
# five-question Q&A from any given YouTube transcript.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
class EventHandler(AssistantEventHandler):
    """Streaming callbacks that forward assistant text to ``output_stream``."""

    @override
    def on_text_created(self, text) -> None:
        # Push a fixed banner into the output stream.
        banner = "Response Recieved:\n\nScrew-Bardo:\n\n"
        awaiter(output_stream.send_delta(banner))

    @override
    def on_text_delta(self, delta, snapshot):
        # Forward only the text carried by this delta; the snapshot is unused.
        chunk = delta.value
        awaiter(output_stream.send_delta(chunk))

    def on_tool_call_created(self, tool_call):
        # Any tool call is treated as an error for this assistant.
        raise Exception("Assistant shouldn't be calling tools.")
def create_and_stream(transcript):
    """Run the assistant on ``transcript`` and stream its reply.

    A new thread is created with ``transcript`` as its single user message,
    and the run is streamed through an ``EventHandler``. The call blocks until
    the stream is finished.

    ``output_stream.done`` is set to True whether the stream finishes normally
    or raises, so a consumer polling that flag is never left waiting. Any
    exception from the stream still propagates to the caller.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler()
        ) as stream:
            stream.until_done()
    finally:
        output_stream.done = True
def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognises ``youtu.be/<id>`` and ``youtube.com/watch?v=<id>`` links, tried
    in that order.

    Returns:
        The ID string, or None when neither form is found in ``url``.
    """
    # The dots are escaped so that only the literal host names match; an
    # unescaped "." would also accept e.g. "youtuXbe/".
    patterns = (
        r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})',
        r'(?<=youtube\.com/watch\?v=)([A-Za-z0-9_-]{11})',
    )
    for pattern in patterns:
        found = re.search(pattern, url)
        if found:
            return found.group(1)
    # Couldn't parse a video ID from the URL.
    return None
# Fetches the English transcript for a video and returns it formatted as plain text
def get_auto_transcript(video_id):
    """Return the English transcript of ``video_id`` as plain text.

    When transcripts are disabled for the video, the exception is logged and
    None is returned. Any other exception from the transcript API propagates.
    """
    api_errors = youtube_transcript_api._errors
    try:
        entries = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except api_errors.TranscriptsDisabled as err:
        log(f'\n\n# Exception while fetching transcript:\n \n{err}\n')
        return None
    # TextFormatter must be instantiated before it can format anything.
    return TextFormatter().format_transcript(entries)
def add_to_buffer(self, delta: str):
    """Append ``delta`` to the full response and queue it, under the lock."""
    with self.lock:
        self.response = self.response + delta
        self.buffer.append(delta)
# Module-level StreamOutput instance; the event handler and create_and_stream
# in this file write to it.
output_stream = StreamOutput()
# OpenAI Client Configuration
# The organization and project IDs are hard-coded; only the API key is read
# from the environment (os.getenv returns None when OPENAI_API_KEY is unset).
client = OpenAI(
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
api_key=os.getenv("OPENAI_API_KEY")
)
# ID of the assistant that create_and_stream runs.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# Async helper
def awaiter(coro):
    """Run ``coro`` to completion via ``asyncio.run`` and return its result.

    Returning the result keeps this wrapper equivalent to calling
    ``asyncio.run`` directly, rather than silently discarding the value.
    """
    return asyncio.run(coro)
# EventHandler for OpenAI Assistant
class EventHandler(AssistantEventHandler):
    """Streaming callbacks that forward assistant text to ``output_stream``.

    Text is pushed through ``StreamOutput.add_to_buffer``, which is a plain
    synchronous method, so it is called directly rather than via ``awaiter``.
    """

    @override
    def on_text_created(self, text) -> None:
        # Push a fixed banner into the output stream.
        output_stream.add_to_buffer("Response Received:\n\nScrew-Bardo:\n\n")

    @override
    def on_text_delta(self, delta, snapshot):
        # Forward only the text carried by this delta; the snapshot is unused.
        output_stream.add_to_buffer(delta.value)

    def on_tool_call_created(self, tool_call):
        # Any tool call is treated as an error for this assistant.
        raise Exception("Assistant shouldn't be calling tools.")
def create_and_stream(transcript: str):
    """Run the assistant on ``transcript`` and stream its reply.

    A new thread is created with ``transcript`` as its single user message,
    and the run is streamed through an ``EventHandler``. Exceptions are logged
    rather than raised. ``output_stream.done`` is set to True on both the
    success path and the logged-failure path.
    """
    user_message = {"role": "user", "content": transcript}
    try:
        run_stream = client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={"messages": [user_message]},
            event_handler=EventHandler(),
        )
        with run_stream as stream:
            stream.until_done()
    except Exception as exc:
        log(f"Error in create_and_stream: {exc}")
        output_stream.done = True
    else:
        output_stream.done = True
def get_video_id(url: str) -> "str | None":
    """Extract the 11-character video ID from a YouTube URL.

    Recognises ``youtu.be/<id>`` and ``youtube.com/watch?v=<id>`` links; the
    short form is tried first.

    Returns:
        The ID string, or None when neither form is found in ``url``.
    """
    # The dots are escaped so that only the literal host names match; an
    # unescaped "." would also accept e.g. "youtuXbe/".
    short_link = r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})'
    watch_link = r'(?<=youtube\.com/watch\?v=)([A-Za-z0-9_-]{11})'
    match = re.search(short_link, url) or re.search(watch_link, url)
    return match.group(1) if match else None
def get_auto_transcript(video_id: str) -> str:
    """Return the English transcript of ``video_id`` as plain text.

    Every failure is logged instead of raised, with a separate message for
    disabled transcripts. In that case the function returns None, despite the
    ``str`` return annotation.
    """
    try:
        entries = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        return TextFormatter().format_transcript(entries)
    except _errors.TranscriptsDisabled as err:
        log(f'Exception while fetching transcript: {err}')
    except Exception as err:
        log(f'Unexpected error while fetching transcript: {err}')
    # Reached only after one of the handlers above has logged a failure.
    return None

View File

@@ -1,5 +1,3 @@
version: '3.8'
services:
app:
build: .