I forgot to stage these for the last commit, RIP.
This commit is contained in:
136
app/main.py
136
app/main.py
@@ -1,83 +1,63 @@
|
|||||||
# To parse video ids
|
import os
|
||||||
import re
|
import re
|
||||||
|
import threading
|
||||||
# Youtube Transcript stuff import
|
import pytz
|
||||||
import youtube_transcript_api._errors
|
from datetime import datetime
|
||||||
from youtube_transcript_api import YouTubeTranscriptApi
|
from dotenv import load_dotenv
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi, _errors
|
||||||
from youtube_transcript_api.formatters import TextFormatter
|
from youtube_transcript_api.formatters import TextFormatter
|
||||||
|
from openai import AssistantEventHandler, OpenAI
|
||||||
# OpenAI API stuff import
|
|
||||||
from openai import AssistantEventHandler
|
|
||||||
from openai import OpenAI
|
|
||||||
|
|
||||||
### For streaming
|
|
||||||
from typing_extensions import override
|
from typing_extensions import override
|
||||||
import asyncio
|
import asyncio
|
||||||
awaiter = asyncio.run
|
|
||||||
|
|
||||||
# The StreamOutput class to handle streaming
|
|
||||||
class StreamOutput:
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.delta: str = ""
|
|
||||||
self.response: str = ""
|
|
||||||
self.done: bool = False
|
|
||||||
self.buffer: list = []
|
|
||||||
|
|
||||||
def reset(self):
|
|
||||||
self.delta = ""
|
|
||||||
self.response = ""
|
|
||||||
self.done = False
|
|
||||||
self.buffer: list = []
|
|
||||||
|
|
||||||
async def send_delta(self, delta):
|
|
||||||
self.delta = delta
|
|
||||||
self.response += delta
|
|
||||||
def get_index(list):
|
|
||||||
if len(list) == 0:
|
|
||||||
return 0
|
|
||||||
else:
|
|
||||||
return len(list)-1
|
|
||||||
if self.buffer != []:
|
|
||||||
try:
|
|
||||||
if self.delta != self.buffer[get_index(self.buffer)]:
|
|
||||||
self.buffer.append(delta)
|
|
||||||
except IndexError as index_error:
|
|
||||||
log(f"\nCaught IndexError: {str(index_error)}")
|
|
||||||
self.buffer.append(delta)
|
|
||||||
else: self.buffer.append(delta)
|
|
||||||
|
|
||||||
# To get the env var
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import os
|
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# For logging
|
# For logging
|
||||||
import pytz
|
def log(message: str):
|
||||||
from datetime import datetime
|
timestamp = datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')
|
||||||
|
with open("logs/log.md", "a") as file:
|
||||||
|
file.write(f"{timestamp} - {message}\n")
|
||||||
|
|
||||||
def log(str):
|
# StreamOutput class to handle streaming
|
||||||
with open("logs/log.md", "at") as file:
|
class StreamOutput:
|
||||||
file.write(str)
|
def __init__(self):
|
||||||
|
self.response = ""
|
||||||
|
self.done = False
|
||||||
|
self.buffer = []
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
|
||||||
### OpenAI Config
|
def reset(self):
|
||||||
|
with self.lock:
|
||||||
|
self.response = ""
|
||||||
|
self.done = False
|
||||||
|
self.buffer = []
|
||||||
|
|
||||||
# Setting up OpenAI Client with API Key
|
def add_to_buffer(self, delta: str):
|
||||||
|
with self.lock:
|
||||||
|
self.response += delta
|
||||||
|
self.buffer.append(delta)
|
||||||
|
|
||||||
|
output_stream = StreamOutput()
|
||||||
|
|
||||||
|
# OpenAI Client Configuration
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
|
organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
|
||||||
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
|
project="proj_NGz8Kux8CSka7DRJucAlDCz6",
|
||||||
api_key=os.getenv("OPENAI_API_KEY")
|
api_key=os.getenv("OPENAI_API_KEY")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Screw-Bardo assistant, configured to produce notes and 5 Q&A pairs from any given YouTube transcript
|
|
||||||
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||||
|
|
||||||
# This is copied straight from the quickstart guide, except that it appends to an output buffer instead of printing directly:
|
# Async helper
|
||||||
|
def awaiter(coro):
|
||||||
|
asyncio.run(coro)
|
||||||
|
|
||||||
|
# EventHandler for OpenAI Assistant
|
||||||
class EventHandler(AssistantEventHandler):
|
class EventHandler(AssistantEventHandler):
|
||||||
@override
|
@override
|
||||||
def on_text_created(self, text) -> None:
|
def on_text_created(self, text) -> None:
|
||||||
awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))
|
awaiter(output_stream.send_delta("Response Received:\n\nScrew-Bardo:\n\n"))
|
||||||
|
|
||||||
@override
|
@override
|
||||||
def on_text_delta(self, delta, snapshot):
|
def on_text_delta(self, delta, snapshot):
|
||||||
@@ -86,7 +66,8 @@ class EventHandler(AssistantEventHandler):
|
|||||||
def on_tool_call_created(self, tool_call):
|
def on_tool_call_created(self, tool_call):
|
||||||
raise Exception("Assistant shouldn't be calling tools.")
|
raise Exception("Assistant shouldn't be calling tools.")
|
||||||
|
|
||||||
def create_and_stream(transcript):
|
def create_and_stream(transcript: str):
|
||||||
|
try:
|
||||||
with client.beta.threads.create_and_run_stream(
|
with client.beta.threads.create_and_run_stream(
|
||||||
assistant_id=asst_screw_bardo_id,
|
assistant_id=asst_screw_bardo_id,
|
||||||
thread={
|
thread={
|
||||||
@@ -96,33 +77,26 @@ def create_and_stream(transcript):
|
|||||||
) as stream:
|
) as stream:
|
||||||
stream.until_done()
|
stream.until_done()
|
||||||
output_stream.done = True
|
output_stream.done = True
|
||||||
|
except Exception as e:
|
||||||
|
log(f"Error in create_and_stream: {e}")
|
||||||
|
output_stream.done = True
|
||||||
|
|
||||||
def get_video_id(url):
|
def get_video_id(url: str) -> str:
|
||||||
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
|
||||||
youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
|
youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
|
||||||
|
|
||||||
id = re.search(youtu_be, url)
|
match = re.search(youtu_be, url) or re.search(youtube_com, url)
|
||||||
if not id:
|
if match:
|
||||||
id = re.search(youtube_com, url)
|
return match.group(1)
|
||||||
|
|
||||||
if not id:
|
|
||||||
# Couldn't parse video ID from URL
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return id.group(1)
|
def get_auto_transcript(video_id: str) -> str:
|
||||||
|
|
||||||
# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt
|
|
||||||
def get_auto_transcript(video_id):
|
|
||||||
trans_api_errors = youtube_transcript_api._errors
|
|
||||||
try:
|
try:
|
||||||
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
|
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
||||||
except trans_api_errors.TranscriptsDisabled as e:
|
formatter = TextFormatter()
|
||||||
log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
|
return formatter.format_transcript(transcript)
|
||||||
|
except _errors.TranscriptsDisabled as e:
|
||||||
|
log(f'Exception while fetching transcript: {e}')
|
||||||
|
except Exception as e:
|
||||||
|
log(f'Unexpected error while fetching transcript: {e}')
|
||||||
return None
|
return None
|
||||||
|
|
||||||
formatter = TextFormatter() # Ensure that you create an instance of TextFormatter
|
|
||||||
|
|
||||||
txt_transcript = formatter.format_transcript(transcript)
|
|
||||||
return txt_transcript
|
|
||||||
|
|
||||||
output_stream = StreamOutput()
|
|
||||||
@@ -1,5 +1,3 @@
|
|||||||
version: '3.8'
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
app:
|
app:
|
||||||
build: .
|
build: .
|
||||||
|
|||||||
Reference in New Issue
Block a user