Dockerized

This commit is contained in:
ForeverPyrite
2024-11-20 12:02:27 -05:00
parent 4bc7722162
commit cb4a781d3f
13 changed files with 187 additions and 128 deletions

12
.dockerignore Normal file
View File

@@ -0,0 +1,12 @@
__pycache__
*.pyc
*.pyo
*.pyd
*.env
*venv/
.env
*.git
.gitignore
Dockerfile
docker-compose.yml
log.md

31
Dockerfile Normal file
View File

@@ -0,0 +1,31 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Skip writing .pyc files and disable stdout/stderr buffering so output
# reaches the container logs immediately
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Set work directory
WORKDIR /app

# Install Python dependencies before copying the application so this layer
# stays cached until requirements.txt changes. Both pip invocations skip the
# download cache and share a single layer.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . /app

# Make start.sh executable
RUN chmod +x start.sh

# Expose the port the app runs on (start.sh binds gunicorn to 1986)
EXPOSE 1986

# Specify the entrypoint script
ENTRYPOINT ["./start.sh"]

View File

View File

@@ -1,129 +1,129 @@
# To parse video ids # To parse video ids
import re import re
# Youtube Transcript stuff import # Youtube Transcript stuff import
import youtube_transcript_api._errors import youtube_transcript_api._errors
from youtube_transcript_api import YouTubeTranscriptApi from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter from youtube_transcript_api.formatters import TextFormatter
# OpenAI API stuff import # OpenAI API stuff import
from openai import AssistantEventHandler from openai import AssistantEventHandler
from openai import OpenAI from openai import OpenAI
### For streaming ### For streaming
from typing_extensions import override from typing_extensions import override
import asyncio import asyncio
awaiter = asyncio.run awaiter = asyncio.run
# The StreamOutput class to handle streaming # The StreamOutput class to handle streaming
class StreamOutput:
    """Accumulate streamed text chunks so a consumer can read them.

    Attributes:
        delta: The most recent chunk passed to ``send_delta``.
        response: Every chunk received so far, concatenated in order.
        done: Completion flag. This class only ever sets it to ``False``;
            the producer is expected to set it to ``True``.
        buffer: Received chunks in order, with *consecutive* duplicates
            collapsed into one entry.
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Return every field to its initial, empty state."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record one streamed chunk.

        Args:
            delta: Text chunk to record. It is always appended to
                ``response``; it is appended to ``buffer`` only when it
                differs from the chunk currently at the end of ``buffer``.
        """
        self.delta = delta
        self.response += delta
        # NOTE(review): a chunk identical to the previous one is kept out of
        # ``buffer`` (but not out of ``response``). That also drops genuinely
        # repeated chunks -- confirm this de-duplication is intended.
        if not self.buffer or self.buffer[-1] != delta:
            self.buffer.append(delta)
# To get the env var # To get the env var
from dotenv import load_dotenv from dotenv import load_dotenv
import os import os
load_dotenv() load_dotenv()
# For logging # For logging
import pytz import pytz
from datetime import datetime from datetime import datetime
def log(str):
    """Append text verbatim to ``log.md`` in the current working directory.

    Args:
        str: Text to append. Nothing is added around it, so the caller
            supplies any newlines. (The name shadows the builtin ``str``; it
            is kept so existing keyword callers keep working.)
    """
    # Explicit UTF-8 so non-ASCII text (e.g. in exception messages) is written
    # the same way regardless of the platform's default encoding.
    with open("log.md", "at", encoding="utf-8") as file:
        file.write(str)
### OpenAI Config

# Setting up OpenAI Client with API Key.
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when the variable is not set.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=api_key
)

# ID of the "screw bardo" assistant, which is configured to make notes and 5 Q&A based on any given YouTube transcript
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
# Adapted from the OpenAI quickstart guide: events are appended to the shared output buffer instead of being printed directly.
class EventHandler(AssistantEventHandler):
    """Forward assistant streaming events to ``output_stream``.

    Each callback hands its text to ``output_stream.send_delta`` through
    ``awaiter`` (``asyncio.run``), which runs the coroutine to completion
    before returning. ``asyncio.run`` cannot be called from a thread that
    already has a running event loop.
    """

    @override
    def on_text_created(self, text) -> None:
        """Send a fixed header chunk when the assistant starts a text reply."""
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Send the text carried by one streamed delta (``delta.value``)."""
        awaiter(output_stream.send_delta(delta.value))

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Reject tool calls.

        Raises:
            Exception: Always.
        """
        raise Exception("Assistant shouldn't be calling tools.")
def create_and_stream(transcript):
    """Run the assistant on a transcript and stream its reply.

    Creates a thread holding a single user message, runs the configured
    assistant on it with an ``EventHandler``, and blocks until the stream
    finishes.

    Args:
        transcript: Text sent as the content of the user message.

    Raises:
        Any exception raised by the OpenAI client or by ``EventHandler``
        propagates unchanged.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler()
        ) as stream:
            stream.until_done()
    finally:
        # Set the flag on failure too, so anything waiting on
        # ``output_stream.done`` is not left waiting after a failed run.
        output_stream.done = True
def get_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognised forms are ``youtu.be/<id>`` and ``youtube.com/watch?...v=<id>``;
    the ``v`` parameter may appear anywhere in the query string. The
    ``youtu.be`` form is tried first.

    Args:
        url: String to search for a video URL.

    Returns:
        The video ID, or ``None`` when no supported URL form is found.
    """
    patterns = (
        # The dot is escaped so look-alike hosts such as "youtuXbe/" do not match.
        r'youtu\.be/([A-Za-z0-9_-]{11})',
        # Optionally skip earlier query parameters ("...&") before "v=".
        r'youtube\.com/watch\?(?:[^#\s]*&)?v=([A-Za-z0-9_-]{11})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    # Couldn't parse video ID from URL
    return None
# Fetches the English transcript for a video and returns it formatted as basic text
def get_auto_transcript(video_id):
    """Return the English transcript of a video as plain text.

    Args:
        video_id: YouTube video ID passed to ``YouTubeTranscriptApi``.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when
        transcripts are disabled for the video (the error is logged).

    Only ``TranscriptsDisabled`` is handled here; any other exception raised
    while fetching propagates to the caller.
    """
    try:
        raw_transcript = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except youtube_transcript_api._errors.TranscriptsDisabled as e:
        log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
        return None

    # format_transcript is an instance method, so a TextFormatter is created first
    return TextFormatter().format_transcript(raw_transcript)
# Module-level StreamOutput shared by EventHandler (which sends it deltas) and
# create_and_stream (which sets its done flag).
output_stream = StreamOutput()

13
docker-compose.yml Normal file
View File

@@ -0,0 +1,13 @@
version: '3.8'

services:
  app:
    # Build the image from the Dockerfile in this directory
    build: .
    container_name: screw-bardo-container
    ports:
      # host:container -- 1986 is the port gunicorn binds to in start.sh
      - "1986:1986"
    # Variables defined in .env are passed into the container's environment
    env_file:
      - .env
    volumes:
      # Bind-mount ./logs on the host to /app/logs in the container.
      # NOTE(review): the Python log() helper writes log.md relative to the
      # working directory, not under /app/logs -- confirm this mount captures
      # the logs it is meant to persist.
      - ./logs:/app/logs
    restart: unless-stopped

0
log.md
View File

Binary file not shown.

3
start.sh Normal file
View File

@@ -0,0 +1,3 @@
#!/bin/bash
# Container entrypoint: start gunicorn for app:app from the ./app directory.

# Abort on the first failing command (e.g. a missing ./app directory), on use
# of an unset variable, and on a failure anywhere in a pipeline, instead of
# carrying on and starting gunicorn from the wrong directory.
set -euo pipefail

cd ./app

# exec replaces the shell, so gunicorn receives the container's signals directly
exec gunicorn -b 0.0.0.0:1986 --log-level debug app:app