From cb4a781d3ff5367195968f3a82f3484bbf2e17a0 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Wed, 20 Nov 2024 12:02:27 -0500 Subject: [PATCH] Dockerized --- .dockerignore | 12 + Dockerfile | 31 +++ app.py => app/app.py | 0 main.py => app/main.py | 256 +++++++++--------- {website => app/website}/index.html | 0 .../font-files/nimbus-sans-d-ot-light.woff | Bin .../font-files/nimbus-sans-d-ot-light.woff2 | Bin {website => app/website}/static/script.js | 0 {website => app/website}/static/style.css | 0 docker-compose.yml | 13 + log.md | 0 requirements.txt | Bin 1108 -> 1158 bytes start.sh | 3 + 13 files changed, 187 insertions(+), 128 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile rename app.py => app/app.py (100%) rename main.py => app/main.py (96%) rename {website => app/website}/index.html (100%) rename {website => app/website}/static/font-files/nimbus-sans-d-ot-light.woff (100%) rename {website => app/website}/static/font-files/nimbus-sans-d-ot-light.woff2 (100%) rename {website => app/website}/static/script.js (100%) rename {website => app/website}/static/style.css (100%) create mode 100644 docker-compose.yml delete mode 100644 log.md create mode 100644 start.sh diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..fb3a569 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,12 @@ +__pycache__ +*.pyc +*.pyo +*.pyd +*.env +*venv/ +.env +*.git +.gitignore +Dockerfile +docker-compose.yml +log.md \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0e21369 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +# Use an official Python runtime as a parent image +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + && rm -rf /var/lib/apt/lists/* + +# Set work directory +WORKDIR /app + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --upgrade pip +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application files +COPY . /app + +# Make start.sh executable +RUN chmod +x start.sh + +# Expose the port the app runs on +EXPOSE 1986 + +# Specify the entrypoint script +ENTRYPOINT ["./start.sh"] diff --git a/app.py b/app/app.py similarity index 100% rename from app.py rename to app/app.py diff --git a/main.py b/app/main.py similarity index 96% rename from main.py rename to app/main.py index c4c8ead..a31470c 100644 --- a/main.py +++ b/app/main.py @@ -1,129 +1,129 @@ -# To parse video ids -import re - -# Youtube Transcript stuff import -import youtube_transcript_api._errors -from youtube_transcript_api import YouTubeTranscriptApi -from youtube_transcript_api.formatters import TextFormatter - -# OpenAI API stuff import -from openai import AssistantEventHandler -from openai import OpenAI - -### For streaming -from typing_extensions import override -import asyncio -awaiter = asyncio.run - -# The StreamOutput class to handle streaming -class StreamOutput: - - def __init__(self): - self.delta: str = "" - self.response: str = "" - self.done: bool = False - self.buffer: list = [] - - def reset(self): - self.delta = "" - self.response = "" - self.done = False - self.buffer: list = [] - - async def send_delta(self, delta): - self.delta = delta - self.response += delta - def get_index(list): - if len(list) == 0: - return 0 - else: - return len(list)-1 - if self.buffer != []: - try: - if self.delta != self.buffer[get_index(self.buffer)]: - self.buffer.append(delta) - except IndexError as index_error: - log(f"\nCaught IndexError: {str(index_error)}") - self.buffer.append(delta) - else: self.buffer.append(delta) - -# To get the env var -from dotenv import load_dotenv -import os - -load_dotenv() - -# For logging -import pytz -from datetime import datetime - -def log(str): - with open("log.md", "at") as file: - file.write(str) - -### OpenAI Config - -# Setting up OpenAI Client with API Key -api_key = os.getenv("OPENAI_API_KEY") -client = OpenAI( - organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', - project="proj_NGz8Kux8CSka7DRJucAlDCz6", - api_key=api_key -) - -# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript -asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" - -# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text) -> None: - awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) - - @override - def on_text_delta(self, delta, snapshot): - awaiter(output_stream.send_delta(delta.value)) - - def on_tool_call_created(self, tool_call): - raise Exception("Assistant shouldn't be calling tools.") - -def create_and_stream(transcript): - with client.beta.threads.create_and_run_stream( - assistant_id=asst_screw_bardo_id, - thread={ - "messages": [{"role": "user", "content": transcript}] - }, - event_handler=EventHandler() - ) as stream: - stream.until_done() - output_stream.done = True - -def get_video_id(url): - youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' - youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' - - id = re.search(youtu_be, url) - if not id: - id = re.search(youtube_com, url) - - if not id: - # Couldn't parse video ID from URL - return None - - return id.group(1) - -# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt -def get_auto_transcript(video_id): - trans_api_errors = youtube_transcript_api._errors - try: - transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) - except trans_api_errors.TranscriptsDisabled as e: - log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') - return None - - formatter = TextFormatter() # Ensure that you create an instance of TextFormatter - - txt_transcript = formatter.format_transcript(transcript) - return txt_transcript - +# To parse video ids +import re + +# Youtube Transcript stuff import +import youtube_transcript_api._errors +from youtube_transcript_api import YouTubeTranscriptApi +from youtube_transcript_api.formatters import TextFormatter + +# OpenAI API stuff import +from openai import AssistantEventHandler +from openai import OpenAI + +### For streaming +from typing_extensions import override +import asyncio +awaiter = asyncio.run + +# The StreamOutput class to handle streaming +class StreamOutput: + + def __init__(self): + self.delta: str = "" + self.response: str = "" + self.done: bool = False + self.buffer: list = [] + + def reset(self): + self.delta = "" + self.response = "" + self.done = False + self.buffer: list = [] + + async def send_delta(self, delta): + self.delta = delta + self.response += delta + def get_index(list): + if len(list) == 0: + return 0 + else: + return len(list)-1 + if self.buffer != []: + try: + if self.delta != self.buffer[get_index(self.buffer)]: + self.buffer.append(delta) + except IndexError as index_error: + log(f"\nCaught IndexError: {str(index_error)}") + self.buffer.append(delta) + else: self.buffer.append(delta) + +# To get the env var +from dotenv import load_dotenv +import os + +load_dotenv() + +# For logging +import pytz +from datetime import datetime + +def log(str): + with open("log.md", "at") as file: + file.write(str) + +### OpenAI Config + +# Setting up OpenAI Client with API Key +api_key = os.getenv("OPENAI_API_KEY") +client = OpenAI( + organization='org-7ANUFsqOVIXLLNju8Rvmxu3h', + project="proj_NGz8Kux8CSka7DRJucAlDCz6", + api_key=api_key +) + +# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript +asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7" + +# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing: +class EventHandler(AssistantEventHandler): + @override + def on_text_created(self, text) -> None: + awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n")) + + @override + def on_text_delta(self, delta, snapshot): + awaiter(output_stream.send_delta(delta.value)) + + def on_tool_call_created(self, tool_call): + raise Exception("Assistant shouldn't be calling tools.") + +def create_and_stream(transcript): + with client.beta.threads.create_and_run_stream( + assistant_id=asst_screw_bardo_id, + thread={ + "messages": [{"role": "user", "content": transcript}] + }, + event_handler=EventHandler() + ) as stream: + stream.until_done() + output_stream.done = True + +def get_video_id(url): + youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})' + youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})' + + id = re.search(youtu_be, url) + if not id: + id = re.search(youtube_com, url) + + if not id: + # Couldn't parse video ID from URL + return None + + return id.group(1) + +# Takes the transcript and formats it in basic text before writing it to auto-transcript.txt +def get_auto_transcript(video_id): + trans_api_errors = youtube_transcript_api._errors + try: + transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False) + except trans_api_errors.TranscriptsDisabled as e: + log(f'\n\n# Exception while fetching transcript:\n \n{e}\n') + return None + + formatter = TextFormatter() # Ensure that you create an instance of TextFormatter + + txt_transcript = formatter.format_transcript(transcript) + return txt_transcript + output_stream = StreamOutput() \ No newline at end of file diff --git a/website/index.html b/app/website/index.html similarity index 100% rename from website/index.html rename to app/website/index.html diff --git a/website/static/font-files/nimbus-sans-d-ot-light.woff b/app/website/static/font-files/nimbus-sans-d-ot-light.woff similarity index 100% rename from website/static/font-files/nimbus-sans-d-ot-light.woff rename to app/website/static/font-files/nimbus-sans-d-ot-light.woff diff --git a/website/static/font-files/nimbus-sans-d-ot-light.woff2 b/app/website/static/font-files/nimbus-sans-d-ot-light.woff2 similarity index 100% rename from website/static/font-files/nimbus-sans-d-ot-light.woff2 rename to app/website/static/font-files/nimbus-sans-d-ot-light.woff2 diff --git a/website/static/script.js b/app/website/static/script.js similarity index 100% rename from website/static/script.js rename to app/website/static/script.js diff --git a/website/static/style.css b/app/website/static/style.css similarity index 100% rename from website/static/style.css rename to app/website/static/style.css diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..403d03b --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ +version: '3.8' + +services: + app: + build: . + container_name: screw-bardo-container + ports: + - "1986:1986" + env_file: + - .env + volumes: + - ./logs:/app/logs + restart: unless-stopped \ No newline at end of file diff --git a/log.md b/log.md deleted file mode 100644 index e69de29..0000000 diff --git a/requirements.txt b/requirements.txt index 6fe18c5fa70cf40dccf6a24f35a5a2b02ae40b62..15691711094f3de024d83b7d4c9537336e4a00b5 100644 GIT binary patch delta 162 zcmcb@(Z)H!gV%^bkD-7epP`tcgu!s4r!=c2P{3efW+bZ#m~q@yK9wPjp%kbfl_7

?-Kx{afmC=;d5TtmrJ!3qh`Q-VGQ&}N0mQ1po#vmCWHkusIBn>vy nbaFkD;^ae2Vw1Nq-2}Lb+R3! z9;*S6V?4Q#F`m(M@