Merge branch 'dev'
This commit is contained in:
12
.dockerignore
Normal file
12
.dockerignore
Normal file
@@ -0,0 +1,12 @@
|
||||
# Python bytecode and caches.
# .dockerignore patterns are matched relative to the build-context root, so a
# bare `*.pyc` only covers top-level files; the `**/` forms cover every depth.
__pycache__
**/__pycache__
*.pyc
**/*.pyc
*.pyo
**/*.pyo
*.pyd
**/*.pyd

# Environment files and virtual environments (keep secrets out of the image).
*.env
*venv/
.env

# Version-control metadata.
*.git
.gitignore

# Build tooling that the running container does not need.
Dockerfile
docker-compose.yml

# Log file.
log.md
|
||||
31
Dockerfile
Normal file
31
Dockerfile
Normal file
@@ -0,0 +1,31 @@
|
||||
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set environment variables:
#   PYTHONDONTWRITEBYTECODE=1 - do not write .pyc files
#   PYTHONUNBUFFERED=1        - do not buffer stdout/stderr
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install system dependencies.
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Set work directory
WORKDIR /app

# Install Python dependencies.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# dependency list changes; both pip steps share one layer and skip the cache.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY . /app

# Make start.sh executable
RUN chmod +x start.sh

# Expose the port the app runs on
EXPOSE 1986

# Specify the entrypoint script
ENTRYPOINT ["./start.sh"]
|
||||
@@ -1,129 +1,129 @@
|
||||
# To parse video ids
|
||||
import re
|
||||
|
||||
# Youtube Transcript stuff import
|
||||
import youtube_transcript_api._errors
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api.formatters import TextFormatter
|
||||
|
||||
# OpenAI API stuff import
|
||||
from openai import AssistantEventHandler
|
||||
from openai import OpenAI
|
||||
|
||||
### For streaming
|
||||
from typing_extensions import override
|
||||
import asyncio
|
||||
awaiter = asyncio.run
|
||||
|
||||
# The StreamOutput class to handle streaming
|
||||
class StreamOutput:
    """Holds the state of one streamed response.

    Attributes:
        delta: the most recent chunk passed to ``send_delta``.
        response: every chunk received so far, concatenated.
        done: completion flag; this class only ever resets it to ``False``.
        buffer: list of received chunks (see ``send_delta`` for the
            consecutive-duplicate rule).
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Restore every field to its initial empty state."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record one streamed chunk.

        ``delta`` always becomes ``self.delta`` and is appended to
        ``self.response``. It is appended to ``self.buffer`` unless it equals
        the chunk currently last in the buffer.

        Args:
            delta: text chunk to record.
        """
        self.delta = delta
        self.response += delta

        # NOTE(review): a chunk identical to the previous one is kept in
        # `response` but skipped in `buffer` -- confirm this is intended.
        try:
            repeats_last = bool(self.buffer) and self.buffer[-1] == delta
        except IndexError as index_error:
            # Kept from the original. Presumably guards against the buffer
            # being emptied elsewhere between the check and the lookup --
            # TODO confirm against the consumer.
            log(f"\nCaught IndexError: {str(index_error)}")
            repeats_last = False

        if not repeats_last:
            self.buffer.append(delta)
|
||||
|
||||
# To get the env var
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# For logging
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
|
||||
def log(str):
    """Append ``str`` verbatim to ``log.md`` in the current working directory.

    Nothing is added to the text (no timestamp, no trailing newline). The file
    is opened as UTF-8 explicitly so non-ASCII text does not depend on the
    platform's default encoding.

    Args:
        str: text to append. The name shadows the builtin; it is kept so that
            existing keyword callers keep working.
    """
    with open("log.md", "a", encoding="utf-8") as file:
        file.write(str)
|
||||
|
||||
### OpenAI Config
|
||||
|
||||
# Setting up OpenAI Client with API Key
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when it is unset.
# NOTE(review): the organization and project IDs below are hard-coded --
# consider reading them from the environment as well.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=api_key
)

# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
# This ID is passed as `assistant_id` by create_and_stream.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||
|
||||
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
|
||||
class EventHandler(AssistantEventHandler):
    """Stream callbacks that forward assistant text into ``output_stream``.

    ``send_delta`` is a coroutine, so each callback drives it to completion
    with ``awaiter`` (``asyncio.run``), which starts a fresh event loop per
    call.
    """

    @override
    def on_text_created(self, text) -> None:
        """Push a fixed header chunk; ``text`` itself is not used."""
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Push ``delta.value`` to the output stream; ``snapshot`` is not used."""
        awaiter(output_stream.send_delta(delta.value))

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Reject any tool call.

        Raises:
            RuntimeError: always. It is a subclass of ``Exception``, so code
                catching ``Exception`` still sees it.
        """
        raise RuntimeError("Assistant shouldn't be calling tools.")
|
||||
|
||||
def create_and_stream(transcript):
    """Run the assistant on ``transcript`` and block until its stream ends.

    A new thread holding ``transcript`` as a single user message is created
    and run against ``asst_screw_bardo_id``; events are handled by a fresh
    ``EventHandler``.

    Args:
        transcript: text sent as the content of the user message.

    Raises:
        Whatever the client or the event handler raises; nothing is caught.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler(),
        ) as stream:
            stream.until_done()
    finally:
        # Flag completion even when the run fails, so `done` is not left
        # False after an error.
        output_stream.done = True
|
||||
|
||||
def get_video_id(url):
    """Extract the 11-character YouTube video ID from ``url``.

    Two URL shapes are recognised, tried in this order:
    ``youtu.be/<id>`` and ``youtube.com/watch?v=<id>``.

    Args:
        url: the URL text to inspect.

    Returns:
        The video ID, or ``None`` when ``url`` is not a string or contains
        neither shape.
    """
    if not isinstance(url, str):
        return None

    # The dots are escaped so that only a literal "." matches.
    patterns = (
        r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})',
        r'(?<=youtube\.com/watch\?v=)([A-Za-z0-9_-]{11})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    # Couldn't parse video ID from URL
    return None
|
||||
|
||||
# Fetches the video's English transcript and returns it as plain text (nothing is written to disk here)
|
||||
def get_auto_transcript(video_id):
    """Fetch the English transcript for ``video_id`` as plain text.

    Args:
        video_id: YouTube video ID to look up.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when
        transcripts are disabled for the video (the error is logged first).

    Raises:
        Any other error from the transcript API; only ``TranscriptsDisabled``
        is caught here.
    """
    try:
        entries = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except youtube_transcript_api._errors.TranscriptsDisabled as e:
        log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
        return None

    # format_transcript is an instance method, so a formatter is created first.
    return TextFormatter().format_transcript(entries)
|
||||
|
||||
# To parse video ids
|
||||
import re
|
||||
|
||||
# Youtube Transcript stuff import
|
||||
import youtube_transcript_api._errors
|
||||
from youtube_transcript_api import YouTubeTranscriptApi
|
||||
from youtube_transcript_api.formatters import TextFormatter
|
||||
|
||||
# OpenAI API stuff import
|
||||
from openai import AssistantEventHandler
|
||||
from openai import OpenAI
|
||||
|
||||
### For streaming
|
||||
from typing_extensions import override
|
||||
import asyncio
|
||||
awaiter = asyncio.run
|
||||
|
||||
# The StreamOutput class to handle streaming
|
||||
class StreamOutput:
    """Holds the state of one streamed response.

    Attributes:
        delta: the most recent chunk passed to ``send_delta``.
        response: every chunk received so far, concatenated.
        done: completion flag; this class only ever resets it to ``False``.
        buffer: list of received chunks (see ``send_delta`` for the
            consecutive-duplicate rule).
    """

    def __init__(self):
        self.delta: str = ""
        self.response: str = ""
        self.done: bool = False
        self.buffer: list = []

    def reset(self):
        """Restore every field to its initial empty state."""
        self.delta = ""
        self.response = ""
        self.done = False
        self.buffer = []

    async def send_delta(self, delta):
        """Record one streamed chunk.

        ``delta`` always becomes ``self.delta`` and is appended to
        ``self.response``. It is appended to ``self.buffer`` unless it equals
        the chunk currently last in the buffer.

        Args:
            delta: text chunk to record.
        """
        self.delta = delta
        self.response += delta

        # NOTE(review): a chunk identical to the previous one is kept in
        # `response` but skipped in `buffer` -- confirm this is intended.
        try:
            repeats_last = bool(self.buffer) and self.buffer[-1] == delta
        except IndexError as index_error:
            # Kept from the original. Presumably guards against the buffer
            # being emptied elsewhere between the check and the lookup --
            # TODO confirm against the consumer.
            log(f"\nCaught IndexError: {str(index_error)}")
            repeats_last = False

        if not repeats_last:
            self.buffer.append(delta)
|
||||
|
||||
# To get the env var
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
load_dotenv()
|
||||
|
||||
# For logging
|
||||
import pytz
|
||||
from datetime import datetime
|
||||
|
||||
def log(str):
    """Append ``str`` verbatim to ``log.md`` in the current working directory.

    Nothing is added to the text (no timestamp, no trailing newline). The file
    is opened as UTF-8 explicitly so non-ASCII text does not depend on the
    platform's default encoding.

    Args:
        str: text to append. The name shadows the builtin; it is kept so that
            existing keyword callers keep working.
    """
    with open("log.md", "a", encoding="utf-8") as file:
        file.write(str)
|
||||
|
||||
### OpenAI Config
|
||||
|
||||
# Setting up OpenAI Client with API Key
# The key is read from the OPENAI_API_KEY environment variable; os.getenv
# returns None when it is unset.
# NOTE(review): the organization and project IDs below are hard-coded --
# consider reading them from the environment as well.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(
    organization='org-7ANUFsqOVIXLLNju8Rvmxu3h',
    project="proj_NGz8Kux8CSka7DRJucAlDCz6",
    api_key=api_key
)

# screw bardo assistant that is configured to make notes and 5Q&A based on any given YouTube Transcript
# This ID is passed as `assistant_id` by create_and_stream.
asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
|
||||
|
||||
# This is copy and pasted straight up from the quickstart guide, just appending to an output buffer instead of directly printing:
|
||||
class EventHandler(AssistantEventHandler):
    """Stream callbacks that forward assistant text into ``output_stream``.

    ``send_delta`` is a coroutine, so each callback drives it to completion
    with ``awaiter`` (``asyncio.run``), which starts a fresh event loop per
    call.
    """

    @override
    def on_text_created(self, text) -> None:
        """Push a fixed header chunk; ``text`` itself is not used."""
        awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        """Push ``delta.value`` to the output stream; ``snapshot`` is not used."""
        awaiter(output_stream.send_delta(delta.value))

    @override
    def on_tool_call_created(self, tool_call) -> None:
        """Reject any tool call.

        Raises:
            RuntimeError: always. It is a subclass of ``Exception``, so code
                catching ``Exception`` still sees it.
        """
        raise RuntimeError("Assistant shouldn't be calling tools.")
|
||||
|
||||
def create_and_stream(transcript):
    """Run the assistant on ``transcript`` and block until its stream ends.

    A new thread holding ``transcript`` as a single user message is created
    and run against ``asst_screw_bardo_id``; events are handled by a fresh
    ``EventHandler``.

    Args:
        transcript: text sent as the content of the user message.

    Raises:
        Whatever the client or the event handler raises; nothing is caught.
    """
    try:
        with client.beta.threads.create_and_run_stream(
            assistant_id=asst_screw_bardo_id,
            thread={
                "messages": [{"role": "user", "content": transcript}]
            },
            event_handler=EventHandler(),
        ) as stream:
            stream.until_done()
    finally:
        # Flag completion even when the run fails, so `done` is not left
        # False after an error.
        output_stream.done = True
|
||||
|
||||
def get_video_id(url):
    """Extract the 11-character YouTube video ID from ``url``.

    Two URL shapes are recognised, tried in this order:
    ``youtu.be/<id>`` and ``youtube.com/watch?v=<id>``.

    Args:
        url: the URL text to inspect.

    Returns:
        The video ID, or ``None`` when ``url`` is not a string or contains
        neither shape.
    """
    if not isinstance(url, str):
        return None

    # The dots are escaped so that only a literal "." matches.
    patterns = (
        r'(?<=youtu\.be/)([A-Za-z0-9_-]{11})',
        r'(?<=youtube\.com/watch\?v=)([A-Za-z0-9_-]{11})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    # Couldn't parse video ID from URL
    return None
|
||||
|
||||
# Fetches the video's English transcript and returns it as plain text (nothing is written to disk here)
|
||||
def get_auto_transcript(video_id):
    """Fetch the English transcript for ``video_id`` as plain text.

    Args:
        video_id: YouTube video ID to look up.

    Returns:
        The transcript formatted by ``TextFormatter``, or ``None`` when
        transcripts are disabled for the video (the error is logged first).

    Raises:
        Any other error from the transcript API; only ``TranscriptsDisabled``
        is caught here.
    """
    try:
        entries = YouTubeTranscriptApi.get_transcript(
            video_id,
            languages=['en'],
            proxies=None,
            cookies=None,
            preserve_formatting=False,
        )
    except youtube_transcript_api._errors.TranscriptsDisabled as e:
        log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
        return None

    # format_transcript is an instance method, so a formatter is created first.
    return TextFormatter().format_transcript(entries)
|
||||
|
||||
# Module-level StreamOutput instance: EventHandler sends deltas to it and
# create_and_stream sets its `done` flag.
output_stream = StreamOutput()
|
||||
13
docker-compose.yml
Normal file
13
docker-compose.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
# Compose definition for a single service, `app`, built from the Dockerfile
# in this directory.
version: '3.8'

services:
  app:
    build: .
    container_name: screw-bardo-container
    ports:
      # host:container -- matches the port EXPOSEd by the Dockerfile
      - "1986:1986"
    env_file:
      # Supplies environment variables at run time; .env is excluded from the
      # image by .dockerignore.
      - .env
    volumes:
      # NOTE(review): the Python module in this commit appends to a relative
      # `log.md`, not to a file under `logs/` -- confirm this mount captures
      # the log output you expect to persist.
      - ./logs:/app/logs
    restart: unless-stopped
|
||||
BIN
requirements.txt
BIN
requirements.txt
Binary file not shown.
Reference in New Issue
Block a user