Somewhat messy, but ready for production.

2024-10-15 20:45:01 -04:00
parent 05b7c247a7
commit d66fe2155c
5 changed files with 238 additions and 37 deletions
--- a/app.py
+++ b/app.py
@@ -1,10 +1,8 @@
 from flask import Flask, render_template, Response, request
-from main import get_auto_transcript, get_video_id, create_and_stream, output_stream, fake_stream, awaiter
+from main import get_auto_transcript, get_video_id, create_and_stream, log, output_stream, awaiter 
 from asyncio import sleep
 from datetime import datetime
-import threading
+import threading, pytz
 import pytz
 import time
@@ -16,44 +14,44 @@ def home():
@app.route('/process_url', methods=['POST'])
 def process_url():
-    # Opens a file to log the video id and the assistants respone to see if I can further improve instructions:
+    global thread
-    #log = open("log.txt", "at", 1)
+    log(f"\n\n\n## New Entry at {datetime.now(pytz.timezone('America/New_York')).strftime('%Y-%m-%d %H:%M:%S')}\n\n")
    url = request.form['url']
-    if url == "test":
+    log(f"URL: {url}\n")
        global thread
        thread = threading.Thread(name="test_thread", target=fake_stream)
        return Response("teehee", status=200)
    # Extract the video ID from the URL
    video_id = get_video_id(url)  # Modify this function to accept the URL
    if not video_id:
        log(f"Could not parse video id from URL: {url}")
        return "Couldn't parse video ID from URL. (Are you sure you entered a valid YouTube.com or YouTu.be URL?)"
    log(f"Video ID: {video_id}\n\n")
    # Get the transcript for that video ID
    transcript = get_auto_transcript(video_id)
    if (not transcript):
        log("## Error: could not retrieve transcript, Assistant won't be called.")
        return "Successfully parsed video ID from URL, however the ID was either invalid, the transcript was disabled by the video owner, or some other error was raised because of YouTube."
    thread = threading.Thread(name="create_stream", target=create_and_stream, args=(transcript,)) # The comma here is very intentional, it's so that it iterates it as a tuple rather than iterateing the string.
-    
+    log("Stream preperation complete, sending reply...\n\n")
-    return Response("Processing started. Check /stream_output for updates.", status=200)  # Add more detailed output if needed
+    return Response("Processing started. Check /stream_output for updates.", content_type='text/plain', status=200)  # Add more detailed output if needed
@app.route('/stream_output')
 def stream_output():
    def yoink():
-        print("Starting stream thread.")
+        log("<details>\n<summary>Starting stream thread...</summary>\n\n")
        thread.start()
        # Start streaming output from output_stream
-        print("Starting to stream output...")
+        log("Starting to stream output.")
        most_recent = ""
        while not output_stream.done:
            if output_stream.buffer != []:
                delta = output_stream.buffer.pop(0)
                yield bytes(delta, encoding="utf-8")
            else:
                awaiter(sleep(0.05))
        log(f"\nStream successfully completely.\n\n</details>\n\n---\n\n### Completed Assistant Response:\n{output_stream.response}\n\n---\n\n")
        output_stream.reset()
        thread.join()
        log("\n### Task completed sucessfully without errors!")
        return 
    return Response(yoink(), content_type='text/plain', status=200)
--- a/log.md
+++ b/log.md
@@ -0,0 +1,195 @@
 ## New Entry at 2024-10-15 20:39:11
 URL: https://www.youtube.com/watch?v=dQw4w9WgXcQ
 Video ID: dQw4w9WgXcQ
 Stream preperation complete, sending reply...
 <details>
 <summary>Starting stream thread...</summary>
 Starting to stream output.
 Stream successfully completely.
 </details>
 ---
 ### Completed Assistant Response:
 Response Recieved:
 Screw-Bardo:
 It appears that the transcript provided contains lyrics from a popular song rather than a social studies lecture. Given this, I will proceed with the task of taking notes and generating questions based on what is present in the text, even though it does not relate to social studies.
 **Notes:**
 - Love
  - Involves rules and commitments
  - Emphasizes feelings and understanding
 - Relationship
  - Characters have known each other for a long time
  - Heartache without expression 
  - Mutual understanding of feelings
 - Commitment
  - Pledges never to give up, let down, or desert
  - Promises made to avoid causing pain or sadness
 **Questions:**
 Q: What is the central theme of the lyrics?
 A: The central theme is love and commitment.
 Q: What has been shared between the characters?
 A: They have known each other for a long time.
 Q: What emotions are expressed in the relationship?
 A: There is heartache and mutual understanding.
 Q: What promises are made in the lyrics?
 A: Promises are made to never give up, let down, or desert the other.
 Q: How do the characters feel about their current situation?
 A: They feel a need to express unspoken feelings.
 ---
 ### Task completed sucessfully without errors!
 ## New Entry at 2024-10-15 20:39:30
 URL: https://www.youtube.com/watch?v=ivBcfQDtMhQ
 Video ID: ivBcfQDtMhQ
 Stream preperation complete, sending reply...
 <details>
 <summary>Starting stream thread...</summary>
 Starting to stream output.
 Stream successfully completely.
 </details>
 ---
 ### Completed Assistant Response:
 Response Recieved:
 Screw-Bardo:
 ### Notes
 - **U.S. Response to World War II**
  - Initially isolated and neutral
    - Disillusionment from World War I
      - 260,000 American deaths
      - No territorial or colonial gain
    - Lack of belief in the League of Nations
      - Did not join or sign Treaty of Versailles
  - War profiteering concerns
    - General Gerald Nye
      - Formed the Nye Committee to investigate war profiteering
  - International debts not paid
    - Countries not repaying war debts to the U.S.
  - Geographical isolation
    - Distance from Europe (3,000 miles away)
  - Domestic issues
    - Focus on Great Depression recovery
    - Neutrality Acts (1935, 1936, 1937)
      - Made it illegal to choose sides in foreign conflicts
      - Included arms embargo against warring nations
  - **FDR's Role**
    - Circumvented Neutrality Acts with Cash and Carry
    - Saw war as an opportunity to lift the U.S. out of the Great Depression
    - Restricted foreign travel for Americans
      - Response to sinking of the Lusitania
 - **America First Committee**
  - Promoted by Charles Lindbergh
  - Advocated for prioritizing American interests over European affairs
 - **Neutrality Renouncement (1937)**
  - FDR began military buildup
  - First peacetime draft (Selective Service Act of 1940)
    - Ages 21 to 35
    - Aimed to create an army of several million
 - **Aid to Allies**
  - Lend-Lease Act (1941)
    - U.S. supported Allies by supplying war materials
  - Atlantic Charter (1941)
    - Agreement between Roosevelt and Churchill outlining war aims
 - **Industrial Mobilization**
  - Massive increase in production (e.g., arms and materials)
    - 1940: 6,000 planes
    - 1945: 60,000 planes
    - Weapons production increased significantly
  - Women's contribution to the workforce
    - Example: Rosie the Riveter
 - **Government Agencies**
  - War Production Board
    - Restricted civilian goods production
  - Office of Price Administration (OPA)
    - Set prices and rents
  - Office of War Information
    - Censored information to control public messaging
  - War financing through bonds and taxes
    - 49 billion dollars in bonds by 1945
 - **Labor Relations**
  - AFL and CIO agreements to avoid strikes
  - Wage increases followed by freezes
  - Managing strikes, like coal strike in 1943
 ### Questions and Answers
 Q: Why did the U.S. initially maintain an isolationist stance during World War II?  
 A: The U.S. was disillusioned from World War I and focused on domestic economic recovery.
 Q: What were the Neutrality Acts?  
 A: Laws that made it illegal for the U.S. to choose sides in foreign conflicts or export arms to warring nations.
 Q: Who was Charles Lindbergh, and what did he advocate for?  
 A: A prominent leader of the America First Committee who promoted prioritizing American interests over involvement in Europe.
 Q: What was the Lend-Lease Act?  
 A: A U.S. program that supplied Allied nations with war materials during World War II.
 Q: How did the U.S. government finance the war effort?  
 A: Through the sale of war bonds and increased taxes.
 ---
 ### Task completed sucessfully without errors!
 ## New Entry at 2024-10-15 20:40:02
 URL: https://www.youtube.com/watch?v=ivBc5QDtMhQ
 Video ID: ivBc5QDtMhQ
 # Exception while fetching transcript:
 Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ivBc5QDtMhQ! This is most likely caused by:
 Subtitles are disabled for this video
 If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
 ## Error: could not retrieve transcript, Assistant won't be called.
 ## New Entry at 2024-10-15 20:43:26
 URL: https://www.youtube.com/watch?v=ivBfQDtMhQ
 Could not parse video id from URL: https://www.youtube.com/watch?v=ivBfQDtMhQ
--- a/main.py
+++ b/main.py
@@ -29,7 +29,6 @@ class StreamOutput:
    self.response = ""
    self.done = False
    self.buffer: list = []
    print("Reset stream output obj")
  async def send_delta(self, delta):
      self.delta = delta
@@ -44,9 +43,8 @@ class StreamOutput:
          if self.delta != self.buffer[get_index(self.buffer)]:
            self.buffer.append(delta)
        except IndexError as index_error:
-          print(index_error)
+          log(f"\nCaught IndexError: {str(index_error)}")
          self.buffer.append(delta)
      else: self.buffer.append(delta)
 # To get the env var
@@ -55,6 +53,13 @@ import os
 load_dotenv()
 # For logging
 import pytz
 from datetime import datetime
 def log(str):
  """Append ``str`` verbatim to ``log.md`` in the current working directory.

  The file is opened in text-append mode on every call and closed again
  when the ``with`` block exits. No newline or timestamp is added; callers
  supply their own formatting.

  Args:
    str: Text to append. The name shadows the ``str`` builtin inside this
      function; it is kept unchanged so existing callers keep working.
  """
  # Pin the encoding: without it, open() uses the platform locale encoding,
  # and logged text containing characters outside that encoding raises
  # UnicodeEncodeError on non-UTF-8 locales.
  with open("log.md", "at", encoding="utf-8") as file:
    file.write(str)
 ### OpenAI Config 
@@ -73,7 +78,7 @@ asst_screw_bardo_id = "asst_JGFaX6uOIotqy5mIJnu3Yyp7"
 class EventHandler(AssistantEventHandler):    
  @override
  def on_text_created(self, text) -> None:
-    awaiter(output_stream.send_delta("Response Recieved:\nScrew-Bardo: "))
+    awaiter(output_stream.send_delta("Response Recieved:\n\nScrew-Bardo:\n\n"))
  @override
  def on_text_delta(self, delta, snapshot):
@@ -93,17 +98,6 @@ def create_and_stream(transcript):
      stream.until_done()
      output_stream.done = True
 def fake_stream():
  """Push a fixed sequence of stringified integers into ``output_stream``.

  For each value, ``awaiter`` is called on ``asyncio.sleep(0.05)`` and then
  on ``output_stream.send_delta(...)`` with the value converted to ``str``.
  Once every value has been sent, ``output_stream.done`` is set to ``True``.
  """
  # Same values as before; note the sequence goes from 16 straight to 18.
  fake_deltas = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18]
  print("Starting fake stream.")
  for value in fake_deltas:
    # presumably awaiter() runs the coroutine to completion — confirm against its definition
    awaiter(asyncio.sleep(0.05))
    awaiter(output_stream.send_delta(str(value)))
  output_stream.done = True
 def get_video_id(url):
  youtu_be = r'(?<=youtu.be/)([A-Za-z0-9_-]{11})'
  youtube_com = r'(?<=youtube\.com\/watch\?v=)([A-Za-z0-9_-]{11})'
@@ -113,7 +107,7 @@ def get_video_id(url):
      id = re.search(youtube_com, url)
  if not id:
-      print("Couldn't parse video ID from URL")
+      # Couldn't parse video ID from URL
      return None
  return id.group(1)
@@ -123,7 +117,8 @@ def get_auto_transcript(video_id):
  trans_api_errors = youtube_transcript_api._errors
  try:
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'], proxies=None, cookies=None, preserve_formatting=False)
-  except trans_api_errors.TranscriptsDisabled:
+  except trans_api_errors.TranscriptsDisabled as e:
    log(f'\n\n# Exception while fetching transcript:\n \n{e}\n')
    return None
  formatter = TextFormatter()  # Ensure that you create an instance of TextFormatter
--- a/website/static/script.js
+++ b/website/static/script.js
@@ -8,6 +8,9 @@ document.addEventListener("DOMContentLoaded", (event) => {
            response_area.innerText = 'Please enter a URL.';
            return;
        }
        else {
            document.getElementById('url_box').value = "";
        }
        // First, process the URL
        fetch('/process_url', {
@@ -21,9 +24,17 @@ document.addEventListener("DOMContentLoaded", (event) => {
            if (!response.ok) {
                throw new Error('Network response was not ok');
            }
-            // Start streaming once processing is started
+            // Extract the text from the response body
-            submit_button.style.display = "none";
+            return response.text(); // Use .json() if the response is JSON
-            streamOutput(response_area);
+        })
        .then(text => {
            submit_button.style.display = "none";    
            if (text === "Processing started. Check /stream_output for updates.") {
                streamOutput(response_area);
            } else {
                response_area.innerText = text; // Show any other response message
                submit_button.style.display = "flex";
            }
        })
        .catch(error => {
            console.error('Error processing URL:', error);
@@ -52,6 +63,7 @@ function streamOutput(response_area) {
                    // Decode and process the chunk
                    const chunk = decoder.decode(value, { stream: true });
                    response_area.innerHTML += chunk;
                    response_area.scrollTop = response_area.scrollHeight
                    // Continue reading
                    readStream();
--- a/website/static/style.css
+++ b/website/static/style.css
@@ -37,12 +37,13 @@ body .content {
 }
 #response-area {
-    display: flex;
+    display: block;
    height: 90%;
    min-height: 90vh;
    text-wrap: wrap;
    flex-wrap: wrap;
    align-content: flex-end;
    overflow-y: auto;
 }
 .form_box {