From 4e3075baad90951ed7745b6afcb6267158441d95 Mon Sep 17 00:00:00 2001 From: foreverpyrite Date: Sat, 6 Sep 2025 18:14:28 -0400 Subject: [PATCH 1/4] Pre-testing phase --- .gitignore | 3 + Cargo.toml | 32 ++++ src/_async.rs | 4 + src/_async/r2bucket.rs | 84 +++++++++ src/_async/r2client.rs | 289 ++++++++++++++++++++++++++++++ src/aws_signing.rs | 197 +++++++++++++++++++++ src/error.rs | 15 ++ src/lib.rs | 19 ++ src/mimetypes.rs | 103 +++++++++++ src/sync.rs | 4 + src/sync/r2bucket.rs | 67 +++++++ src/sync/r2client.rs | 387 +++++++++++++++++++++++++++++++++++++++++ todo.md | 6 + 13 files changed, 1210 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/_async.rs create mode 100644 src/_async/r2bucket.rs create mode 100644 src/_async/r2client.rs create mode 100644 src/aws_signing.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/mimetypes.rs create mode 100644 src/sync.rs create mode 100644 src/sync/r2bucket.rs create mode 100644 src/sync/r2client.rs create mode 100644 todo.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b97b0a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +.aider* +.env diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..091590b --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "r2client" +version = "0.1.0" +edition = "2024" + +[lib] + +[dependencies] +sha2 = "0.10.9" +bytes = "1.10.1" +reqwest = { version = "0.12.19", features = ["blocking"] } +chrono = "0.4.41" +hex = "0.4.3" +hmac = "0.12.1" +xmltree = "0.11.0" +thiserror = "2" +async-trait = "0.1.89" +async-std = { version = "1.0", optional = true } +tokio = { version = "1.0", features = ["rt-multi-thread"], optional = true } +futures-executor = { version = "0.3", optional = true } +urlencoding = "2.1.3" +http = "1.3.1" + +[dev-dependencies] +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +dotenv = "0.15" + +[features] +async = ["tokio"] +default = ["async"] +sync = ["tokio/rt-multi-thread", "futures-executor"] +async-std-runtime = ["async-std"] diff --git a/src/_async.rs b/src/_async.rs new file mode 100644 index 0000000..57daa66 --- /dev/null +++ b/src/_async.rs @@ -0,0 +1,4 @@ +mod r2bucket; +mod r2client; +pub use r2bucket::R2Bucket; +pub use r2client::R2Client; diff --git a/src/_async/r2bucket.rs b/src/_async/r2bucket.rs new file mode 100644 index 0000000..fcd3cae --- /dev/null +++ b/src/_async/r2bucket.rs @@ -0,0 +1,84 @@ +use crate::R2Client; +use crate::R2Error; + +#[derive(Clone, Debug)] +pub struct R2Bucket { + bucket: String, + pub client: R2Client, +} + +impl R2Bucket { + pub fn new(bucket: String, client: R2Client) -> Self { + Self { bucket, client } + } + + pub fn from_credentials( + bucket: String, + access_key: String, + secret_key: String, + endpoint: String, + ) -> Self { + let client = R2Client::from_credentials(access_key, secret_key, endpoint); + Self { bucket, client } + } + + pub async fn upload_file( + &self, + local_file_path: &str, + r2_file_key: &str, + ) -> Result<(), R2Error> { + self.client + // I'm pasing None to let the R2Client derive the content type from the local_file_path + .upload_file(&self.bucket, local_file_path, r2_file_key, None) + .await + } + + pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + self.client + .download_file(&self.bucket, r2_file_key, local_path) + .await + } + + pub async fn list_files( + &self, + ) -> Result>, R2Error> { + self.client.list_files(&self.bucket).await + } + + pub async fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket).await + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::R2Client; + use std::env; + + fn get_test_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let access_key = + env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); + let secret_key = + env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); + let endpoint = env::var("R2_ENDPOINT") + .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); + let client = R2Client::from_credentials(access_key, secret_key, endpoint); + R2Bucket::new("test-bucket".to_string(), client) + } + + #[test] + fn test_bucket_construction() { + let bucket = get_test_bucket(); + assert_eq!(bucket.bucket, "test-bucket"); + } + + // Example async test (requires a runtime, so ignored by default) + // #[tokio::test] + // async fn test_upload_file() { + // let bucket = get_test_bucket(); + // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; + // assert!(result.is_ok()); + // } +} diff --git a/src/_async/r2client.rs b/src/_async/r2client.rs new file mode 100644 index 0000000..0b5c722 --- /dev/null +++ b/src/_async/r2client.rs @@ -0,0 +1,289 @@ +use crate::mimetypes::Mime; +use crate::{R2Error, aws_signing}; +use http::Method; +use reqwest::header::{self, HeaderName, HeaderValue}; +use sha2::{Digest, Sha256}; +use std::collections::HashMap; +use std::str::FromStr; + +#[derive(Clone, Debug)] +pub struct R2Client { + access_key: String, + secret_key: String, + endpoint: String, +} + +impl R2Client { + fn get_env() -> Result<(String, String, String), R2Error> { + let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; + let values = keys + .map(|key| { std::env::var(key).map_err(|_| R2Error::Env(key.to_owned())) }.unwrap()); + Ok(values.into()) + } + + pub fn new() -> Self { + let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); + + Self { + access_key, + secret_key, + endpoint, + } + } + + pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { + Self { + access_key, + secret_key, + endpoint, + } + } + + fn create_headers( + &self, + method: http::Method, + bucket: &str, + key: Option<&str>, + payload_hash: &str, + content_type: Option<&str>, + ) -> Result { + let uri = http::Uri::from_str(&self.build_url(bucket, key)) + .expect("invalid uri rip (make sure the build_url function works as intended)"); + let mut headers = Vec::new(); + if method == Method::GET { + headers.push(( + "x-amz-content-sha256".to_string(), + "UNSIGNED-PAYLOAD".to_string(), + )) + } + if let Some(content_type) = content_type { + headers.push(("content-type".to_string(), content_type.to_owned())) + } + + let (_, headers) = aws_signing::signature( + method, + uri, + headers, + payload_hash, + "s3", + "us-east-1", + &self.secret_key, + &self.access_key, + ); + let mut header_map = header::HeaderMap::new(); + for header in headers { + header_map.insert( + HeaderName::from_lowercase(&header.0.to_lowercase().as_bytes()) + .expect("shit tragic"), + HeaderValue::from_str(&header.1).expect("shit more tragic"), + ); + } + Ok(header_map) + } + + pub async fn upload_file( + &self, + bucket: &str, + local_file_path: &str, + r2_file_key: &str, + content_type: Option<&str>, + ) -> Result<(), R2Error> { + // --- Hash Payload -- + let file_data = std::fs::read(local_file_path)?; + let payload_hash = hex::encode(Sha256::digest(&file_data)); + + // Set HTTP Headers + let content_type = if let Some(content_type) = content_type { + Some(content_type) + } else { + Some(Mime::get_mimetype_from_fp(local_file_path)) + }; + let headers = self.create_headers( + Method::PUT, + bucket, + Some(r2_file_key), + &payload_hash, + content_type, + )?; + let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); + let client = reqwest::Client::new(); + let resp = client + .put(&file_url) + .headers(headers) + .body(file_data) + .send() + .await?; + let status = resp.status(); + let text = resp.text().await?; + if status.is_success() { + Ok(()) + } else { + Err(R2Error::Other(format!( + "Upload failed with status {}: {}", + status, text + ))) + } + } + pub async fn download_file( + &self, + bucket: &str, + key: &str, + local_path: &str, + ) -> Result<(), R2Error> { + let payload_hash = hex::encode(Sha256::digest("")); + let content_type = Mime::get_mimetype_from_fp(local_path); + let headers = self.create_headers( + Method::GET, + bucket, + Some(key), + &payload_hash, + Some(content_type), + )?; + let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); + let client = reqwest::Client::new(); + let resp = client.get(&file_url).headers(headers).send().await?; + let status = resp.status(); + let content = resp.bytes().await?; + if status.is_success() { + std::fs::write(local_path, &content)?; + Ok(()) + } else { + Err(R2Error::Other(format!( + "Download failed with status {}", + status + ))) + } + } + async fn get_bucket_listing(&self, bucket: &str) -> Result { + let payload_hash = "UNSIGNED-PAYLOAD"; + let content_type = "application/xml"; + let headers = + self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; + let url = self.build_url(bucket, None); + let client = reqwest::Client::new(); + let resp = client + .get(&url) + .headers(headers) + .send() + .await + .map_err(R2Error::from)?; + let status = resp.status(); + if status.is_success() { + resp.text().await.map_err(R2Error::from) + } else { + Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + } + } + + pub async fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; + let mut files_dict: HashMap> = HashMap::new(); + let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; + for content in root + .children + .iter() + .filter_map(|c| c.as_element()) + .filter(|e| e.name == "Contents") + { + let key_elem = content.get_child("Key").and_then(|k| k.get_text()); + if let Some(file_key) = key_elem { + let (folder, file_name): (String, String) = if let Some(idx) = file_key.rfind('/') { + (file_key[..idx].to_string(), file_key[idx + 1..].to_string()) + } else { + ("".to_string(), file_key.to_string()) + }; + files_dict.entry(folder).or_default().push(file_name); + } + } + Ok(files_dict) + } + + pub async fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; + let mut folders = std::collections::HashSet::new(); + let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; + for content in root + .children + .iter() + .filter_map(|c| c.as_element()) + .filter(|e| e.name == "Contents") + { + let key_elem = content.get_child("Key").and_then(|k| k.get_text()); + if let Some(file_key) = key_elem { + if let Some(idx) = file_key.find('/') { + folders.insert(file_key[..idx].to_string()); + } + } + } + Ok(folders.into_iter().collect()) + } + + fn build_url(&self, bucket: &str, key: Option<&str>) -> String { + match key { + Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + None => format!("{}/{}/", self.endpoint, bucket), + } + } +} +impl Default for R2Client { + fn default() -> Self { + let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); + + Self { + access_key, + secret_key, + endpoint, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn r2client_from_env() -> R2Client { + unsafe { + std::env::set_var("R2_ACCESS_KEY", "AKIAEXAMPLE"); + std::env::set_var("R2_SECRET_KEY", "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"); + std::env::set_var("R2_ENDPOINT", "https://example.r2.cloudflarestorage.com"); + } + R2Client::new() + } + + #[test] + fn r2client_env() { + let r2client = r2client_from_env(); + + assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + assert_eq!( + r2client.secret_key, + "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + ); + assert_eq!( + r2client.endpoint, + "https://example.r2.cloudflarestorage.com" + ); + } + + #[test] + fn test_create_headers() { + let client = R2Client::from_credentials( + "AKIAEXAMPLE".to_string(), + "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), + "https://example.r2.cloudflarestorage.com".to_string(), + ); + let headers = client + .create_headers( + Method::PUT, + "bucket", + Some("key"), + "deadbeef", + Some("application/octet-stream"), + ) + .unwrap(); + assert!(headers.contains_key("x-amz-date")); + assert!(headers.contains_key("authorization")); + assert!(headers.contains_key("content-type")); + assert!(headers.contains_key("host")); + } +} diff --git a/src/aws_signing.rs b/src/aws_signing.rs new file mode 100644 index 0000000..b3b5333 --- /dev/null +++ b/src/aws_signing.rs @@ -0,0 +1,197 @@ +use chrono::Utc; +use hmac::{Hmac, Mac}; +use sha2::{Digest, Sha256}; + +type Hmac256 = Hmac; + +// --- Utility functions --- +fn lowercase(string: &str) -> String { + string.to_lowercase() +} + +fn hex>(data: T) -> String { + hex::encode(data) +} + +fn sha256hash>(data: T) -> [u8; 32] { + Sha256::digest(data).into() +} + +fn hmac_sha256(signing_key: &[u8], message: &str) -> Vec { + let mut mac = Hmac256::new_from_slice(signing_key).expect("bad key :pensive:"); + mac.update(message.as_bytes()); + mac.finalize().into_bytes().to_vec() +} + +fn trim(string: &str) -> String { + string.trim().to_string() +} + +fn hash>(payload: T) -> String { + hex(sha256hash(payload)) +} + +fn url_encode(url: &str) -> String { + let mut url = urlencoding::encode(url).into_owned(); + let encoded_to_replacement: [(&str, &str); 4] = + [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")]; + for (encoded_chars_pattern, replacement) in encoded_to_replacement { + url = url.replace(encoded_chars_pattern, replacement) + } + url +} + +// --- Canonical request --- +fn create_canonical_request( + method: http::Method, + uri: http::Uri, + mut headers: Vec<(String, String)>, + hashed_payload: &str, +) -> (String, String, String) { + // HTTPMethod + let http_method = method.to_string(); + + // CanonicalURI = *path only* (spec forbids scheme+host here) + let canonical_uri = if uri.path().is_empty() { + "/".to_string() + } else { + uri.path().to_string() + }; + + // CanonicalQueryString (URL-encoded, sorted by key) + let canonical_query_string = if let Some(query_string) = uri.query() { + let mut pairs = query_string + .split('&') + .map(|query| { + let (k, v) = query.split_once('=').unwrap_or((query, "")); + (url_encode(k), url_encode(v)) + }) + .collect::>(); + pairs.sort_by(|a, b| a.0.cmp(&b.0)); + pairs + .into_iter() + .map(|(k, v)| format!("{}={}", k, v)) + .collect::>() + .join("&") + } else { + String::new() + }; + + // Ensure required headers (host and x-amz-date) are present + let host = uri + .host() + .expect("uri passed without a proper host") + .to_string(); + if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("host")) { + headers.push(("host".to_string(), host)); + } + + // CanonicalHeaders + SignedHeaders + let mut http_headers = headers + .iter() + .map(|(name, value)| (lowercase(name), trim(value))) + .collect::>(); + http_headers.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); + + let canonical_headers: String = http_headers + .iter() + .map(|(k, v)| format!("{}:{}\n", k, v)) + .collect(); + + let signed_headers: String = http_headers + .iter() + .map(|(k, _)| k.clone()) + .collect::>() + .join(";"); + + // Final canonical request + let canonical_request = format!( + "{}\n{}\n{}\n{}\n{}\n{}", + http_method, + canonical_uri, + canonical_query_string, + canonical_headers, + signed_headers, + hashed_payload + ); + + (canonical_request, signed_headers, canonical_headers) +} + +fn credential_scope(date: &str, region: &str, service: &str) -> String { + format!( + "{}/{}/{}/aws4_request", + date, + lowercase(region), + lowercase(service) + ) +} + +fn string_to_sign(scope: &str, amz_date: &str, canonical_request: &str) -> String { + format!( + "{}\n{}\n{}\n{}", + "AWS4-HMAC-SHA256", + amz_date, + scope, + hex(sha256hash(canonical_request)) + ) +} + +fn derive_signing_key(key: &str, date: &str, region: &str, service: &str) -> Vec { + let secret_key = format!("AWS4{}", key); + let date_key = hmac_sha256(secret_key.as_bytes(), date); + let date_region_key = hmac_sha256(&date_key, region); + let date_region_service_key = hmac_sha256(&date_region_key, service); + hmac_sha256(&date_region_service_key, "aws4_request") +} + +fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec { + hmac_sha256(signing_key, string_to_sign) +} + +// --- API --- +pub fn signature( + method: http::Method, + uri: http::Uri, + mut headers: Vec<(String, String)>, + hashed_payload: &str, + service: &str, + region: &str, + secret_key: &str, + access_key: &str, +) -> (String, Vec<(String, String)>) { + let now = Utc::now(); + let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); + let date_stamp = now.format("%Y%m%d").to_string(); + + // Add x-amz-date header if not already present + if !headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) + { + headers.push(("x-amz-date".to_string(), amz_date.clone())); + } + + // Canonical request + let (canonical_request, signed_headers, _canonical_headers) = + create_canonical_request(method, uri, headers.clone(), hashed_payload); + + // String to sign + let scope = credential_scope(&date_stamp, region, service); + let string_to_sign = string_to_sign(&scope, &amz_date, &canonical_request); + + // Signing key + signature + let signing_key = derive_signing_key(secret_key, &date_stamp, region, service); + let signature = hex(calculate_signature(&signing_key, &string_to_sign)); + + // Authorization header + let credential = format!("{}/{}", access_key, scope); + let auth_header = format!( + "{} Credential={}, SignedHeaders={}, Signature={}", + "AWS4-HMAC-SHA256", credential, signed_headers, signature + ); + + headers.push(("authorization".to_string(), auth_header)); + + (signature, headers) +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..38cd4b9 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,15 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum R2Error { + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + #[error("HTTP error: {0}")] + Http(#[from] reqwest::Error), + #[error("XML parse error: {0}")] + Xml(#[from] xmltree::ParseError), + #[error("Missing environment varibles: {0}")] + Env(String), + #[error("Other: {0}")] + Other(String), +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..02b7efa --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,19 @@ +mod aws_signing; +mod error; +mod mimetypes; +pub use error::R2Error; + +mod _async; +#[cfg(feature = "async")] +pub use _async::{R2Bucket, R2Client}; + +#[cfg(feature = "sync")] +pub mod sync; + +#[cfg(test)] +mod test { + // use crate::{R2Bucket, R2Client, sync}; + + #[test] + fn test() {} +} diff --git a/src/mimetypes.rs b/src/mimetypes.rs new file mode 100644 index 0000000..28ffcc1 --- /dev/null +++ b/src/mimetypes.rs @@ -0,0 +1,103 @@ + + +pub enum Mime{} + +impl Mime { + pub fn get_mimetype(key: &str) -> &'static str { + match key { + // Image formats + ".png" => "image/png", + ".jpg" | ".jpeg" => "image/jpeg", + ".gif" => "image/gif", + ".svg" => "image/svg+xml", + ".ico" => "image/x-icon", + ".webp" => "image/webp", + + // Audio formats + ".m4a" => "audio/x-m4a", + ".mp3" => "audio/mpeg", + ".wav" => "audio/wav", + ".ogg" => "audio/ogg", + + // Video formats + ".mp4" => "video/mp4", + ".avi" => "video/x-msvideo", + ".mov" => "video/quicktime", + ".flv" => "video/x-flv", + ".wmv" => "video/x-ms-wmv", + ".webm" => "video/webm", + + // Document formats + ".pdf" => "application/pdf", + ".doc" => "application/msword", + ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".ppt" => "application/vnd.ms-powerpoint", + ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ".xls" => "application/vnd.ms-excel", + ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ".txt" => "text/plain", + + // Web formats + ".html" => "text/html", + ".css" => "text/css", + ".js" => "application/javascript", + ".json" => "application/json", + ".xml" => "application/xml", + + // Other formats + ".csv" => "text/csv", + ".zip" => "application/zip", + ".tar" => "application/x-tar", + ".gz" => "application/gzip", + ".rar" => "application/vnd.rar", + ".7z" => "application/x-7z-compressed", + ".eps" => "application/postscript", + ".sql" => "application/sql", + ".java" => "text/x-java-source", + _ => "application/octet-stream", + } + } + + pub fn get_mimetype_from_fp(file_path: &str) -> &str { + // Sorry I just really wanted to get it done in a one liner. + // This splits a filepath based off ".", in reverse order, so that the first element will + // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") + // This is formated back to ".jpeg" because it's how the match statement is working. + // I could very easily change it but idk it was an interesting thing. + Self::get_mimetype( + &format!( + ".{}", file_path.rsplit(".") + .next() + .unwrap_or("time_to_be_an_octet_stream_lmao") + ) + ) + } + +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn match_mime_test() { + assert_eq!(Mime::get_mimetype(".tar"), "application/x-tar"); + } + + #[test] + fn default_mime_test() { + assert_eq!(Mime::get_mimetype(".bf"), "application/octet-stream"); + } + + #[test] + fn mime_from_file() { + assert_eq!(Mime::get_mimetype_from_fp("test.ico"), "image/x-icon"); + } + + #[test] + fn mime_from_file_path() { + assert_eq!(Mime::get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), "application/pdf"); + assert_eq!(Mime::get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), "text/plain") + } + +} \ No newline at end of file diff --git a/src/sync.rs b/src/sync.rs new file mode 100644 index 0000000..57daa66 --- /dev/null +++ b/src/sync.rs @@ -0,0 +1,4 @@ +mod r2bucket; +mod r2client; +pub use r2bucket::R2Bucket; +pub use r2client::R2Client; diff --git a/src/sync/r2bucket.rs b/src/sync/r2bucket.rs new file mode 100644 index 0000000..b8d6e36 --- /dev/null +++ b/src/sync/r2bucket.rs @@ -0,0 +1,67 @@ +use crate::R2Error; +use crate::sync::R2Client; + +#[derive(Clone, Debug)] +pub struct R2Bucket { + bucket: String, + pub client: R2Client, +} + +impl R2Bucket { + pub fn new(bucket: String, client: R2Client) -> Self { + Self { bucket, client } + } + + pub fn from_credentials( + bucket: String, + access_key: String, + secret_key: String, + endpoint: String, + ) -> Self { + let client = R2Client::from_credentials(access_key, secret_key, endpoint); + Self { bucket, client } + } + + pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> { + self.client + .upload_file(&self.bucket, local_file_path, r2_file_key) + } + + pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + self.client + .download_file(&self.bucket, r2_file_key, local_path) + } + + pub fn list_files(&self) -> Result>, R2Error> { + self.client.list_files(&self.bucket) + } + + pub fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sync::R2Bucket; + use std::env; + + fn get_test_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let access_key = + env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); + let secret_key = + env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); + let endpoint = env::var("R2_ENDPOINT") + .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); + let client = R2Client::from_credentials(access_key, secret_key, endpoint); + R2Bucket::new("test-bucket".to_string(), client) + } + + #[test] + fn test_bucket_construction() { + let bucket = get_test_bucket(); + assert_eq!(bucket.bucket, "test-bucket"); + } +} diff --git a/src/sync/r2client.rs b/src/sync/r2client.rs new file mode 100644 index 0000000..36470a3 --- /dev/null +++ b/src/sync/r2client.rs @@ -0,0 +1,387 @@ +use crate::R2Error; +use crate::mimetypes::Mime; +use chrono::Utc; +use hmac::{Hmac, Mac}; +use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue}; +use sha2::{Digest, Sha256}; +use std::collections::HashMap; + +type HmacSHA256 = Hmac; + +#[derive(Clone, Debug)] +pub struct R2Client { + access_key: String, + secret_key: String, + endpoint: String, +} + +impl R2Client { + fn get_env() -> Result<(String, String, String), R2Error> { + let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; + let values = keys + .map(|key| { std::env::var(key).map_err(|_| R2Error::Env(key.to_owned())) }.unwrap()); + Ok(values.into()) + } + + pub fn new() -> Self { + let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); + + Self { + access_key, + secret_key, + endpoint, + } + } + + pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { + Self { + access_key, + secret_key, + endpoint, + } + } + + fn sign(&self, key: &[u8], msg: &str) -> Vec { + let mut mac = HmacSHA256::new_from_slice(key).expect("Invalid key length"); + mac.update(msg.as_bytes()); + mac.finalize().into_bytes().to_vec() + } + + fn get_signature_key(&self, date_stamp: &str, region: &str, service: &str) -> Vec { + let aws4_secret: String = format!("AWS4{}", self.secret_key); + let k_date = self.sign(aws4_secret.as_bytes(), date_stamp); + let k_region = self.sign(&k_date, region); + let k_service = self.sign(&k_region, service); + self.sign(&k_service, "aws4_request") + } + + fn create_headers( + &self, + method: &str, + bucket: &str, + key: &str, + payload_hash: &str, + content_type: &str, + ) -> Result { + // Robustly extract host from endpoint + let endpoint = self.endpoint.trim_end_matches('/'); + // Not proud of this, it is really dumb and hard to read, but it'll work I suppose...I think... + let host = endpoint + .split("//") + .nth(1) + .unwrap_or(endpoint) + .split('/') + .next() + .unwrap_or(endpoint) + .split(':') + .next() + .unwrap_or(endpoint) + .trim(); + if host.is_empty() { + return Err(R2Error::Other( + "Host header could not be determined from endpoint".to_string(), + )); + } + let t = Utc::now(); + let amz_date = t.format("%Y%m%dT%H%M%SZ").to_string(); + let date_stamp = t.format("%Y%m%d").to_string(); + + let mut headers_vec = [ + ("host", host), + ("x-amz-date", &amz_date), + ("x-amz-content-sha256", payload_hash), + ("content-type", content_type), + ]; + headers_vec.sort_by(|a, b| a.0.cmp(b.0)); + + let signed_headers = headers_vec + .iter() + .map(|(k, _)| *k) + .collect::>() + .join(";"); + let canonical_headers = headers_vec + .iter() + .map(|(k, v)| format!("{}:{}\n", k.to_lowercase(), v)) + .collect::(); + + let canonical_uri = format!("/{}/{}", bucket, key); + let canonical_request = format!( + "{method}\n{uri}\n\n{headers}\n{signed_headers}\n{payload_hash}", + method = method, + uri = canonical_uri, + headers = canonical_headers, + signed_headers = signed_headers, + payload_hash = payload_hash + ); + let credential_scope = format!("{}/{}/s3/aws4_request", date_stamp, "auto"); + let hashed_request = hex::encode(Sha256::digest(canonical_request.as_bytes())); + let string_to_sign = format!( + "AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}", + amz_date = amz_date, + credential_scope = credential_scope, + hashed_request = hashed_request + ); + let signing_key = self.get_signature_key(&date_stamp, "auto", "s3"); + let signature = hex::encode(self.sign(&signing_key, &string_to_sign)); + let authorization = format!( + "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}", + self.access_key, credential_scope, signed_headers, signature + ); + + // Print all headers for debugging + println!("[r2client] DEBUG: Built headers:"); + println!(" host: {}", host); + println!(" x-amz-date: {}", amz_date); + println!(" x-amz-content-sha256: {}", payload_hash); + println!(" content-type: {}", content_type); + println!(" authorization: {}", authorization); + println!(" signed_headers: {}", signed_headers); + println!( + " canonical_headers: {}", + canonical_headers.replace("\n", "\\n") + ); + println!( + " canonical_request: {}", + canonical_request.replace("\n", "\\n") + ); + println!(" string_to_sign: {}", string_to_sign.replace("\n", "\\n")); + println!(" signature: {}", signature); + + let mut header_map = HeaderMap::new(); + header_map.insert( + HeaderName::from_static("x-amz-date"), + HeaderValue::from_str(&amz_date) + .map_err(|e| R2Error::Other(format!("Invalid x-amz-date: {e}")))?, + ); + header_map.insert( + HeaderName::from_static("x-amz-content-sha256"), + HeaderValue::from_str(payload_hash).map_err(|e| { + R2Error::Other(format!( + "Invalid x-amz-content-sha256: {payload_hash:?}: {e}" + )) + })?, + ); + header_map.insert( + HeaderName::from_static("authorization"), + HeaderValue::from_str(&authorization).map_err(|e| { + R2Error::Other(format!( + "Invalid authorization: {e}\nValue: {authorization}" + )) + })?, + ); + header_map.insert( + HeaderName::from_static("content-type"), + HeaderValue::from_str(content_type) + .map_err(|e| R2Error::Other(format!("Invalid content-type: {e}")))?, + ); + header_map.insert( + HeaderName::from_static("host"), + HeaderValue::from_str(host) + .map_err(|e| R2Error::Other(format!("Invalid host: {e}")))?, + ); + Ok(header_map) + } + + pub fn upload_file( + &self, + bucket: &str, + local_file_path: &str, + r2_file_key: &str, + ) -> Result<(), R2Error> { + let file_data = std::fs::read(local_file_path)?; + let mut hasher = Sha256::new(); + hasher.update(&file_data); + let payload_hash = hex::encode(hasher.finalize()); + // let content_type = "application/octet-stream"; + let content_type = Mime::get_mimetype_from_fp(local_file_path); + let headers = + self.create_headers("PUT", bucket, r2_file_key, &payload_hash, content_type)?; + let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); + let client = reqwest::blocking::Client::new(); + let resp = client + .put(&file_url) + .headers(headers) + .body(file_data) + .send()?; + let status = resp.status(); + let text = resp.text()?; + if status.is_success() { + Ok(()) + } else { + Err(R2Error::Other(format!( + "Upload failed with status {}: {}", + status, text + ))) + } + } + pub fn download_file(&self, bucket: &str, key: &str, local_path: &str) -> Result<(), R2Error> { + let payload_hash = "UNSIGNED-PAYLOAD"; + let content_type = "application/octet-stream"; + let headers = self.create_headers("GET", bucket, key, payload_hash, content_type)?; + let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); + let client = reqwest::blocking::Client::new(); + let resp = client.get(&file_url).headers(headers).send()?; + let status = resp.status(); + let content = resp.bytes()?; + if status.is_success() { + std::fs::write(local_path, &content)?; + Ok(()) + } else { + Err(R2Error::Other(format!( + "Download failed with status {}", + status + ))) + } + } + fn get_bucket_listing(&self, bucket: &str) -> Result { + let payload_hash = "UNSIGNED-PAYLOAD"; + let content_type = "application/xml"; + let headers = self.create_headers("GET", bucket, "", payload_hash, content_type)?; + let url = self.build_url(bucket, None); + let client = reqwest::blocking::Client::new(); + let resp = client + .get(&url) + .headers(headers) + .send() + .map_err(R2Error::from)?; + let status = resp.status(); + if status.is_success() { + resp.text().map_err(R2Error::from) + } else { + Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + } + } + + /// List all files in the specified bucket. Returns a HashMap of folder -> `Vec`. + pub fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket)?; + let mut files_dict: HashMap> = HashMap::new(); + let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; + for content in root + .children + .iter() + .filter_map(|c| c.as_element()) + .filter(|e| e.name == "Contents") + { + let key_elem = content.get_child("Key").and_then(|k| k.get_text()); + if let Some(file_key) = key_elem { + let (folder, file_name): (String, String) = if let Some(idx) = file_key.rfind('/') { + (file_key[..idx].to_string(), file_key[idx + 1..].to_string()) + } else { + ("".to_string(), file_key.to_string()) + }; + files_dict.entry(folder).or_default().push(file_name); + } + } + Ok(files_dict) + } + + /// List all folders in the specified bucket. Returns a Vec of folder names. + pub fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket)?; + let mut folders = std::collections::HashSet::new(); + let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; + for content in root + .children + .iter() + .filter_map(|c| c.as_element()) + .filter(|e| e.name == "Contents") + { + let key_elem = content.get_child("Key").and_then(|k| k.get_text()); + if let Some(file_key) = key_elem { + if let Some(idx) = file_key.find('/') { + folders.insert(file_key[..idx].to_string()); + } + } + } + Ok(folders.into_iter().collect()) + } + + fn build_url(&self, bucket: &str, key: Option<&str>) -> String { + match key { + Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + None => format!("{}/{}/", self.endpoint, bucket), + } + } +} +impl Default for R2Client { + fn default() -> Self { + let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); + + Self { + access_key, + secret_key, + endpoint, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn r2client_from_env() -> R2Client { + unsafe { + std::env::set_var("R2_ACCESS_KEY", "AKIAEXAMPLE"); + std::env::set_var("R2_SECRET_KEY", "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"); + std::env::set_var("R2_ENDPOINT", "https://example.r2.cloudflarestorage.com"); + } + R2Client::new() + } + + #[test] + fn r2client_env() { + let r2client = r2client_from_env(); + + assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + assert_eq!( + r2client.secret_key, + "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + ); + assert_eq!( + r2client.endpoint, + "https://example.r2.cloudflarestorage.com" + ); + } + + #[test] + fn test_sign_and_signature_key() { + let client = R2Client::from_credentials( + "AKIAEXAMPLE".to_string(), + "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), + "https://example.r2.cloudflarestorage.com".to_string(), + ); + let key = b"testkey"; + let msg = "testmsg"; + let sig = client.sign(key, msg); + assert_eq!(sig.len(), 32); // HMAC-SHA256 output is 32 bytes + + let date = "20250101"; + let region = "auto"; + let service = "s3"; + let signing_key = client.get_signature_key(date, region, service); + assert_eq!(signing_key.len(), 32); + } + + #[test] + fn test_create_headers() { + let client = R2Client::from_credentials( + "AKIAEXAMPLE".to_string(), + "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), + "https://example.r2.cloudflarestorage.com".to_string(), + ); + let headers = client + .create_headers( + "PUT", + "bucket", + "key", + "deadbeef", + "application/octet-stream", + ) + .unwrap(); + assert!(headers.contains_key("x-amz-date")); + assert!(headers.contains_key("authorization")); + assert!(headers.contains_key("content-type")); + assert!(headers.contains_key("host")); + } +} diff --git a/todo.md b/todo.md new file mode 100644 index 0000000..b94c17b --- /dev/null +++ b/todo.md @@ -0,0 +1,6 @@ +Okay I think I did everything, so to clean up: + +- [ ] Update the sync library +- [X] Make a .env with test-bucket creds +- [ ] Actually test the damn thing +- [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?) From 8b3cc6cb3eb582496d4b2a5833a460665de121d3 Mon Sep 17 00:00:00 2001 From: foreverpyrite <51493121+ForeverPyrite@users.noreply.github.com> Date: Wed, 10 Sep 2025 00:23:57 -0400 Subject: [PATCH 2/4] Giving up for now, probably won't revisit, I'm quite literally getting skill-diffed by some documentation and examples. --- Cargo.toml | 2 +- src/_async/r2bucket.rs | 24 ++- src/_async/r2client.rs | 179 +++++++++++---------- src/aws_signing.rs | 257 ++++++++++++++++++++---------- src/error.rs | 4 +- src/sync/r2bucket.rs | 57 ++++--- src/sync/r2client.rs | 346 +++++++++++++++-------------------------- tests/r2_tests.rs | 137 ++++++++++++++++ todo.md | 25 ++- 9 files changed, 608 insertions(+), 423 deletions(-) create mode 100644 tests/r2_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 091590b..f2d3db4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ urlencoding = "2.1.3" http = "1.3.1" [dev-dependencies] -tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] } dotenv = "0.15" [features] diff --git a/src/_async/r2bucket.rs b/src/_async/r2bucket.rs index fcd3cae..ec4a471 100644 --- a/src/_async/r2bucket.rs +++ b/src/_async/r2bucket.rs @@ -1,14 +1,21 @@ use crate::R2Client; use crate::R2Error; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Bucket { bucket: String, pub client: R2Client, } impl R2Bucket { - pub fn new(bucket: String, client: R2Client) -> Self { + pub fn new(bucket: String) -> Self { + Self { + bucket, + client: R2Client::new(), + } + } + + pub fn from_client(bucket: String, client: R2Client) -> Self { Self { bucket, client } } @@ -35,7 +42,7 @@ impl R2Bucket { pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client - .download_file(&self.bucket, r2_file_key, local_path) + .download_file(&self.bucket, r2_file_key, local_path, None) .await } @@ -53,19 +60,10 @@ impl R2Bucket { #[cfg(test)] mod tests { use super::*; - use crate::R2Client; - use std::env; fn get_test_bucket() -> R2Bucket { dotenv::dotenv().ok(); - let access_key = - env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); - let secret_key = - env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); - let endpoint = env::var("R2_ENDPOINT") - .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); - let client = R2Client::from_credentials(access_key, secret_key, endpoint); - R2Bucket::new("test-bucket".to_string(), client) + R2Bucket::new("test-bucket".to_string()) } #[test] diff --git a/src/_async/r2client.rs b/src/_async/r2client.rs index 0b5c722..5f3e67c 100644 --- a/src/_async/r2client.rs +++ b/src/_async/r2client.rs @@ -1,18 +1,16 @@ +use crate::R2Error; +use crate::aws_signing::Sigv4Client; use crate::mimetypes::Mime; -use crate::{R2Error, aws_signing}; use http::Method; -use reqwest::header::{self, HeaderName, HeaderValue}; -use sha2::{Digest, Sha256}; +use reqwest::header::HeaderMap; use std::collections::HashMap; use std::str::FromStr; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Client { - access_key: String, - secret_key: String, + sigv4: Sigv4Client, endpoint: String, } - impl R2Client { fn get_env() -> Result<(String, String, String), R2Error> { let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; @@ -25,16 +23,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -44,40 +40,47 @@ impl R2Client { method: http::Method, bucket: &str, key: Option<&str>, - payload_hash: &str, + payload: impl AsRef<[u8]>, content_type: Option<&str>, - ) -> Result { + extra_headers: Option>, + ) -> Result { let uri = http::Uri::from_str(&self.build_url(bucket, key)) .expect("invalid uri rip (make sure the build_url function works as intended)"); - let mut headers = Vec::new(); - if method == Method::GET { - headers.push(( - "x-amz-content-sha256".to_string(), - "UNSIGNED-PAYLOAD".to_string(), - )) - } + let mut headers = extra_headers.unwrap_or_default(); + headers.push(( + "host".to_string(), + uri.host().expect("Should have host in URI").to_owned(), + )); if let Some(content_type) = content_type { headers.push(("content-type".to_string(), content_type.to_owned())) } - let (_, headers) = aws_signing::signature( - method, - uri, - headers, - payload_hash, - "s3", - "us-east-1", - &self.secret_key, - &self.access_key, - ); - let mut header_map = header::HeaderMap::new(); - for header in headers { - header_map.insert( - HeaderName::from_lowercase(&header.0.to_lowercase().as_bytes()) - .expect("shit tragic"), - HeaderValue::from_str(&header.1).expect("shit more tragic"), - ); - } + let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); + // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap + // directly. + // // Granted this impl had much better handling, fuck it we ball. + // let mut header_map = header::HeaderMap::new(); + // for header in headers { + // header_map.insert( + // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header name out of {}", + // header.0.to_lowercase() + // ) + // }) + // .unwrap(), + // HeaderValue::from_str(&header.1) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", + // header.1, + // header.0.to_lowercase(), + // ) + // }) + // .unwrap(), + // ); + // } Ok(header_map) } @@ -88,9 +91,12 @@ impl R2Client { r2_file_key: &str, content_type: Option<&str>, ) -> Result<(), R2Error> { - // --- Hash Payload -- - let file_data = std::fs::read(local_file_path)?; - let payload_hash = hex::encode(Sha256::digest(&file_data)); + // Payload (file data) + let payload = std::fs::read(local_file_path)?; + println!( + "[upload_file] Payload hash for signing: {}", + crate::aws_signing::hash(&payload) + ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { @@ -102,15 +108,17 @@ impl R2Client { Method::PUT, bucket, Some(r2_file_key), - &payload_hash, + &payload, content_type, + None, )?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); + println!("[upload_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(r2_file_key)); let client = reqwest::Client::new(); let resp = client .put(&file_url) .headers(headers) - .body(file_data) + .body(payload) .send() .await?; let status = resp.status(); @@ -118,10 +126,13 @@ impl R2Client { if status.is_success() { Ok(()) } else { - Err(R2Error::Other(format!( - "Upload failed with status {}: {}", - status, text - ))) + Err(R2Error::FailedRequest( + format!( + "upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\"" + ), + status, + text, + )) } } pub async fn download_file( @@ -129,36 +140,35 @@ impl R2Client { bucket: &str, key: &str, local_path: &str, + extra_headers: Option>, ) -> Result<(), R2Error> { - let payload_hash = hex::encode(Sha256::digest("")); - let content_type = Mime::get_mimetype_from_fp(local_path); - let headers = self.create_headers( - Method::GET, - bucket, - Some(key), - &payload_hash, - Some(content_type), - )?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); + // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. + // I don't know if I should trust it though, I don't see public impls with this. + let payload = ""; + println!("[download_file] Payload for signing: (empty)"); + let headers = + self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; + println!("[download_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(key)); let client = reqwest::Client::new(); let resp = client.get(&file_url).headers(headers).send().await?; let status = resp.status(); - let content = resp.bytes().await?; if status.is_success() { - std::fs::write(local_path, &content)?; + std::fs::write(local_path, resp.bytes().await?)?; Ok(()) } else { - Err(R2Error::Other(format!( - "Download failed with status {}", - status - ))) + Err(R2Error::FailedRequest( + format!("dowloading file \"{key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) } } async fn get_bucket_listing(&self, bucket: &str) -> Result { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = "application/xml"; - let headers = - self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; + let payload = ""; + println!("[get_bucket_listing] Payload for signing: (empty)"); + let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; + println!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); let client = reqwest::Client::new(); let resp = client @@ -169,9 +179,13 @@ impl R2Client { .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - resp.text().await.map_err(R2Error::from) + Ok(resp.text().await.map_err(R2Error::from)?) } else { - Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + Err(R2Error::FailedRequest( + String::from("list bucket...folders or something idfk"), + status, + resp.text().await.map_err(R2Error::from)?, + )) } } @@ -220,20 +234,17 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { - Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + Some(k) => { + let encoded_key = crate::aws_signing::url_encode(k); + format!("{}/{}/{}", self.endpoint, bucket, encoded_key) + } None => format!("{}/{}/", self.endpoint, bucket), } } } impl Default for R2Client { fn default() -> Self { - let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); - - Self { - access_key, - secret_key, - endpoint, - } + Self::new() } } @@ -254,11 +265,12 @@ mod tests { fn r2client_env() { let r2client = r2client_from_env(); - assert_eq!(r2client.access_key, "AKIAEXAMPLE"); - assert_eq!( - r2client.secret_key, - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" - ); + // Sorry but I don't know if I should have the keys on the sigv4 pub or not yet + // assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + // assert_eq!( + // r2client.secret_key, + // "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + // ); assert_eq!( r2client.endpoint, "https://example.r2.cloudflarestorage.com" @@ -279,6 +291,7 @@ mod tests { Some("key"), "deadbeef", Some("application/octet-stream"), + None, ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/src/aws_signing.rs b/src/aws_signing.rs index b3b5333..5c792b1 100644 --- a/src/aws_signing.rs +++ b/src/aws_signing.rs @@ -4,6 +4,8 @@ use sha2::{Digest, Sha256}; type Hmac256 = Hmac; +const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + // --- Utility functions --- fn lowercase(string: &str) -> String { string.to_lowercase() @@ -27,11 +29,11 @@ fn trim(string: &str) -> String { string.trim().to_string() } -fn hash>(payload: T) -> String { +pub fn hash>(payload: T) -> String { hex(sha256hash(payload)) } -fn url_encode(url: &str) -> String { +pub fn url_encode(url: &str) -> String { let mut url = urlencoding::encode(url).into_owned(); let encoded_to_replacement: [(&str, &str); 4] = [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")]; @@ -41,13 +43,16 @@ fn url_encode(url: &str) -> String { url } +// --- Signing Functions --- +// These don't use any parts of the Sigv4Client, so they are external + // --- Canonical request --- fn create_canonical_request( method: http::Method, uri: http::Uri, mut headers: Vec<(String, String)>, hashed_payload: &str, -) -> (String, String, String) { +) -> (String, Vec<(String, String)>, String) { // HTTPMethod let http_method = method.to_string(); @@ -70,14 +75,14 @@ fn create_canonical_request( pairs.sort_by(|a, b| a.0.cmp(&b.0)); pairs .into_iter() - .map(|(k, v)| format!("{}={}", k, v)) + .map(|(k, v)| format!("{k}={v}")) .collect::>() .join("&") } else { String::new() }; - // Ensure required headers (host and x-amz-date) are present + // checks for proper host headers let host = uri .host() .expect("uri passed without a proper host") @@ -86,6 +91,16 @@ fn create_canonical_request( headers.push(("host".to_string(), host)); } + if !headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256")) + { + headers.push(( + "x-amz-content-sha256".to_string(), + hashed_payload.to_owned(), + )) + } + // CanonicalHeaders + SignedHeaders let mut http_headers = headers .iter() @@ -95,7 +110,7 @@ fn create_canonical_request( let canonical_headers: String = http_headers .iter() - .map(|(k, v)| format!("{}:{}\n", k, v)) + .map(|(k, v)| format!("{k}:{v}\n")) .collect(); let signed_headers: String = http_headers @@ -106,92 +121,174 @@ fn create_canonical_request( // Final canonical request let canonical_request = format!( - "{}\n{}\n{}\n{}\n{}\n{}", - http_method, - canonical_uri, - canonical_query_string, - canonical_headers, - signed_headers, - hashed_payload + "{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}" ); - (canonical_request, signed_headers, canonical_headers) + (canonical_request, http_headers, signed_headers) } - -fn credential_scope(date: &str, region: &str, service: &str) -> String { - format!( - "{}/{}/{}/aws4_request", - date, - lowercase(region), - lowercase(service) - ) -} - -fn string_to_sign(scope: &str, amz_date: &str, canonical_request: &str) -> String { - format!( - "{}\n{}\n{}\n{}", - "AWS4-HMAC-SHA256", - amz_date, - scope, - hex(sha256hash(canonical_request)) - ) -} - -fn derive_signing_key(key: &str, date: &str, region: &str, service: &str) -> Vec { - let secret_key = format!("AWS4{}", key); - let date_key = hmac_sha256(secret_key.as_bytes(), date); - let date_region_key = hmac_sha256(&date_key, region); - let date_region_service_key = hmac_sha256(&date_region_key, service); - hmac_sha256(&date_region_service_key, "aws4_request") -} - fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec { hmac_sha256(signing_key, string_to_sign) } -// --- API --- -pub fn signature( - method: http::Method, - uri: http::Uri, - mut headers: Vec<(String, String)>, - hashed_payload: &str, - service: &str, - region: &str, - secret_key: &str, - access_key: &str, -) -> (String, Vec<(String, String)>) { - let now = Utc::now(); - let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); - let date_stamp = now.format("%Y%m%d").to_string(); - - // Add x-amz-date header if not already present - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) - { - headers.push(("x-amz-date".to_string(), amz_date.clone())); +fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String { + format!( + "{}\n{}\n{}\n{}", + "AWS4-HMAC-SHA256", amz_date, scope, hashed_canonical_request + ) +} +/// Structure containing all the data relevant for an AWS Service utilizing SigV4. +/// Service: String containing the AWS service (e.g. "ec2" or "s3") +/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1") +/// Access Key: The "Access Key" to use with the AWS service (crazy, ik) +/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah) +/// +/// ``` +/// +/// +/// use aws_signing::Sigv4Client; +/// +/// let s3_client = Sigv4Client::new( +/// "s3", +/// "us-east-1", +/// std::env::var("S3_ACCESS_KEY").unwrap(), +/// std::env::var("S3_SECRET_KEY").unwrap(), +/// ) +/// let (_, request_headers) = s3client.signature( +/// http::Method::GET, +/// http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"), +/// vec![("content-type", "text/plain")], +/// "" // Since it's a GET request, the payload is "" +/// ) +/// ``` +#[derive(Debug)] +// A more mature client would also have session_key: Option, but not my problem +pub struct Sigv4Client { + // Would it makes more sense for these to be type generics + // with trait param ToString? + // Either that or just &str or String...wait, union? + // Nah there has to be a better way to do it than...that + // but I don't wanna enum!!! + service: String, + region: String, + // Would it makes more sense for these to be type generics + // with trait param AsRef<[u8]>? + access_key: String, + secret_key: String, +} +/// NOTE: This only impliments functions that require one of the Sigv4Client fields. +/// For other functions related to the signing proccess, they are defined above, including the +/// prequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html +impl Sigv4Client { + /// Creates a new instance of the Sigv4Client for a particular service, in a region, with your + /// private and public access keys. + /// + /// For some reason this function will take any values that impl Into, so you can pass + /// &str, String, or something else if you decide to get freaky. + pub fn new( + service: impl Into, + region: impl Into, + pub_key: impl Into, + priv_key: impl Into, + ) -> Self { + Self { + service: service.into(), + region: region.into(), + access_key: pub_key.into(), + secret_key: priv_key.into(), + } } - // Canonical request - let (canonical_request, signed_headers, _canonical_headers) = - create_canonical_request(method, uri, headers.clone(), hashed_payload); + // In a more mature client, this might be an enum of AWSRegions + // I also don't even know if this could ever be useful lol, wouldn't you have individual + // clients for each region or use "auto" for AWS to figure it out for you? whatever. + pub fn set_region(&mut self, region: impl Into) { + self.region = region.into() + } - // String to sign - let scope = credential_scope(&date_stamp, region, service); - let string_to_sign = string_to_sign(&scope, &amz_date, &canonical_request); + fn credential_scope(&self, date: &str) -> String { + format!( + "{}/{}/{}/aws4_request", + date, + lowercase(&self.region), + lowercase(&self.service) + ) + } - // Signing key + signature - let signing_key = derive_signing_key(secret_key, &date_stamp, region, service); - let signature = hex(calculate_signature(&signing_key, &string_to_sign)); + fn derive_signing_key(&self, date: &str) -> Vec { + let secret_key = &self.secret_key; + let key = format!("AWS4{secret_key}"); + let date_key = hmac_sha256(key.as_bytes(), date); + let date_region_key = hmac_sha256(&date_key, &self.region); + let date_region_service_key = hmac_sha256(&date_region_key, &self.service); + hmac_sha256(&date_region_service_key, "aws4_request") + } - // Authorization header - let credential = format!("{}/{}", access_key, scope); - let auth_header = format!( - "{} Credential={}, SignedHeaders={}, Signature={}", - "AWS4-HMAC-SHA256", credential, signed_headers, signature - ); + // --- API --- + /// This is the only function to use <3 + pub fn signature>( + &self, + method: http::Method, + uri: http::Uri, + // Should probably make this a header map, then turn it into a Vec(String, String) to sort + // by header name cause Amazon said so. + mut headers: Vec<(String, String)>, + payload: T, + ) -> (String, http::HeaderMap) { + let auth_algorithm = "AWS4-HMAC-SHA256"; + let now = Utc::now(); + let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); + let date = now.format("%Y%m%d").to_string(); + let payload_as_bytes = payload.as_ref(); + let payload_hash = if payload_as_bytes.is_empty() { + EMPTY_PAYLOAD_HASH.to_string() + } else { + hash(payload_as_bytes) + }; - headers.push(("authorization".to_string(), auth_header)); + // Add x-amz-date header if not already present + if !headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) + { + headers.push(("x-amz-date".to_string(), amz_date.clone())); + } - (signature, headers) + // Canonical request + let (canonical_request, mut headers, signed_headers) = + create_canonical_request(method, uri, headers, &payload_hash); + + // String to sign + let scope = self.credential_scope(&date); + let hashed_canonical_request = hash(&canonical_request); + let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request); + + // Signing key + signature + let signing_key = self.derive_signing_key(&date); + let signature = hex(calculate_signature(&signing_key, &string_to_sign)); + + // Authorization header + let access_key = &self.access_key; + let credential = format!("{access_key}/{scope}"); + let auth_header = format!( + "{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}" + ); + + println!("\n--- AWS SigV4 Debug ---"); + println!("1. CanonicalRequest:\n---\n{canonical_request}\n---"); + println!("2. StringToSign:\n---\n{string_to_sign}\n---"); + println!("3. SigningKey:\n---\n{}\n---", hex(&signing_key)); + println!("4. Signature:\n---\n{signature}\n---"); + println!("5. Authorization Header:\n---\n{auth_header}\n---"); + + headers.push(("authorization".to_string(), auth_header)); + + let mut header_map: http::HeaderMap = http::HeaderMap::new(); + for (header, value) in headers.clone() { + header_map.insert( + http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(), + http::HeaderValue::from_str(&value).unwrap(), + ); + } + (signature, header_map) + } } diff --git a/src/error.rs b/src/error.rs index 38cd4b9..9438385 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,6 @@ pub enum R2Error { Xml(#[from] xmltree::ParseError), #[error("Missing environment varibles: {0}")] Env(String), - #[error("Other: {0}")] - Other(String), + #[error("Request failed during operation {0}: {1}\n{2}")] + FailedRequest(String, http::StatusCode, String), } diff --git a/src/sync/r2bucket.rs b/src/sync/r2bucket.rs index b8d6e36..ec4a471 100644 --- a/src/sync/r2bucket.rs +++ b/src/sync/r2bucket.rs @@ -1,14 +1,21 @@ +use crate::R2Client; use crate::R2Error; -use crate::sync::R2Client; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Bucket { bucket: String, pub client: R2Client, } impl R2Bucket { - pub fn new(bucket: String, client: R2Client) -> Self { + pub fn new(bucket: String) -> Self { + Self { + bucket, + client: R2Client::new(), + } + } + + pub fn from_client(bucket: String, client: R2Client) -> Self { Self { bucket, client } } @@ -22,41 +29,41 @@ impl R2Bucket { Self { bucket, client } } - pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> { + pub async fn upload_file( + &self, + local_file_path: &str, + r2_file_key: &str, + ) -> Result<(), R2Error> { self.client - .upload_file(&self.bucket, local_file_path, r2_file_key) + // I'm pasing None to let the R2Client derive the content type from the local_file_path + .upload_file(&self.bucket, local_file_path, r2_file_key, None) + .await } - pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client - .download_file(&self.bucket, r2_file_key, local_path) + .download_file(&self.bucket, r2_file_key, local_path, None) + .await } - pub fn list_files(&self) -> Result>, R2Error> { - self.client.list_files(&self.bucket) + pub async fn list_files( + &self, + ) -> Result>, R2Error> { + self.client.list_files(&self.bucket).await } - pub fn list_folders(&self) -> Result, R2Error> { - self.client.list_folders(&self.bucket) + pub async fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket).await } } #[cfg(test)] mod tests { use super::*; - use crate::sync::R2Bucket; - use std::env; fn get_test_bucket() -> R2Bucket { dotenv::dotenv().ok(); - let access_key = - env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); - let secret_key = - env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); - let endpoint = env::var("R2_ENDPOINT") - .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); - let client = R2Client::from_credentials(access_key, secret_key, endpoint); - R2Bucket::new("test-bucket".to_string(), client) + R2Bucket::new("test-bucket".to_string()) } #[test] @@ -64,4 +71,12 @@ mod tests { let bucket = get_test_bucket(); assert_eq!(bucket.bucket, "test-bucket"); } + + // Example async test (requires a runtime, so ignored by default) + // #[tokio::test] + // async fn test_upload_file() { + // let bucket = get_test_bucket(); + // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; + // assert!(result.is_ok()); + // } } diff --git a/src/sync/r2client.rs b/src/sync/r2client.rs index 36470a3..c87abdc 100644 --- a/src/sync/r2client.rs +++ b/src/sync/r2client.rs @@ -1,20 +1,16 @@ use crate::R2Error; +use crate::aws_signing::Sigv4Client; use crate::mimetypes::Mime; -use chrono::Utc; -use hmac::{Hmac, Mac}; -use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue}; -use sha2::{Digest, Sha256}; +use http::Method; +use reqwest::header::HeaderMap; use std::collections::HashMap; +use std::str::FromStr; -type HmacSHA256 = Hmac; - -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Client { - access_key: String, - secret_key: String, + sigv4: Sigv4Client, endpoint: String, } - impl R2Client { fn get_env() -> Result<(String, String, String), R2Error> { let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; @@ -27,234 +23,165 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } - fn sign(&self, key: &[u8], msg: &str) -> Vec { - let mut mac = HmacSHA256::new_from_slice(key).expect("Invalid key length"); - mac.update(msg.as_bytes()); - mac.finalize().into_bytes().to_vec() - } - - fn get_signature_key(&self, date_stamp: &str, region: &str, service: &str) -> Vec { - let aws4_secret: String = format!("AWS4{}", self.secret_key); - let k_date = self.sign(aws4_secret.as_bytes(), date_stamp); - let k_region = self.sign(&k_date, region); - let k_service = self.sign(&k_region, service); - self.sign(&k_service, "aws4_request") - } - fn create_headers( &self, - method: &str, + method: http::Method, bucket: &str, - key: &str, - payload_hash: &str, - content_type: &str, - ) -> Result { - // Robustly extract host from endpoint - let endpoint = self.endpoint.trim_end_matches('/'); - // Not proud of this, it is really dumb and hard to read, but it'll work I suppose...I think... - let host = endpoint - .split("//") - .nth(1) - .unwrap_or(endpoint) - .split('/') - .next() - .unwrap_or(endpoint) - .split(':') - .next() - .unwrap_or(endpoint) - .trim(); - if host.is_empty() { - return Err(R2Error::Other( - "Host header could not be determined from endpoint".to_string(), - )); + key: Option<&str>, + payload: impl AsRef<[u8]>, + content_type: Option<&str>, + ) -> Result { + let uri = http::Uri::from_str(&self.build_url(bucket, key)) + .expect("invalid uri rip (make sure the build_url function works as intended)"); + let mut headers = Vec::new(); + headers.push(( + "host".to_string(), + uri.host().expect("Should have host in URI").to_owned(), + )); + if let Some(content_type) = content_type { + headers.push(("content-type".to_string(), content_type.to_owned())) } - let t = Utc::now(); - let amz_date = t.format("%Y%m%dT%H%M%SZ").to_string(); - let date_stamp = t.format("%Y%m%d").to_string(); - let mut headers_vec = [ - ("host", host), - ("x-amz-date", &amz_date), - ("x-amz-content-sha256", payload_hash), - ("content-type", content_type), - ]; - headers_vec.sort_by(|a, b| a.0.cmp(b.0)); - - let signed_headers = headers_vec - .iter() - .map(|(k, _)| *k) - .collect::>() - .join(";"); - let canonical_headers = headers_vec - .iter() - .map(|(k, v)| format!("{}:{}\n", k.to_lowercase(), v)) - .collect::(); - - let canonical_uri = format!("/{}/{}", bucket, key); - let canonical_request = format!( - "{method}\n{uri}\n\n{headers}\n{signed_headers}\n{payload_hash}", - method = method, - uri = canonical_uri, - headers = canonical_headers, - signed_headers = signed_headers, - payload_hash = payload_hash - ); - let credential_scope = format!("{}/{}/s3/aws4_request", date_stamp, "auto"); - let hashed_request = hex::encode(Sha256::digest(canonical_request.as_bytes())); - let string_to_sign = format!( - "AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}", - amz_date = amz_date, - credential_scope = credential_scope, - hashed_request = hashed_request - ); - let signing_key = self.get_signature_key(&date_stamp, "auto", "s3"); - let signature = hex::encode(self.sign(&signing_key, &string_to_sign)); - let authorization = format!( - "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}", - self.access_key, credential_scope, signed_headers, signature - ); - - // Print all headers for debugging - println!("[r2client] DEBUG: Built headers:"); - println!(" host: {}", host); - println!(" x-amz-date: {}", amz_date); - println!(" x-amz-content-sha256: {}", payload_hash); - println!(" content-type: {}", content_type); - println!(" authorization: {}", authorization); - println!(" signed_headers: {}", signed_headers); - println!( - " canonical_headers: {}", - canonical_headers.replace("\n", "\\n") - ); - println!( - " canonical_request: {}", - canonical_request.replace("\n", "\\n") - ); - println!(" string_to_sign: {}", string_to_sign.replace("\n", "\\n")); - println!(" signature: {}", signature); - - let mut header_map = HeaderMap::new(); - header_map.insert( - HeaderName::from_static("x-amz-date"), - HeaderValue::from_str(&amz_date) - .map_err(|e| R2Error::Other(format!("Invalid x-amz-date: {e}")))?, - ); - header_map.insert( - HeaderName::from_static("x-amz-content-sha256"), - HeaderValue::from_str(payload_hash).map_err(|e| { - R2Error::Other(format!( - "Invalid x-amz-content-sha256: {payload_hash:?}: {e}" - )) - })?, - ); - header_map.insert( - HeaderName::from_static("authorization"), - HeaderValue::from_str(&authorization).map_err(|e| { - R2Error::Other(format!( - "Invalid authorization: {e}\nValue: {authorization}" - )) - })?, - ); - header_map.insert( - HeaderName::from_static("content-type"), - HeaderValue::from_str(content_type) - .map_err(|e| R2Error::Other(format!("Invalid content-type: {e}")))?, - ); - header_map.insert( - HeaderName::from_static("host"), - HeaderValue::from_str(host) - .map_err(|e| R2Error::Other(format!("Invalid host: {e}")))?, - ); + let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); + // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap + // directly. + // // Granted this impl had much better handling, fuck it we ball. + // let mut header_map = header::HeaderMap::new(); + // for header in headers { + // header_map.insert( + // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header name out of {}", + // header.0.to_lowercase() + // ) + // }) + // .unwrap(), + // HeaderValue::from_str(&header.1) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", + // header.1, + // header.0.to_lowercase(), + // ) + // }) + // .unwrap(), + // ); + // } Ok(header_map) } - pub fn upload_file( + pub async fn upload_file( &self, bucket: &str, local_file_path: &str, r2_file_key: &str, + content_type: Option<&str>, ) -> Result<(), R2Error> { - let file_data = std::fs::read(local_file_path)?; - let mut hasher = Sha256::new(); - hasher.update(&file_data); - let payload_hash = hex::encode(hasher.finalize()); - // let content_type = "application/octet-stream"; - let content_type = Mime::get_mimetype_from_fp(local_file_path); - let headers = - self.create_headers("PUT", bucket, r2_file_key, &payload_hash, content_type)?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); - let client = reqwest::blocking::Client::new(); + // Payload (file data) + let payload = std::fs::read(local_file_path)?; + + // Set HTTP Headers + let content_type = if let Some(content_type) = content_type { + Some(content_type) + } else { + Some(Mime::get_mimetype_from_fp(local_file_path)) + }; + let headers = self.create_headers( + Method::PUT, + bucket, + Some(r2_file_key), + &payload, + content_type, + )?; + let file_url = self.build_url(bucket, Some(r2_file_key)); + let client = reqwest::Client::new(); let resp = client .put(&file_url) .headers(headers) - .body(file_data) - .send()?; + .body(payload) + .send() + .await?; let status = resp.status(); - let text = resp.text()?; + let text = resp.text().await?; if status.is_success() { Ok(()) } else { - Err(R2Error::Other(format!( - "Upload failed with status {}: {}", - status, text - ))) + Err(R2Error::FailedRequest( + format!( + "upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\"" + ), + status, + text, + )) } } - pub fn download_file(&self, bucket: &str, key: &str, local_path: &str) -> Result<(), R2Error> { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = "application/octet-stream"; - let headers = self.create_headers("GET", bucket, key, payload_hash, content_type)?; + pub async fn download_file( + &self, + bucket: &str, + key: &str, + local_path: &str, + ) -> Result<(), R2Error> { + // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. + // I don't know if I should trust it though, I don't see public impls with this. + let payload = "UNSIGNED-PAYLOAD"; + let content_type = Mime::get_mimetype_from_fp(local_path); + let headers = + self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?; let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); - let client = reqwest::blocking::Client::new(); - let resp = client.get(&file_url).headers(headers).send()?; + let client = reqwest::Client::new(); + let resp = client.get(&file_url).headers(headers).send().await?; let status = resp.status(); - let content = resp.bytes()?; if status.is_success() { - std::fs::write(local_path, &content)?; + std::fs::write(local_path, resp.bytes().await?)?; Ok(()) } else { - Err(R2Error::Other(format!( - "Download failed with status {}", - status - ))) + Err(R2Error::FailedRequest( + format!("dowloading file \"{key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) } } - fn get_bucket_listing(&self, bucket: &str) -> Result { + async fn get_bucket_listing(&self, bucket: &str) -> Result { let payload_hash = "UNSIGNED-PAYLOAD"; let content_type = "application/xml"; - let headers = self.create_headers("GET", bucket, "", payload_hash, content_type)?; + let headers = + self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; let url = self.build_url(bucket, None); - let client = reqwest::blocking::Client::new(); + let client = reqwest::Client::new(); let resp = client .get(&url) .headers(headers) .send() + .await .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - resp.text().map_err(R2Error::from) + Ok(resp.text().await.map_err(R2Error::from)?) } else { - Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + Err(R2Error::FailedRequest( + String::from("list bucket...folders or something idfk"), + status, + resp.text().await.map_err(R2Error::from)?, + )) } } - /// List all files in the specified bucket. Returns a HashMap of folder -> `Vec`. - pub fn list_files(&self, bucket: &str) -> Result>, R2Error> { - let xml = self.get_bucket_listing(bucket)?; + pub async fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; let mut files_dict: HashMap> = HashMap::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -276,9 +203,8 @@ impl R2Client { Ok(files_dict) } - /// List all folders in the specified bucket. Returns a Vec of folder names. - pub fn list_folders(&self, bucket: &str) -> Result, R2Error> { - let xml = self.get_bucket_listing(bucket)?; + pub async fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; let mut folders = std::collections::HashSet::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -306,13 +232,7 @@ impl R2Client { } impl Default for R2Client { fn default() -> Self { - let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); - - Self { - access_key, - secret_key, - endpoint, - } + Self::new() } } @@ -333,36 +253,18 @@ mod tests { fn r2client_env() { let r2client = r2client_from_env(); - assert_eq!(r2client.access_key, "AKIAEXAMPLE"); - assert_eq!( - r2client.secret_key, - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" - ); + // Sorry but I don't know if I should have the keys on the sigv4 pub or not yet + // assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + // assert_eq!( + // r2client.secret_key, + // "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + // ); assert_eq!( r2client.endpoint, "https://example.r2.cloudflarestorage.com" ); } - #[test] - fn test_sign_and_signature_key() { - let client = R2Client::from_credentials( - "AKIAEXAMPLE".to_string(), - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), - "https://example.r2.cloudflarestorage.com".to_string(), - ); - let key = b"testkey"; - let msg = "testmsg"; - let sig = client.sign(key, msg); - assert_eq!(sig.len(), 32); // HMAC-SHA256 output is 32 bytes - - let date = "20250101"; - let region = "auto"; - let service = "s3"; - let signing_key = client.get_signature_key(date, region, service); - assert_eq!(signing_key.len(), 32); - } - #[test] fn test_create_headers() { let client = R2Client::from_credentials( @@ -372,11 +274,11 @@ mod tests { ); let headers = client .create_headers( - "PUT", + Method::PUT, "bucket", - "key", + Some("key"), "deadbeef", - "application/octet-stream", + Some("application/octet-stream"), ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/tests/r2_tests.rs b/tests/r2_tests.rs new file mode 100644 index 0000000..73f1693 --- /dev/null +++ b/tests/r2_tests.rs @@ -0,0 +1,137 @@ +use std::env; +use std::fs; +use std::io::Write; + +fn create_test_file(path: &str, content: &str) { + let mut file = fs::File::create(path).unwrap(); + file.write_all(content.as_bytes()).unwrap(); +} + +#[cfg(feature = "sync")] +mod sync_tests { + use super::create_test_file; + use r2client::sync::R2Bucket; + use std::env; + use std::fs; + + fn setup_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests"); + let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set"); + let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set"); + let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set"); + R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint) + } + + #[test] + fn test_sync_e2e() { + let bucket = setup_bucket(); + let test_content = "Hello, R2 sync world!"; + let local_upload_path = "test_upload_sync.txt"; + let r2_file_key = "test/test_upload_sync.txt"; + let local_download_path = "test_download_sync.txt"; + + create_test_file(local_upload_path, test_content); + + // 1. Upload file + bucket + .upload_file(local_upload_path, r2_file_key) + .expect("Sync upload failed"); + + // 2. List files and check if it exists + let files = bucket.list_files().expect("Sync list_files failed"); + assert!( + files + .get("test") + .unwrap() + .contains(&"test_upload_sync.txt".to_string()) + ); + + // 3. List folders and check if it exists + let folders = bucket.list_folders().expect("Sync list_folders failed"); + assert!(folders.contains(&"test".to_string())); + + // 4. Download file + bucket + .download_file(r2_file_key, local_download_path) + .expect("Sync download failed"); + + // 5. Verify content + let downloaded_content = fs::read_to_string(local_download_path).unwrap(); + assert_eq!(test_content, downloaded_content); + + // Cleanup + fs::remove_file(local_upload_path).unwrap(); + fs::remove_file(local_download_path).unwrap(); + // NOTE: The test file on R2 is not deleted as there is no delete API yet. + } +} + +#[cfg(feature = "async")] +mod async_tests { + use super::create_test_file; + use r2client::R2Bucket; + use std::env; + use std::fs; + + fn setup_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests"); + let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set"); + let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set"); + let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set"); + R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint) + } + + #[tokio::test] + async fn test_async_e2e() { + let bucket = setup_bucket(); + let test_content = "Hello, R2 async world!"; + let local_upload_path = "test_upload_async.txt"; + let r2_file_key = "test/test_upload_async.txt"; + let local_download_path = "test_download_async.txt"; + + create_test_file(local_upload_path, test_content); + + // 0. List files to see if a get request will go through lol + let files = bucket.list_files().await.expect("Async list_files failed"); + println!("{files:#?}"); + + // 1. Upload file + bucket + .upload_file(local_upload_path, r2_file_key) + .await + .expect("Async upload failed"); + + // 2. List files and check if it exists + let files = bucket.list_files().await.expect("Async list_files failed"); + assert!( + files + .get("test") + .unwrap() + .contains(&"test_upload_async.txt".to_string()) + ); + + // 3. List folders and check if it exists + let folders = bucket + .list_folders() + .await + .expect("Async list_folders failed"); + assert!(folders.contains(&"test".to_string())); + + // 4. Download file + bucket + .download_file(r2_file_key, local_download_path) + .await + .expect("Async download failed"); + + // 5. Verify content + let downloaded_content = fs::read_to_string(local_download_path).unwrap(); + assert_eq!(test_content, downloaded_content); + + // Cleanup + fs::remove_file(local_upload_path).unwrap(); + fs::remove_file(local_download_path).unwrap(); + // NOTE: The test file on R2 is not deleted as there is no delete API yet. + } +} diff --git a/todo.md b/todo.md index b94c17b..562cdd8 100644 --- a/todo.md +++ b/todo.md @@ -1,6 +1,29 @@ Okay I think I did everything, so to clean up: -- [ ] Update the sync library +# Fatal Bug +When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header. + +Check and see if the `aws_signing` function `create_canonical_request_headers`'s output is getting tossed. +The output of that function includes request and headers with the host headers in there. +I imagine that, instead of parsing that string to turn into a header map or whatever the `signature` function returns, it is creating it's own "content-type" and "x-amz-date" headers and pushing them to it's own vector +I needed to come here to think cause I'm tired and would've fucked it up if I just brute forced it instead of critically thought about it + +The expected solution is taking the "request", and turning in into a "HeaderMap", since we aren't low enough level to pass unsafe strings by ourselves. +Technically the output of the canonical_request is correct: +Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"` +However it's not in the format of the vector of strings, and also I don't think there should be a new line after put, but there should be two before the content type. Not sure. +But that's what the request looks like, then the headers that we could parse as well that the same function returns are as follows: +Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"` +Which seems to match up, however the final signature that the main `signature` function returns is: +Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]` +So my assumption is that the `signature` function is making it's own headers and dropping the canonical ones instead of parsing one of the returned strings into one. +This is happening before the "authorization" headers (the only headers that the main function should be adding itself) are pushed to the current vector +I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as it's own library or anything. + +Okay after saying all that, surely I could've just fixed it by now and will remember it in the later. + +- [X] Update the sync library - [X] Make a .env with test-bucket creds - [ ] Actually test the damn thing - [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?) + From 76cc06d7e1a3e50d8ca2021f9123d28535e74752 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Fri, 19 Sep 2025 21:02:12 -0400 Subject: [PATCH 3/4] Pre-release commit Anything below this first part is definitely me being astounded This is the commit where it is in a working state that isn't exactly primitive, but it's not comprehensive, fully fleshed out, or polished either. But I like where it's going You wouldn't believe it. I came back to this after a MASSIVE crash out with how HUGE and CLUNKY trying to use the AWS SDK was. Well Guess what GUESS Literally I just went to test it this morning and I freaking KID YOU NOT it just worked. no changes. it just worked. YOU WANNA KNOW HOW I CAN TELL? Look at this FREAKING COMMIT and see that ALL THAT CHANGED was the todo.md FILE literally what the fart I shouldn't complain but OH MY GOODNESS I spent SO LONG working on it and it turned out it WASN'T all for naught because I guess something was just...stuck. Bad credentials somehow? I don't know anymore. I'm just glad it's over. --- Cargo.toml | 21 +-- src/_async/r2bucket.rs | 26 +--- src/_async/r2client.rs | 88 ++++++------ src/aws_signing.rs | 294 ----------------------------------------- src/lib.rs | 9 -- src/mimetypes.rs | 185 ++++++++++++++------------ src/sync/r2bucket.rs | 50 ++----- src/sync/r2client.rs | 141 +++++++++++--------- tests/r2_tests.rs | 6 +- todo.md | 37 ++---- 10 files changed, 252 insertions(+), 605 deletions(-) delete mode 100644 src/aws_signing.rs diff --git a/Cargo.toml b/Cargo.toml index f2d3db4..6c11923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,32 +1,23 @@ [package] name = "r2client" -version = "0.1.0" +version = "0.2.0" edition = "2024" [lib] [dependencies] -sha2 = "0.10.9" -bytes = "1.10.1" -reqwest = { version = "0.12.19", features = ["blocking"] } -chrono = "0.4.41" -hex = "0.4.3" -hmac = "0.12.1" +reqwest = "0.12.19" xmltree = "0.11.0" thiserror = "2" -async-trait = "0.1.89" -async-std = { version = "1.0", optional = true } -tokio = { version = "1.0", features = ["rt-multi-thread"], optional = true } -futures-executor = { version = "0.3", optional = true } -urlencoding = "2.1.3" http = "1.3.1" +aws_sigv4 = { path = "../aws_sigv4/" } +log = "0.4.28" [dev-dependencies] tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] } dotenv = "0.15" [features] -async = ["tokio"] +async = [] default = ["async"] -sync = ["tokio/rt-multi-thread", "futures-executor"] -async-std-runtime = ["async-std"] +sync = ["reqwest/blocking"] diff --git a/src/_async/r2bucket.rs b/src/_async/r2bucket.rs index ec4a471..692a4fe 100644 --- a/src/_async/r2bucket.rs +++ b/src/_async/r2bucket.rs @@ -1,4 +1,4 @@ -use crate::R2Client; +use crate::_async::R2Client; use crate::R2Error; #[derive(Debug)] @@ -55,28 +55,8 @@ impl R2Bucket { pub async fn list_folders(&self) -> Result, R2Error> { self.client.list_folders(&self.bucket).await } -} -#[cfg(test)] -mod tests { - use super::*; - - fn get_test_bucket() -> R2Bucket { - dotenv::dotenv().ok(); - R2Bucket::new("test-bucket".to_string()) + pub async fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> { + self.client.delete(&self.bucket, r2_file_key).await } - - #[test] - fn test_bucket_construction() { - let bucket = get_test_bucket(); - assert_eq!(bucket.bucket, "test-bucket"); - } - - // Example async test (requires a runtime, so ignored by default) - // #[tokio::test] - // async fn test_upload_file() { - // let bucket = get_test_bucket(); - // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; - // assert!(result.is_ok()); - // } } diff --git a/src/_async/r2client.rs b/src/_async/r2client.rs index 5f3e67c..4d2c109 100644 --- a/src/_async/r2client.rs +++ b/src/_async/r2client.rs @@ -1,14 +1,15 @@ use crate::R2Error; -use crate::aws_signing::Sigv4Client; -use crate::mimetypes::Mime; +use crate::mimetypes::get_mimetype_from_fp; +use aws_sigv4::SigV4Credentials; use http::Method; +use log::trace; use reqwest::header::HeaderMap; use std::collections::HashMap; use std::str::FromStr; #[derive(Debug)] pub struct R2Client { - sigv4: Sigv4Client, + sigv4: SigV4Credentials, endpoint: String, } impl R2Client { @@ -23,14 +24,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -56,31 +57,6 @@ impl R2Client { } let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); - // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap - // directly. - // // Granted this impl had much better handling, fuck it we ball. - // let mut header_map = header::HeaderMap::new(); - // for header in headers { - // header_map.insert( - // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header name out of {}", - // header.0.to_lowercase() - // ) - // }) - // .unwrap(), - // HeaderValue::from_str(&header.1) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", - // header.1, - // header.0.to_lowercase(), - // ) - // }) - // .unwrap(), - // ); - // } Ok(header_map) } @@ -93,16 +69,16 @@ impl R2Client { ) -> Result<(), R2Error> { // Payload (file data) let payload = std::fs::read(local_file_path)?; - println!( + trace!( "[upload_file] Payload hash for signing: {}", - crate::aws_signing::hash(&payload) + aws_sigv4::hash(&payload) ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { Some(content_type) } else { - Some(Mime::get_mimetype_from_fp(local_file_path)) + Some(get_mimetype_from_fp(local_file_path)) }; let headers = self.create_headers( Method::PUT, @@ -112,7 +88,7 @@ impl R2Client { content_type, None, )?; - println!("[upload_file] Headers sent to request: {headers:#?}"); + trace!("[upload_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(r2_file_key)); let client = reqwest::Client::new(); let resp = client @@ -145,10 +121,10 @@ impl R2Client { // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // I don't know if I should trust it though, I don't see public impls with this. let payload = ""; - println!("[download_file] Payload for signing: (empty)"); + trace!("[download_file] Payload for signing: (empty)"); let headers = self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; - println!("[download_file] Headers sent to request: {headers:#?}"); + trace!("[download_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(key)); let client = reqwest::Client::new(); let resp = client.get(&file_url).headers(headers).send().await?; @@ -164,11 +140,37 @@ impl R2Client { )) } } + pub async fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> { + let payload = ""; + trace!("[delete_file] Payload for signing: (empty)"); + let headers = self.create_headers( + Method::DELETE, + bucket, + Some(remote_key), + payload, + None, + None, + )?; + trace!("[delete_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(remote_key)); + let client = reqwest::Client::new(); + let resp = client.delete(&file_url).headers(headers).send().await?; + let status = resp.status(); + if status.is_success() { + Ok(()) + } else { + Err(R2Error::FailedRequest( + format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) + } + } async fn get_bucket_listing(&self, bucket: &str) -> Result { let payload = ""; - println!("[get_bucket_listing] Payload for signing: (empty)"); + trace!("[get_bucket_listing] Payload for signing: (empty)"); let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; - println!("[get_bucket_listing] Headers sent to request: {headers:#?}"); + trace!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); let client = reqwest::Client::new(); let resp = client @@ -223,10 +225,10 @@ impl R2Client { .filter(|e| e.name == "Contents") { let key_elem = content.get_child("Key").and_then(|k| k.get_text()); - if let Some(file_key) = key_elem { - if let Some(idx) = file_key.find('/') { - folders.insert(file_key[..idx].to_string()); - } + if let Some(file_key) = key_elem + && let Some(idx) = file_key.find('/') + { + folders.insert(file_key[..idx].to_string()); } } Ok(folders.into_iter().collect()) @@ -235,7 +237,7 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { Some(k) => { - let encoded_key = crate::aws_signing::url_encode(k); + let encoded_key = aws_sigv4::url_encode(k); format!("{}/{}/{}", self.endpoint, bucket, encoded_key) } None => format!("{}/{}/", self.endpoint, bucket), diff --git a/src/aws_signing.rs b/src/aws_signing.rs deleted file mode 100644 index 5c792b1..0000000 --- a/src/aws_signing.rs +++ /dev/null @@ -1,294 +0,0 @@ -use chrono::Utc; -use hmac::{Hmac, Mac}; -use sha2::{Digest, Sha256}; - -type Hmac256 = Hmac; - -const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; - -// --- Utility functions --- -fn lowercase(string: &str) -> String { - string.to_lowercase() -} - -fn hex>(data: T) -> String { - hex::encode(data) -} - -fn sha256hash>(data: T) -> [u8; 32] { - Sha256::digest(data).into() -} - -fn hmac_sha256(signing_key: &[u8], message: &str) -> Vec { - let mut mac = Hmac256::new_from_slice(signing_key).expect("bad key :pensive:"); - mac.update(message.as_bytes()); - mac.finalize().into_bytes().to_vec() -} - -fn trim(string: &str) -> String { - string.trim().to_string() -} - -pub fn hash>(payload: T) -> String { - hex(sha256hash(payload)) -} - -pub fn url_encode(url: &str) -> String { - let mut url = urlencoding::encode(url).into_owned(); - let encoded_to_replacement: [(&str, &str); 4] = - [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")]; - for (encoded_chars_pattern, replacement) in encoded_to_replacement { - url = url.replace(encoded_chars_pattern, replacement) - } - url -} - -// --- Signing Functions --- -// These don't use any parts of the Sigv4Client, so they are external - -// --- Canonical request --- -fn create_canonical_request( - method: http::Method, - uri: http::Uri, - mut headers: Vec<(String, String)>, - hashed_payload: &str, -) -> (String, Vec<(String, String)>, String) { - // HTTPMethod - let http_method = method.to_string(); - - // CanonicalURI = *path only* (spec forbids scheme+host here) - let canonical_uri = if uri.path().is_empty() { - "/".to_string() - } else { - uri.path().to_string() - }; - - // CanonicalQueryString (URL-encoded, sorted by key) - let canonical_query_string = if let Some(query_string) = uri.query() { - let mut pairs = query_string - .split('&') - .map(|query| { - let (k, v) = query.split_once('=').unwrap_or((query, "")); - (url_encode(k), url_encode(v)) - }) - .collect::>(); - pairs.sort_by(|a, b| a.0.cmp(&b.0)); - pairs - .into_iter() - .map(|(k, v)| format!("{k}={v}")) - .collect::>() - .join("&") - } else { - String::new() - }; - - // checks for proper host headers - let host = uri - .host() - .expect("uri passed without a proper host") - .to_string(); - if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("host")) { - headers.push(("host".to_string(), host)); - } - - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256")) - { - headers.push(( - "x-amz-content-sha256".to_string(), - hashed_payload.to_owned(), - )) - } - - // CanonicalHeaders + SignedHeaders - let mut http_headers = headers - .iter() - .map(|(name, value)| (lowercase(name), trim(value))) - .collect::>(); - http_headers.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); - - let canonical_headers: String = http_headers - .iter() - .map(|(k, v)| format!("{k}:{v}\n")) - .collect(); - - let signed_headers: String = http_headers - .iter() - .map(|(k, _)| k.clone()) - .collect::>() - .join(";"); - - // Final canonical request - let canonical_request = format!( - "{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}" - ); - - (canonical_request, http_headers, signed_headers) -} -fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec { - hmac_sha256(signing_key, string_to_sign) -} - -fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String { - format!( - "{}\n{}\n{}\n{}", - "AWS4-HMAC-SHA256", amz_date, scope, hashed_canonical_request - ) -} -/// Structure containing all the data relevant for an AWS Service utilizing SigV4. -/// Service: String containing the AWS service (e.g. "ec2" or "s3") -/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1") -/// Access Key: The "Access Key" to use with the AWS service (crazy, ik) -/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah) -/// -/// ``` -/// -/// -/// use aws_signing::Sigv4Client; -/// -/// let s3_client = Sigv4Client::new( -/// "s3", -/// "us-east-1", -/// std::env::var("S3_ACCESS_KEY").unwrap(), -/// std::env::var("S3_SECRET_KEY").unwrap(), -/// ) -/// let (_, request_headers) = s3client.signature( -/// http::Method::GET, -/// http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"), -/// vec![("content-type", "text/plain")], -/// "" // Since it's a GET request, the payload is "" -/// ) -/// ``` -#[derive(Debug)] -// A more mature client would also have session_key: Option, but not my problem -pub struct Sigv4Client { - // Would it makes more sense for these to be type generics - // with trait param ToString? - // Either that or just &str or String...wait, union? - // Nah there has to be a better way to do it than...that - // but I don't wanna enum!!! - service: String, - region: String, - // Would it makes more sense for these to be type generics - // with trait param AsRef<[u8]>? - access_key: String, - secret_key: String, -} -/// NOTE: This only impliments functions that require one of the Sigv4Client fields. -/// For other functions related to the signing proccess, they are defined above, including the -/// prequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html -impl Sigv4Client { - /// Creates a new instance of the Sigv4Client for a particular service, in a region, with your - /// private and public access keys. - /// - /// For some reason this function will take any values that impl Into, so you can pass - /// &str, String, or something else if you decide to get freaky. - pub fn new( - service: impl Into, - region: impl Into, - pub_key: impl Into, - priv_key: impl Into, - ) -> Self { - Self { - service: service.into(), - region: region.into(), - access_key: pub_key.into(), - secret_key: priv_key.into(), - } - } - - // In a more mature client, this might be an enum of AWSRegions - // I also don't even know if this could ever be useful lol, wouldn't you have individual - // clients for each region or use "auto" for AWS to figure it out for you? whatever. - pub fn set_region(&mut self, region: impl Into) { - self.region = region.into() - } - - fn credential_scope(&self, date: &str) -> String { - format!( - "{}/{}/{}/aws4_request", - date, - lowercase(&self.region), - lowercase(&self.service) - ) - } - - fn derive_signing_key(&self, date: &str) -> Vec { - let secret_key = &self.secret_key; - let key = format!("AWS4{secret_key}"); - let date_key = hmac_sha256(key.as_bytes(), date); - let date_region_key = hmac_sha256(&date_key, &self.region); - let date_region_service_key = hmac_sha256(&date_region_key, &self.service); - hmac_sha256(&date_region_service_key, "aws4_request") - } - - // --- API --- - /// This is the only function to use <3 - pub fn signature>( - &self, - method: http::Method, - uri: http::Uri, - // Should probably make this a header map, then turn it into a Vec(String, String) to sort - // by header name cause Amazon said so. - mut headers: Vec<(String, String)>, - payload: T, - ) -> (String, http::HeaderMap) { - let auth_algorithm = "AWS4-HMAC-SHA256"; - let now = Utc::now(); - let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); - let date = now.format("%Y%m%d").to_string(); - let payload_as_bytes = payload.as_ref(); - let payload_hash = if payload_as_bytes.is_empty() { - EMPTY_PAYLOAD_HASH.to_string() - } else { - hash(payload_as_bytes) - }; - - // Add x-amz-date header if not already present - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) - { - headers.push(("x-amz-date".to_string(), amz_date.clone())); - } - - // Canonical request - let (canonical_request, mut headers, signed_headers) = - create_canonical_request(method, uri, headers, &payload_hash); - - // String to sign - let scope = self.credential_scope(&date); - let hashed_canonical_request = hash(&canonical_request); - let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request); - - // Signing key + signature - let signing_key = self.derive_signing_key(&date); - let signature = hex(calculate_signature(&signing_key, &string_to_sign)); - - // Authorization header - let access_key = &self.access_key; - let credential = format!("{access_key}/{scope}"); - let auth_header = format!( - "{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}" - ); - - println!("\n--- AWS SigV4 Debug ---"); - println!("1. CanonicalRequest:\n---\n{canonical_request}\n---"); - println!("2. StringToSign:\n---\n{string_to_sign}\n---"); - println!("3. SigningKey:\n---\n{}\n---", hex(&signing_key)); - println!("4. Signature:\n---\n{signature}\n---"); - println!("5. Authorization Header:\n---\n{auth_header}\n---"); - - headers.push(("authorization".to_string(), auth_header)); - - let mut header_map: http::HeaderMap = http::HeaderMap::new(); - for (header, value) in headers.clone() { - header_map.insert( - http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(), - http::HeaderValue::from_str(&value).unwrap(), - ); - } - (signature, header_map) - } -} diff --git a/src/lib.rs b/src/lib.rs index 02b7efa..e2c4068 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -mod aws_signing; mod error; mod mimetypes; pub use error::R2Error; @@ -9,11 +8,3 @@ pub use _async::{R2Bucket, R2Client}; #[cfg(feature = "sync")] pub mod sync; - -#[cfg(test)] -mod test { - // use crate::{R2Bucket, R2Client, sync}; - - #[test] - fn test() {} -} diff --git a/src/mimetypes.rs b/src/mimetypes.rs index 28ffcc1..c3ae6c4 100644 --- a/src/mimetypes.rs +++ b/src/mimetypes.rs @@ -1,103 +1,112 @@ +pub fn get_mimetype(key: &str) -> &'static str { + match key { + // Image formats + ".png" => "image/png", + ".jpg" | ".jpeg" => "image/jpeg", + ".gif" => "image/gif", + ".svg" => "image/svg+xml", + ".ico" => "image/x-icon", + ".webp" => "image/webp", + // Audio formats + ".m4a" => "audio/x-m4a", + ".mp3" => "audio/mpeg", + ".wav" => "audio/wav", + ".ogg" => "audio/ogg", -pub enum Mime{} + // Video formats + ".mp4" => "video/mp4", + ".avi" => "video/x-msvideo", + ".mov" => "video/quicktime", + ".flv" => "video/x-flv", + ".wmv" => "video/x-ms-wmv", + ".webm" => "video/webm", -impl Mime { - pub fn get_mimetype(key: &str) -> &'static str { - match key { - // Image formats - ".png" => "image/png", - ".jpg" | ".jpeg" => "image/jpeg", - ".gif" => "image/gif", - ".svg" => "image/svg+xml", - ".ico" => "image/x-icon", - ".webp" => "image/webp", + // Document formats + ".pdf" => "application/pdf", + ".doc" => "application/msword", + ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".ppt" => "application/vnd.ms-powerpoint", + ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ".xls" => "application/vnd.ms-excel", + ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ".txt" => "text/plain", - // Audio formats - ".m4a" => "audio/x-m4a", - ".mp3" => "audio/mpeg", - ".wav" => "audio/wav", - ".ogg" => "audio/ogg", + // Web formats + ".html" => "text/html", + ".css" => "text/css", + ".js" => "application/javascript", + ".json" => "application/json", + ".xml" => "application/xml", - // Video formats - ".mp4" => "video/mp4", - ".avi" => "video/x-msvideo", - ".mov" => "video/quicktime", - ".flv" => "video/x-flv", - ".wmv" => "video/x-ms-wmv", - ".webm" => "video/webm", + // Other formats + ".csv" => "text/csv", + ".zip" => "application/zip", + ".tar" => "application/x-tar", + ".gz" => "application/gzip", + ".rar" => "application/vnd.rar", + ".7z" => "application/x-7z-compressed", + ".eps" => "application/postscript", + ".sql" => "application/sql", + ".java" => "text/x-java-source", + _ => "application/octet-stream", + } +} - // Document formats - ".pdf" => "application/pdf", - ".doc" => "application/msword", - ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - ".ppt" => "application/vnd.ms-powerpoint", - ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", - ".xls" => "application/vnd.ms-excel", - ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ".txt" => "text/plain", - - // Web formats - ".html" => "text/html", - ".css" => "text/css", - ".js" => "application/javascript", - ".json" => "application/json", - ".xml" => "application/xml", - - // Other formats - ".csv" => "text/csv", - ".zip" => "application/zip", - ".tar" => "application/x-tar", - ".gz" => "application/gzip", - ".rar" => "application/vnd.rar", - ".7z" => "application/x-7z-compressed", - ".eps" => "application/postscript", - ".sql" => "application/sql", - ".java" => "text/x-java-source", - _ => "application/octet-stream", - } - } - - pub fn get_mimetype_from_fp(file_path: &str) -> &str { - // Sorry I just really wanted to get it done in a one liner. - // This splits a filepath based off ".", in reverse order, so that the first element will - // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") - // This is formated back to ".jpeg" because it's how the match statement is working. - // I could very easily change it but idk it was an interesting thing. - Self::get_mimetype( - &format!( - ".{}", file_path.rsplit(".") - .next() - .unwrap_or("time_to_be_an_octet_stream_lmao") - ) - ) - } - +pub fn get_mimetype_from_fp(file_path: &str) -> &str { + // Sorry I just really wanted to get it done in a one liner. + // This splits a filepath based off ".", in reverse order, so that the first element will + // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") + // This is formated back to ".jpeg" because it's how the match statement is working. + // I could very easily change it but idk it was an interesting thing. + // + // Hey, so maybe you should change the match statement to not care about the '.'? + // Then again this is just being used for this project, so I guess it doesn't really matter + get_mimetype(&format!( + ".{}", + file_path + .rsplit(".") + .next() + .unwrap_or("time_to_be_an_octet_stream_lmao") + )) } #[cfg(test)] mod tests { - use super::*; + use super::*; - #[test] - fn match_mime_test() { - assert_eq!(Mime::get_mimetype(".tar"), "application/x-tar"); - } + #[test] + fn match_mime_test() { + assert_eq!(get_mimetype(".tar"), "application/x-tar"); + } - #[test] - fn default_mime_test() { - assert_eq!(Mime::get_mimetype(".bf"), "application/octet-stream"); - } + #[test] + fn default_mime_test() { + assert_eq!(get_mimetype(".bf"), "application/octet-stream"); + } - #[test] - fn mime_from_file() { - assert_eq!(Mime::get_mimetype_from_fp("test.ico"), "image/x-icon"); - } + #[test] + fn mime_from_file() { + assert_eq!(get_mimetype_from_fp("test.ico"), "image/x-icon"); + } - #[test] - fn mime_from_file_path() { - assert_eq!(Mime::get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), "application/pdf"); - assert_eq!(Mime::get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), "text/plain") - } + #[test] + fn mime_from_file_path() { + assert_eq!( + get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), + "application/pdf" + ); + assert_eq!( + get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), + "text/plain" + ) + } -} \ No newline at end of file + #[test] + fn no_ext() { + assert_eq!( + get_mimetype_from_fp("edge_case_lmao"), + "application/octet-stream" + ) + } +} diff --git a/src/sync/r2bucket.rs b/src/sync/r2bucket.rs index ec4a471..e9fec51 100644 --- a/src/sync/r2bucket.rs +++ b/src/sync/r2bucket.rs @@ -1,4 +1,4 @@ -use crate::R2Client; +use crate::sync::R2Client; use crate::R2Error; #[derive(Debug)] @@ -29,54 +29,26 @@ impl R2Bucket { Self { bucket, client } } - pub async fn upload_file( - &self, - local_file_path: &str, - r2_file_key: &str, - ) -> Result<(), R2Error> { + pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> { self.client // I'm pasing None to let the R2Client derive the content type from the local_file_path .upload_file(&self.bucket, local_file_path, r2_file_key, None) - .await } - pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client .download_file(&self.bucket, r2_file_key, local_path, None) - .await } - pub async fn list_files( - &self, - ) -> Result>, R2Error> { - self.client.list_files(&self.bucket).await + pub fn list_files(&self) -> Result>, R2Error> { + self.client.list_files(&self.bucket) } - pub async fn list_folders(&self) -> Result, R2Error> { - self.client.list_folders(&self.bucket).await + pub fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket) + } + + pub fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> { + self.client.delete(&self.bucket, r2_file_key) } } - -#[cfg(test)] -mod tests { - use super::*; - - fn get_test_bucket() -> R2Bucket { - dotenv::dotenv().ok(); - R2Bucket::new("test-bucket".to_string()) - } - - #[test] - fn test_bucket_construction() { - let bucket = get_test_bucket(); - assert_eq!(bucket.bucket, "test-bucket"); - } - - // Example async test (requires a runtime, so ignored by default) - // #[tokio::test] - // async fn test_upload_file() { - // let bucket = get_test_bucket(); - // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; - // assert!(result.is_ok()); - // } -} diff --git a/src/sync/r2client.rs b/src/sync/r2client.rs index c87abdc..0176fd7 100644 --- a/src/sync/r2client.rs +++ b/src/sync/r2client.rs @@ -1,14 +1,15 @@ use crate::R2Error; -use crate::aws_signing::Sigv4Client; -use crate::mimetypes::Mime; +use crate::mimetypes::get_mimetype_from_fp; +use aws_sigv4::SigV4Credentials; use http::Method; +use log::trace; use reqwest::header::HeaderMap; use std::collections::HashMap; use std::str::FromStr; #[derive(Debug)] pub struct R2Client { - sigv4: Sigv4Client, + sigv4: SigV4Credentials, endpoint: String, } impl R2Client { @@ -23,14 +24,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -42,10 +43,11 @@ impl R2Client { key: Option<&str>, payload: impl AsRef<[u8]>, content_type: Option<&str>, + extra_headers: Option>, ) -> Result { let uri = http::Uri::from_str(&self.build_url(bucket, key)) .expect("invalid uri rip (make sure the build_url function works as intended)"); - let mut headers = Vec::new(); + let mut headers = extra_headers.unwrap_or_default(); headers.push(( "host".to_string(), uri.host().expect("Should have host in URI").to_owned(), @@ -55,35 +57,10 @@ impl R2Client { } let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); - // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap - // directly. - // // Granted this impl had much better handling, fuck it we ball. - // let mut header_map = header::HeaderMap::new(); - // for header in headers { - // header_map.insert( - // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header name out of {}", - // header.0.to_lowercase() - // ) - // }) - // .unwrap(), - // HeaderValue::from_str(&header.1) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", - // header.1, - // header.0.to_lowercase(), - // ) - // }) - // .unwrap(), - // ); - // } Ok(header_map) } - pub async fn upload_file( + pub fn upload_file( &self, bucket: &str, local_file_path: &str, @@ -92,12 +69,16 @@ impl R2Client { ) -> Result<(), R2Error> { // Payload (file data) let payload = std::fs::read(local_file_path)?; + trace!( + "[upload_file] Payload hash for signing: {}", + aws_sigv4::hash(&payload) + ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { Some(content_type) } else { - Some(Mime::get_mimetype_from_fp(local_file_path)) + Some(get_mimetype_from_fp(local_file_path)) }; let headers = self.create_headers( Method::PUT, @@ -105,17 +86,18 @@ impl R2Client { Some(r2_file_key), &payload, content_type, + None, )?; + trace!("[upload_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(r2_file_key)); - let client = reqwest::Client::new(); + let client = reqwest::blocking::Client::new(); let resp = client .put(&file_url) .headers(headers) .body(payload) - .send() - .await?; + .send()?; let status = resp.status(); - let text = resp.text().await?; + let text = resp.text()?; if status.is_success() { Ok(()) } else { @@ -128,60 +110,87 @@ impl R2Client { )) } } - pub async fn download_file( + pub fn download_file( &self, bucket: &str, key: &str, local_path: &str, + extra_headers: Option>, ) -> Result<(), R2Error> { // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // I don't know if I should trust it though, I don't see public impls with this. - let payload = "UNSIGNED-PAYLOAD"; - let content_type = Mime::get_mimetype_from_fp(local_path); + let payload = ""; + trace!("[download_file] Payload for signing: (empty)"); let headers = - self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); - let client = reqwest::Client::new(); - let resp = client.get(&file_url).headers(headers).send().await?; + self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; + trace!("[download_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(key)); + let client = reqwest::blocking::Client::new(); + let resp = client.get(&file_url).headers(headers).send()?; let status = resp.status(); if status.is_success() { - std::fs::write(local_path, resp.bytes().await?)?; + std::fs::write(local_path, resp.bytes()?)?; Ok(()) } else { Err(R2Error::FailedRequest( format!("dowloading file \"{key}\" from bucket \"{bucket}\""), status, - resp.text().await?, + resp.text()?, )) } } - async fn get_bucket_listing(&self, bucket: &str) -> Result { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = "application/xml"; - let headers = - self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; + pub fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> { + let payload = ""; + trace!("[delete_file] Payload for signing: (empty)"); + let headers = self.create_headers( + Method::DELETE, + bucket, + Some(remote_key), + payload, + None, + None, + )?; + trace!("[delete_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(remote_key)); + let client = reqwest::blocking::Client::new(); + let resp = client.delete(&file_url).headers(headers).send()?; + let status = resp.status(); + if status.is_success() { + Ok(()) + } else { + Err(R2Error::FailedRequest( + format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""), + status, + resp.text()?, + )) + } + } + fn get_bucket_listing(&self, bucket: &str) -> Result { + let payload = ""; + trace!("[get_bucket_listing] Payload for signing: (empty)"); + let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; + trace!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); - let client = reqwest::Client::new(); + let client = reqwest::blocking::Client::new(); let resp = client .get(&url) .headers(headers) .send() - .await .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - Ok(resp.text().await.map_err(R2Error::from)?) + Ok(resp.text().map_err(R2Error::from)?) } else { Err(R2Error::FailedRequest( String::from("list bucket...folders or something idfk"), status, - resp.text().await.map_err(R2Error::from)?, + resp.text().map_err(R2Error::from)?, )) } } - pub async fn list_files(&self, bucket: &str) -> Result>, R2Error> { - let xml = self.get_bucket_listing(bucket).await?; + pub fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket)?; let mut files_dict: HashMap> = HashMap::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -203,8 +212,8 @@ impl R2Client { Ok(files_dict) } - pub async fn list_folders(&self, bucket: &str) -> Result, R2Error> { - let xml = self.get_bucket_listing(bucket).await?; + pub fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket)?; let mut folders = std::collections::HashSet::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -214,10 +223,10 @@ impl R2Client { .filter(|e| e.name == "Contents") { let key_elem = content.get_child("Key").and_then(|k| k.get_text()); - if let Some(file_key) = key_elem { - if let Some(idx) = file_key.find('/') { - folders.insert(file_key[..idx].to_string()); - } + if let Some(file_key) = key_elem + && let Some(idx) = file_key.find('/') + { + folders.insert(file_key[..idx].to_string()); } } Ok(folders.into_iter().collect()) @@ -225,7 +234,10 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { - Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + Some(k) => { + let encoded_key = aws_sigv4::url_encode(k); + format!("{}/{}/{}", self.endpoint, bucket, encoded_key) + } None => format!("{}/{}/", self.endpoint, bucket), } } @@ -279,6 +291,7 @@ mod tests { Some("key"), "deadbeef", Some("application/octet-stream"), + None, ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/tests/r2_tests.rs b/tests/r2_tests.rs index 73f1693..f5dcf00 100644 --- a/tests/r2_tests.rs +++ b/tests/r2_tests.rs @@ -1,4 +1,3 @@ -use std::env; use std::fs; use std::io::Write; @@ -63,7 +62,6 @@ mod sync_tests { // Cleanup fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_download_path).unwrap(); - // NOTE: The test file on R2 is not deleted as there is no delete API yet. } } @@ -132,6 +130,8 @@ mod async_tests { // Cleanup fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_download_path).unwrap(); - // NOTE: The test file on R2 is not deleted as there is no delete API yet. + + // 6. Delete file + bucket.delete_file(r2_file_key).await.unwrap(); } } diff --git a/todo.md b/todo.md index 562cdd8..bad86f6 100644 --- a/todo.md +++ b/todo.md @@ -1,29 +1,12 @@ -Okay I think I did everything, so to clean up: +## For release: + - [ ] Create a crate::Result that is Result, and have Ok(status_code) + - [ ] Consider dropping more dependencies, using hyper or some lower level stuff for async, and then http for blocking + - [ ] A way to view the file contents (UTF-8 valid) would be cool + - [ ] Add functions that will list files with their metadata (perhaps a simple R2File type?) + - [ ] Clear out all all print statements and consider logging (this is a library, after all) -# Fatal Bug -When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header. - -Check and see if the `aws_signing` function `create_canonical_request_headers`'s output is getting tossed. -The output of that function includes request and headers with the host headers in there. -I imagine that, instead of parsing that string to turn into a header map or whatever the `signature` function returns, it is creating it's own "content-type" and "x-amz-date" headers and pushing them to it's own vector -I needed to come here to think cause I'm tired and would've fucked it up if I just brute forced it instead of critically thought about it - -The expected solution is taking the "request", and turning in into a "HeaderMap", since we aren't low enough level to pass unsafe strings by ourselves. -Technically the output of the canonical_request is correct: -Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"` -However it's not in the format of the vector of strings, and also I don't think there should be a new line after put, but there should be two before the content type. Not sure. -But that's what the request looks like, then the headers that we could parse as well that the same function returns are as follows: -Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"` -Which seems to match up, however the final signature that the main `signature` function returns is: -Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]` -So my assumption is that the `signature` function is making it's own headers and dropping the canonical ones instead of parsing one of the returned strings into one. -This is happening before the "authorization" headers (the only headers that the main function should be adding itself) are pushed to the current vector -I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as it's own library or anything. - -Okay after saying all that, surely I could've just fixed it by now and will remember it in the later. - -- [X] Update the sync library -- [X] Make a .env with test-bucket creds -- [ ] Actually test the damn thing -- [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?) +## Dev (since we're so back): + - [X] Update the sync library + - [X] Make a .env with test-bucket creds + - [X] Actually test the damn thing From 540204e41c2148aa6d6cf566dfbccc92a349d5f7 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Fri, 19 Sep 2025 21:27:16 -0400 Subject: [PATCH 4/4] Moved from repo to cargo workspace repo yuh --- Cargo.toml => r2client/Cargo.toml | 0 {src => r2client/src}/_async.rs | 0 {src => r2client/src}/_async/r2bucket.rs | 0 {src => r2client/src}/_async/r2client.rs | 0 {src => r2client/src}/error.rs | 0 {src => r2client/src}/lib.rs | 0 {src => r2client/src}/mimetypes.rs | 0 {src => r2client/src}/sync.rs | 0 {src => r2client/src}/sync/r2bucket.rs | 0 {src => r2client/src}/sync/r2client.rs | 0 {tests => r2client/tests}/r2_tests.rs | 0 todo.md => r2client/todo.md | 0 12 files changed, 0 insertions(+), 0 deletions(-) rename Cargo.toml => r2client/Cargo.toml (100%) rename {src => r2client/src}/_async.rs (100%) rename {src => r2client/src}/_async/r2bucket.rs (100%) rename {src => r2client/src}/_async/r2client.rs (100%) rename {src => r2client/src}/error.rs (100%) rename {src => r2client/src}/lib.rs (100%) rename {src => r2client/src}/mimetypes.rs (100%) rename {src => r2client/src}/sync.rs (100%) rename {src => r2client/src}/sync/r2bucket.rs (100%) rename {src => r2client/src}/sync/r2client.rs (100%) rename {tests => r2client/tests}/r2_tests.rs (100%) rename todo.md => r2client/todo.md (100%) diff --git a/Cargo.toml b/r2client/Cargo.toml similarity index 100% rename from Cargo.toml rename to r2client/Cargo.toml diff --git a/src/_async.rs b/r2client/src/_async.rs similarity index 100% rename from src/_async.rs rename to r2client/src/_async.rs diff --git a/src/_async/r2bucket.rs b/r2client/src/_async/r2bucket.rs similarity index 100% rename from src/_async/r2bucket.rs rename to r2client/src/_async/r2bucket.rs diff --git a/src/_async/r2client.rs b/r2client/src/_async/r2client.rs similarity index 100% rename from src/_async/r2client.rs rename to r2client/src/_async/r2client.rs diff --git a/src/error.rs b/r2client/src/error.rs similarity index 100% rename from src/error.rs rename to r2client/src/error.rs diff --git a/src/lib.rs b/r2client/src/lib.rs similarity index 100% rename from src/lib.rs rename to r2client/src/lib.rs diff --git a/src/mimetypes.rs b/r2client/src/mimetypes.rs similarity index 100% rename from src/mimetypes.rs rename to r2client/src/mimetypes.rs diff --git a/src/sync.rs b/r2client/src/sync.rs similarity index 100% rename from src/sync.rs rename to r2client/src/sync.rs diff --git a/src/sync/r2bucket.rs b/r2client/src/sync/r2bucket.rs similarity index 100% rename from src/sync/r2bucket.rs rename to r2client/src/sync/r2bucket.rs diff --git a/src/sync/r2client.rs b/r2client/src/sync/r2client.rs similarity index 100% rename from src/sync/r2client.rs rename to r2client/src/sync/r2client.rs diff --git a/tests/r2_tests.rs b/r2client/tests/r2_tests.rs similarity index 100% rename from tests/r2_tests.rs rename to r2client/tests/r2_tests.rs diff --git a/todo.md b/r2client/todo.md similarity index 100% rename from todo.md rename to r2client/todo.md