From 76cc06d7e1a3e50d8ca2021f9123d28535e74752 Mon Sep 17 00:00:00 2001 From: ForeverPyrite Date: Fri, 19 Sep 2025 21:02:12 -0400 Subject: [PATCH] Pre-release commit Anything below this first part is definitely me being astounded This is the commit where it is in a working state that isn't exactly primitive, but it's not comprehensive, fully fleshed out, or polished either. But I like where it's going You wouldn't believe it. I came back to this after a MASSIVE crash out with how HUGE and CLUNKY trying to use the AWS SDK was. Well Guess what GUESS Literally I just went to test it this morning and I freaking KID YOU NOT it just worked. no changes. it just worked. YOU WANNA KNOW HOW I CAN TELL? Look at this FREAKING COMMIT and see that ALL THAT CHANGED was the todo.md FILE literally what the fart I shouldn't complain but OH MY GOODNESS I spent SO LONG working on it and it turned out it WASN'T all for naught because I guess something was just...stuck. Bad credentials somehow? I don't know anymore. I'm just glad it's over. 
--- Cargo.toml | 21 +-- src/_async/r2bucket.rs | 26 +--- src/_async/r2client.rs | 88 ++++++------ src/aws_signing.rs | 294 ----------------------------------------- src/lib.rs | 9 -- src/mimetypes.rs | 185 ++++++++++++++------------ src/sync/r2bucket.rs | 50 ++----- src/sync/r2client.rs | 141 +++++++++++--------- tests/r2_tests.rs | 6 +- todo.md | 37 ++---- 10 files changed, 252 insertions(+), 605 deletions(-) delete mode 100644 src/aws_signing.rs diff --git a/Cargo.toml b/Cargo.toml index f2d3db4..6c11923 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,32 +1,23 @@ [package] name = "r2client" -version = "0.1.0" +version = "0.2.0" edition = "2024" [lib] [dependencies] -sha2 = "0.10.9" -bytes = "1.10.1" -reqwest = { version = "0.12.19", features = ["blocking"] } -chrono = "0.4.41" -hex = "0.4.3" -hmac = "0.12.1" +reqwest = "0.12.19" xmltree = "0.11.0" thiserror = "2" -async-trait = "0.1.89" -async-std = { version = "1.0", optional = true } -tokio = { version = "1.0", features = ["rt-multi-thread"], optional = true } -futures-executor = { version = "0.3", optional = true } -urlencoding = "2.1.3" http = "1.3.1" +aws_sigv4 = { path = "../aws_sigv4/" } +log = "0.4.28" [dev-dependencies] tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] } dotenv = "0.15" [features] -async = ["tokio"] +async = [] default = ["async"] -sync = ["tokio/rt-multi-thread", "futures-executor"] -async-std-runtime = ["async-std"] +sync = ["reqwest/blocking"] diff --git a/src/_async/r2bucket.rs b/src/_async/r2bucket.rs index ec4a471..692a4fe 100644 --- a/src/_async/r2bucket.rs +++ b/src/_async/r2bucket.rs @@ -1,4 +1,4 @@ -use crate::R2Client; +use crate::_async::R2Client; use crate::R2Error; #[derive(Debug)] @@ -55,28 +55,8 @@ impl R2Bucket { pub async fn list_folders(&self) -> Result, R2Error> { self.client.list_folders(&self.bucket).await } -} -#[cfg(test)] -mod tests { - use super::*; - - fn get_test_bucket() -> R2Bucket { - dotenv::dotenv().ok(); - 
R2Bucket::new("test-bucket".to_string()) + pub async fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> { + self.client.delete(&self.bucket, r2_file_key).await } - - #[test] - fn test_bucket_construction() { - let bucket = get_test_bucket(); - assert_eq!(bucket.bucket, "test-bucket"); - } - - // Example async test (requires a runtime, so ignored by default) - // #[tokio::test] - // async fn test_upload_file() { - // let bucket = get_test_bucket(); - // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; - // assert!(result.is_ok()); - // } } diff --git a/src/_async/r2client.rs b/src/_async/r2client.rs index 5f3e67c..4d2c109 100644 --- a/src/_async/r2client.rs +++ b/src/_async/r2client.rs @@ -1,14 +1,15 @@ use crate::R2Error; -use crate::aws_signing::Sigv4Client; -use crate::mimetypes::Mime; +use crate::mimetypes::get_mimetype_from_fp; +use aws_sigv4::SigV4Credentials; use http::Method; +use log::trace; use reqwest::header::HeaderMap; use std::collections::HashMap; use std::str::FromStr; #[derive(Debug)] pub struct R2Client { - sigv4: Sigv4Client, + sigv4: SigV4Credentials, endpoint: String, } impl R2Client { @@ -23,14 +24,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -56,31 +57,6 @@ impl R2Client { } let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); - // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap - // directly. - // // Granted this impl had much better handling, fuck it we ball. 
- // let mut header_map = header::HeaderMap::new(); - // for header in headers { - // header_map.insert( - // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header name out of {}", - // header.0.to_lowercase() - // ) - // }) - // .unwrap(), - // HeaderValue::from_str(&header.1) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", - // header.1, - // header.0.to_lowercase(), - // ) - // }) - // .unwrap(), - // ); - // } Ok(header_map) } @@ -93,16 +69,16 @@ impl R2Client { ) -> Result<(), R2Error> { // Payload (file data) let payload = std::fs::read(local_file_path)?; - println!( + trace!( "[upload_file] Payload hash for signing: {}", - crate::aws_signing::hash(&payload) + aws_sigv4::hash(&payload) ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { Some(content_type) } else { - Some(Mime::get_mimetype_from_fp(local_file_path)) + Some(get_mimetype_from_fp(local_file_path)) }; let headers = self.create_headers( Method::PUT, @@ -112,7 +88,7 @@ impl R2Client { content_type, None, )?; - println!("[upload_file] Headers sent to request: {headers:#?}"); + trace!("[upload_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(r2_file_key)); let client = reqwest::Client::new(); let resp = client @@ -145,10 +121,10 @@ impl R2Client { // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // I don't know if I should trust it though, I don't see public impls with this. 
let payload = ""; - println!("[download_file] Payload for signing: (empty)"); + trace!("[download_file] Payload for signing: (empty)"); let headers = self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; - println!("[download_file] Headers sent to request: {headers:#?}"); + trace!("[download_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(key)); let client = reqwest::Client::new(); let resp = client.get(&file_url).headers(headers).send().await?; @@ -164,11 +140,37 @@ impl R2Client { )) } } + pub async fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> { + let payload = ""; + trace!("[delete_file] Payload for signing: (empty)"); + let headers = self.create_headers( + Method::DELETE, + bucket, + Some(remote_key), + payload, + None, + None, + )?; + trace!("[delete_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(remote_key)); + let client = reqwest::Client::new(); + let resp = client.delete(&file_url).headers(headers).send().await?; + let status = resp.status(); + if status.is_success() { + Ok(()) + } else { + Err(R2Error::FailedRequest( + format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) + } + } async fn get_bucket_listing(&self, bucket: &str) -> Result { let payload = ""; - println!("[get_bucket_listing] Payload for signing: (empty)"); + trace!("[get_bucket_listing] Payload for signing: (empty)"); let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; - println!("[get_bucket_listing] Headers sent to request: {headers:#?}"); + trace!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); let client = reqwest::Client::new(); let resp = client @@ -223,10 +225,10 @@ impl R2Client { .filter(|e| e.name == "Contents") { let key_elem = content.get_child("Key").and_then(|k| k.get_text()); - if let Some(file_key) 
= key_elem { - if let Some(idx) = file_key.find('/') { - folders.insert(file_key[..idx].to_string()); - } + if let Some(file_key) = key_elem + && let Some(idx) = file_key.find('/') + { + folders.insert(file_key[..idx].to_string()); } } Ok(folders.into_iter().collect()) @@ -235,7 +237,7 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { Some(k) => { - let encoded_key = crate::aws_signing::url_encode(k); + let encoded_key = aws_sigv4::url_encode(k); format!("{}/{}/{}", self.endpoint, bucket, encoded_key) } None => format!("{}/{}/", self.endpoint, bucket), diff --git a/src/aws_signing.rs b/src/aws_signing.rs deleted file mode 100644 index 5c792b1..0000000 --- a/src/aws_signing.rs +++ /dev/null @@ -1,294 +0,0 @@ -use chrono::Utc; -use hmac::{Hmac, Mac}; -use sha2::{Digest, Sha256}; - -type Hmac256 = Hmac; - -const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; - -// --- Utility functions --- -fn lowercase(string: &str) -> String { - string.to_lowercase() -} - -fn hex>(data: T) -> String { - hex::encode(data) -} - -fn sha256hash>(data: T) -> [u8; 32] { - Sha256::digest(data).into() -} - -fn hmac_sha256(signing_key: &[u8], message: &str) -> Vec { - let mut mac = Hmac256::new_from_slice(signing_key).expect("bad key :pensive:"); - mac.update(message.as_bytes()); - mac.finalize().into_bytes().to_vec() -} - -fn trim(string: &str) -> String { - string.trim().to_string() -} - -pub fn hash>(payload: T) -> String { - hex(sha256hash(payload)) -} - -pub fn url_encode(url: &str) -> String { - let mut url = urlencoding::encode(url).into_owned(); - let encoded_to_replacement: [(&str, &str); 4] = - [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")]; - for (encoded_chars_pattern, replacement) in encoded_to_replacement { - url = url.replace(encoded_chars_pattern, replacement) - } - url -} - -// --- Signing Functions --- -// These don't use any parts of the Sigv4Client, so they are external 
- -// --- Canonical request --- -fn create_canonical_request( - method: http::Method, - uri: http::Uri, - mut headers: Vec<(String, String)>, - hashed_payload: &str, -) -> (String, Vec<(String, String)>, String) { - // HTTPMethod - let http_method = method.to_string(); - - // CanonicalURI = *path only* (spec forbids scheme+host here) - let canonical_uri = if uri.path().is_empty() { - "/".to_string() - } else { - uri.path().to_string() - }; - - // CanonicalQueryString (URL-encoded, sorted by key) - let canonical_query_string = if let Some(query_string) = uri.query() { - let mut pairs = query_string - .split('&') - .map(|query| { - let (k, v) = query.split_once('=').unwrap_or((query, "")); - (url_encode(k), url_encode(v)) - }) - .collect::>(); - pairs.sort_by(|a, b| a.0.cmp(&b.0)); - pairs - .into_iter() - .map(|(k, v)| format!("{k}={v}")) - .collect::>() - .join("&") - } else { - String::new() - }; - - // checks for proper host headers - let host = uri - .host() - .expect("uri passed without a proper host") - .to_string(); - if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("host")) { - headers.push(("host".to_string(), host)); - } - - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256")) - { - headers.push(( - "x-amz-content-sha256".to_string(), - hashed_payload.to_owned(), - )) - } - - // CanonicalHeaders + SignedHeaders - let mut http_headers = headers - .iter() - .map(|(name, value)| (lowercase(name), trim(value))) - .collect::>(); - http_headers.sort_by(|(k1, _), (k2, _)| k1.cmp(k2)); - - let canonical_headers: String = http_headers - .iter() - .map(|(k, v)| format!("{k}:{v}\n")) - .collect(); - - let signed_headers: String = http_headers - .iter() - .map(|(k, _)| k.clone()) - .collect::>() - .join(";"); - - // Final canonical request - let canonical_request = format!( - "{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}" - ); - - (canonical_request, 
http_headers, signed_headers) -} -fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec { - hmac_sha256(signing_key, string_to_sign) -} - -fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String { - format!( - "{}\n{}\n{}\n{}", - "AWS4-HMAC-SHA256", amz_date, scope, hashed_canonical_request - ) -} -/// Structure containing all the data relevant for an AWS Service utilizing SigV4. -/// Service: String containing the AWS service (e.g. "ec2" or "s3") -/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1") -/// Access Key: The "Access Key" to use with the AWS service (crazy, ik) -/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah) -/// -/// ``` -/// -/// -/// use aws_signing::Sigv4Client; -/// -/// let s3_client = Sigv4Client::new( -/// "s3", -/// "us-east-1", -/// std::env::var("S3_ACCESS_KEY").unwrap(), -/// std::env::var("S3_SECRET_KEY").unwrap(), -/// ) -/// let (_, request_headers) = s3client.signature( -/// http::Method::GET, -/// http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"), -/// vec![("content-type", "text/plain")], -/// "" // Since it's a GET request, the payload is "" -/// ) -/// ``` -#[derive(Debug)] -// A more mature client would also have session_key: Option, but not my problem -pub struct Sigv4Client { - // Would it makes more sense for these to be type generics - // with trait param ToString? - // Either that or just &str or String...wait, union? - // Nah there has to be a better way to do it than...that - // but I don't wanna enum!!! - service: String, - region: String, - // Would it makes more sense for these to be type generics - // with trait param AsRef<[u8]>? - access_key: String, - secret_key: String, -} -/// NOTE: This only impliments functions that require one of the Sigv4Client fields. 
-/// For other functions related to the signing proccess, they are defined above, including the -/// prequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html -impl Sigv4Client { - /// Creates a new instance of the Sigv4Client for a particular service, in a region, with your - /// private and public access keys. - /// - /// For some reason this function will take any values that impl Into, so you can pass - /// &str, String, or something else if you decide to get freaky. - pub fn new( - service: impl Into, - region: impl Into, - pub_key: impl Into, - priv_key: impl Into, - ) -> Self { - Self { - service: service.into(), - region: region.into(), - access_key: pub_key.into(), - secret_key: priv_key.into(), - } - } - - // In a more mature client, this might be an enum of AWSRegions - // I also don't even know if this could ever be useful lol, wouldn't you have individual - // clients for each region or use "auto" for AWS to figure it out for you? whatever. - pub fn set_region(&mut self, region: impl Into) { - self.region = region.into() - } - - fn credential_scope(&self, date: &str) -> String { - format!( - "{}/{}/{}/aws4_request", - date, - lowercase(&self.region), - lowercase(&self.service) - ) - } - - fn derive_signing_key(&self, date: &str) -> Vec { - let secret_key = &self.secret_key; - let key = format!("AWS4{secret_key}"); - let date_key = hmac_sha256(key.as_bytes(), date); - let date_region_key = hmac_sha256(&date_key, &self.region); - let date_region_service_key = hmac_sha256(&date_region_key, &self.service); - hmac_sha256(&date_region_service_key, "aws4_request") - } - - // --- API --- - /// This is the only function to use <3 - pub fn signature>( - &self, - method: http::Method, - uri: http::Uri, - // Should probably make this a header map, then turn it into a Vec(String, String) to sort - // by header name cause Amazon said so. 
- mut headers: Vec<(String, String)>, - payload: T, - ) -> (String, http::HeaderMap) { - let auth_algorithm = "AWS4-HMAC-SHA256"; - let now = Utc::now(); - let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); - let date = now.format("%Y%m%d").to_string(); - let payload_as_bytes = payload.as_ref(); - let payload_hash = if payload_as_bytes.is_empty() { - EMPTY_PAYLOAD_HASH.to_string() - } else { - hash(payload_as_bytes) - }; - - // Add x-amz-date header if not already present - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) - { - headers.push(("x-amz-date".to_string(), amz_date.clone())); - } - - // Canonical request - let (canonical_request, mut headers, signed_headers) = - create_canonical_request(method, uri, headers, &payload_hash); - - // String to sign - let scope = self.credential_scope(&date); - let hashed_canonical_request = hash(&canonical_request); - let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request); - - // Signing key + signature - let signing_key = self.derive_signing_key(&date); - let signature = hex(calculate_signature(&signing_key, &string_to_sign)); - - // Authorization header - let access_key = &self.access_key; - let credential = format!("{access_key}/{scope}"); - let auth_header = format!( - "{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}" - ); - - println!("\n--- AWS SigV4 Debug ---"); - println!("1. CanonicalRequest:\n---\n{canonical_request}\n---"); - println!("2. StringToSign:\n---\n{string_to_sign}\n---"); - println!("3. SigningKey:\n---\n{}\n---", hex(&signing_key)); - println!("4. Signature:\n---\n{signature}\n---"); - println!("5. 
Authorization Header:\n---\n{auth_header}\n---"); - - headers.push(("authorization".to_string(), auth_header)); - - let mut header_map: http::HeaderMap = http::HeaderMap::new(); - for (header, value) in headers.clone() { - header_map.insert( - http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(), - http::HeaderValue::from_str(&value).unwrap(), - ); - } - (signature, header_map) - } -} diff --git a/src/lib.rs b/src/lib.rs index 02b7efa..e2c4068 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,3 @@ -mod aws_signing; mod error; mod mimetypes; pub use error::R2Error; @@ -9,11 +8,3 @@ pub use _async::{R2Bucket, R2Client}; #[cfg(feature = "sync")] pub mod sync; - -#[cfg(test)] -mod test { - // use crate::{R2Bucket, R2Client, sync}; - - #[test] - fn test() {} -} diff --git a/src/mimetypes.rs b/src/mimetypes.rs index 28ffcc1..c3ae6c4 100644 --- a/src/mimetypes.rs +++ b/src/mimetypes.rs @@ -1,103 +1,112 @@ +pub fn get_mimetype(key: &str) -> &'static str { + match key { + // Image formats + ".png" => "image/png", + ".jpg" | ".jpeg" => "image/jpeg", + ".gif" => "image/gif", + ".svg" => "image/svg+xml", + ".ico" => "image/x-icon", + ".webp" => "image/webp", + // Audio formats + ".m4a" => "audio/x-m4a", + ".mp3" => "audio/mpeg", + ".wav" => "audio/wav", + ".ogg" => "audio/ogg", -pub enum Mime{} + // Video formats + ".mp4" => "video/mp4", + ".avi" => "video/x-msvideo", + ".mov" => "video/quicktime", + ".flv" => "video/x-flv", + ".wmv" => "video/x-ms-wmv", + ".webm" => "video/webm", -impl Mime { - pub fn get_mimetype(key: &str) -> &'static str { - match key { - // Image formats - ".png" => "image/png", - ".jpg" | ".jpeg" => "image/jpeg", - ".gif" => "image/gif", - ".svg" => "image/svg+xml", - ".ico" => "image/x-icon", - ".webp" => "image/webp", + // Document formats + ".pdf" => "application/pdf", + ".doc" => "application/msword", + ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".ppt" => 
"application/vnd.ms-powerpoint", + ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", + ".xls" => "application/vnd.ms-excel", + ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ".txt" => "text/plain", - // Audio formats - ".m4a" => "audio/x-m4a", - ".mp3" => "audio/mpeg", - ".wav" => "audio/wav", - ".ogg" => "audio/ogg", + // Web formats + ".html" => "text/html", + ".css" => "text/css", + ".js" => "application/javascript", + ".json" => "application/json", + ".xml" => "application/xml", - // Video formats - ".mp4" => "video/mp4", - ".avi" => "video/x-msvideo", - ".mov" => "video/quicktime", - ".flv" => "video/x-flv", - ".wmv" => "video/x-ms-wmv", - ".webm" => "video/webm", + // Other formats + ".csv" => "text/csv", + ".zip" => "application/zip", + ".tar" => "application/x-tar", + ".gz" => "application/gzip", + ".rar" => "application/vnd.rar", + ".7z" => "application/x-7z-compressed", + ".eps" => "application/postscript", + ".sql" => "application/sql", + ".java" => "text/x-java-source", + _ => "application/octet-stream", + } +} - // Document formats - ".pdf" => "application/pdf", - ".doc" => "application/msword", - ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - ".ppt" => "application/vnd.ms-powerpoint", - ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation", - ".xls" => "application/vnd.ms-excel", - ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - ".txt" => "text/plain", - - // Web formats - ".html" => "text/html", - ".css" => "text/css", - ".js" => "application/javascript", - ".json" => "application/json", - ".xml" => "application/xml", - - // Other formats - ".csv" => "text/csv", - ".zip" => "application/zip", - ".tar" => "application/x-tar", - ".gz" => "application/gzip", - ".rar" => "application/vnd.rar", - ".7z" => "application/x-7z-compressed", - ".eps" => "application/postscript", - 
".sql" => "application/sql", - ".java" => "text/x-java-source", - _ => "application/octet-stream", - } - } - - pub fn get_mimetype_from_fp(file_path: &str) -> &str { - // Sorry I just really wanted to get it done in a one liner. - // This splits a filepath based off ".", in reverse order, so that the first element will - // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") - // This is formated back to ".jpeg" because it's how the match statement is working. - // I could very easily change it but idk it was an interesting thing. - Self::get_mimetype( - &format!( - ".{}", file_path.rsplit(".") - .next() - .unwrap_or("time_to_be_an_octet_stream_lmao") - ) - ) - } - +pub fn get_mimetype_from_fp(file_path: &str) -> &str { + // Sorry I just really wanted to get it done in a one liner. + // This splits a filepath based off ".", in reverse order, so that the first element will + // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") + // This is formated back to ".jpeg" because it's how the match statement is working. + // I could very easily change it but idk it was an interesting thing. + // + // Hey, so maybe you should change the match statement to not care about the '.'? 
+ // Then again this is just being used for this project, so I guess it doesn't really matter + get_mimetype(&format!( + ".{}", + file_path + .rsplit(".") + .next() + .unwrap_or("time_to_be_an_octet_stream_lmao") + )) } #[cfg(test)] mod tests { - use super::*; + use super::*; - #[test] - fn match_mime_test() { - assert_eq!(Mime::get_mimetype(".tar"), "application/x-tar"); - } + #[test] + fn match_mime_test() { + assert_eq!(get_mimetype(".tar"), "application/x-tar"); + } - #[test] - fn default_mime_test() { - assert_eq!(Mime::get_mimetype(".bf"), "application/octet-stream"); - } + #[test] + fn default_mime_test() { + assert_eq!(get_mimetype(".bf"), "application/octet-stream"); + } - #[test] - fn mime_from_file() { - assert_eq!(Mime::get_mimetype_from_fp("test.ico"), "image/x-icon"); - } + #[test] + fn mime_from_file() { + assert_eq!(get_mimetype_from_fp("test.ico"), "image/x-icon"); + } - #[test] - fn mime_from_file_path() { - assert_eq!(Mime::get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), "application/pdf"); - assert_eq!(Mime::get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), "text/plain") - } + #[test] + fn mime_from_file_path() { + assert_eq!( + get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), + "application/pdf" + ); + assert_eq!( + get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), + "text/plain" + ) + } -} \ No newline at end of file + #[test] + fn no_ext() { + assert_eq!( + get_mimetype_from_fp("edge_case_lmao"), + "application/octet-stream" + ) + } +} diff --git a/src/sync/r2bucket.rs b/src/sync/r2bucket.rs index ec4a471..e9fec51 100644 --- a/src/sync/r2bucket.rs +++ b/src/sync/r2bucket.rs @@ -1,4 +1,4 @@ -use crate::R2Client; +use crate::sync::R2Client; use crate::R2Error; #[derive(Debug)] @@ -29,54 +29,26 @@ impl R2Bucket { Self { bucket, client } } - pub async fn upload_file( - &self, - local_file_path: &str, - r2_file_key: &str, - ) -> Result<(), R2Error> { + pub fn upload_file(&self, local_file_path: &str, 
r2_file_key: &str) -> Result<(), R2Error> { self.client // I'm pasing None to let the R2Client derive the content type from the local_file_path .upload_file(&self.bucket, local_file_path, r2_file_key, None) - .await } - pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client .download_file(&self.bucket, r2_file_key, local_path, None) - .await } - pub async fn list_files( - &self, - ) -> Result>, R2Error> { - self.client.list_files(&self.bucket).await + pub fn list_files(&self) -> Result>, R2Error> { + self.client.list_files(&self.bucket) } - pub async fn list_folders(&self) -> Result, R2Error> { - self.client.list_folders(&self.bucket).await + pub fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket) + } + + pub fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> { + self.client.delete(&self.bucket, r2_file_key) } } - -#[cfg(test)] -mod tests { - use super::*; - - fn get_test_bucket() -> R2Bucket { - dotenv::dotenv().ok(); - R2Bucket::new("test-bucket".to_string()) - } - - #[test] - fn test_bucket_construction() { - let bucket = get_test_bucket(); - assert_eq!(bucket.bucket, "test-bucket"); - } - - // Example async test (requires a runtime, so ignored by default) - // #[tokio::test] - // async fn test_upload_file() { - // let bucket = get_test_bucket(); - // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; - // assert!(result.is_ok()); - // } -} diff --git a/src/sync/r2client.rs b/src/sync/r2client.rs index c87abdc..0176fd7 100644 --- a/src/sync/r2client.rs +++ b/src/sync/r2client.rs @@ -1,14 +1,15 @@ use crate::R2Error; -use crate::aws_signing::Sigv4Client; -use crate::mimetypes::Mime; +use crate::mimetypes::get_mimetype_from_fp; +use aws_sigv4::SigV4Credentials; use http::Method; +use log::trace; use reqwest::header::HeaderMap; use 
std::collections::HashMap; use std::str::FromStr; #[derive(Debug)] pub struct R2Client { - sigv4: Sigv4Client, + sigv4: SigV4Credentials, endpoint: String, } impl R2Client { @@ -23,14 +24,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), + sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -42,10 +43,11 @@ impl R2Client { key: Option<&str>, payload: impl AsRef<[u8]>, content_type: Option<&str>, + extra_headers: Option>, ) -> Result { let uri = http::Uri::from_str(&self.build_url(bucket, key)) .expect("invalid uri rip (make sure the build_url function works as intended)"); - let mut headers = Vec::new(); + let mut headers = extra_headers.unwrap_or_default(); headers.push(( "host".to_string(), uri.host().expect("Should have host in URI").to_owned(), @@ -55,35 +57,10 @@ impl R2Client { } let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); - // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap - // directly. - // // Granted this impl had much better handling, fuck it we ball. 
- // let mut header_map = header::HeaderMap::new(); - // for header in headers { - // header_map.insert( - // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header name out of {}", - // header.0.to_lowercase() - // ) - // }) - // .unwrap(), - // HeaderValue::from_str(&header.1) - // .inspect_err(|e| { - // eprint!( - // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", - // header.1, - // header.0.to_lowercase(), - // ) - // }) - // .unwrap(), - // ); - // } Ok(header_map) } - pub async fn upload_file( + pub fn upload_file( &self, bucket: &str, local_file_path: &str, @@ -92,12 +69,16 @@ impl R2Client { ) -> Result<(), R2Error> { // Payload (file data) let payload = std::fs::read(local_file_path)?; + trace!( + "[upload_file] Payload hash for signing: {}", + aws_sigv4::hash(&payload) + ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { Some(content_type) } else { - Some(Mime::get_mimetype_from_fp(local_file_path)) + Some(get_mimetype_from_fp(local_file_path)) }; let headers = self.create_headers( Method::PUT, @@ -105,17 +86,18 @@ impl R2Client { Some(r2_file_key), &payload, content_type, + None, )?; + trace!("[upload_file] Headers sent to request: {headers:#?}"); let file_url = self.build_url(bucket, Some(r2_file_key)); - let client = reqwest::Client::new(); + let client = reqwest::blocking::Client::new(); let resp = client .put(&file_url) .headers(headers) .body(payload) - .send() - .await?; + .send()?; let status = resp.status(); - let text = resp.text().await?; + let text = resp.text()?; if status.is_success() { Ok(()) } else { @@ -128,60 +110,87 @@ impl R2Client { )) } } - pub async fn download_file( + pub fn download_file( &self, bucket: &str, key: &str, local_path: &str, + extra_headers: Option>, ) -> Result<(), R2Error> { // 
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // I don't know if I should trust it though, I don't see public impls with this. - let payload = "UNSIGNED-PAYLOAD"; - let content_type = Mime::get_mimetype_from_fp(local_path); + let payload = ""; + trace!("[download_file] Payload for signing: (empty)"); let headers = - self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); - let client = reqwest::Client::new(); - let resp = client.get(&file_url).headers(headers).send().await?; + self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; + trace!("[download_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(key)); + let client = reqwest::blocking::Client::new(); + let resp = client.get(&file_url).headers(headers).send()?; let status = resp.status(); if status.is_success() { - std::fs::write(local_path, resp.bytes().await?)?; + std::fs::write(local_path, resp.bytes()?)?; Ok(()) } else { Err(R2Error::FailedRequest( format!("dowloading file \"{key}\" from bucket \"{bucket}\""), status, - resp.text().await?, + resp.text()?, )) } } - async fn get_bucket_listing(&self, bucket: &str) -> Result { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = "application/xml"; - let headers = - self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; + pub fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> { + let payload = ""; + trace!("[delete_file] Payload for signing: (empty)"); + let headers = self.create_headers( + Method::DELETE, + bucket, + Some(remote_key), + 
payload, + None, + None, + )?; + trace!("[delete_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(remote_key)); + let client = reqwest::blocking::Client::new(); + let resp = client.delete(&file_url).headers(headers).send()?; + let status = resp.status(); + if status.is_success() { + Ok(()) + } else { + Err(R2Error::FailedRequest( + format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""), + status, + resp.text()?, + )) + } + } + fn get_bucket_listing(&self, bucket: &str) -> Result { + let payload = ""; + trace!("[get_bucket_listing] Payload for signing: (empty)"); + let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; + trace!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); - let client = reqwest::Client::new(); + let client = reqwest::blocking::Client::new(); let resp = client .get(&url) .headers(headers) .send() - .await .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - Ok(resp.text().await.map_err(R2Error::from)?) + Ok(resp.text().map_err(R2Error::from)?) 
} else { Err(R2Error::FailedRequest( String::from("list bucket...folders or something idfk"), status, - resp.text().await.map_err(R2Error::from)?, + resp.text().map_err(R2Error::from)?, )) } } - pub async fn list_files(&self, bucket: &str) -> Result>, R2Error> { - let xml = self.get_bucket_listing(bucket).await?; + pub fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket)?; let mut files_dict: HashMap> = HashMap::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -203,8 +212,8 @@ impl R2Client { Ok(files_dict) } - pub async fn list_folders(&self, bucket: &str) -> Result, R2Error> { - let xml = self.get_bucket_listing(bucket).await?; + pub fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket)?; let mut folders = std::collections::HashSet::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -214,10 +223,10 @@ impl R2Client { .filter(|e| e.name == "Contents") { let key_elem = content.get_child("Key").and_then(|k| k.get_text()); - if let Some(file_key) = key_elem { - if let Some(idx) = file_key.find('/') { - folders.insert(file_key[..idx].to_string()); - } + if let Some(file_key) = key_elem + && let Some(idx) = file_key.find('/') + { + folders.insert(file_key[..idx].to_string()); } } Ok(folders.into_iter().collect()) @@ -225,7 +234,10 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { - Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + Some(k) => { + let encoded_key = aws_sigv4::url_encode(k); + format!("{}/{}/{}", self.endpoint, bucket, encoded_key) + } None => format!("{}/{}/", self.endpoint, bucket), } } @@ -279,6 +291,7 @@ mod tests { Some("key"), "deadbeef", Some("application/octet-stream"), + None, ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/tests/r2_tests.rs b/tests/r2_tests.rs 
index 73f1693..f5dcf00 100644 --- a/tests/r2_tests.rs +++ b/tests/r2_tests.rs @@ -1,4 +1,3 @@ -use std::env; use std::fs; use std::io::Write; @@ -63,7 +62,6 @@ // Cleanup fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_download_path).unwrap(); - // NOTE: The test file on R2 is not deleted as there is no delete API yet. } } @@ -132,6 +130,8 @@ mod async_tests { // Cleanup fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_download_path).unwrap(); - // NOTE: The test file on R2 is not deleted as there is no delete API yet. + + // 6. Delete file + bucket.delete_file(r2_file_key).await.unwrap(); } } diff --git a/todo.md b/todo.md index 562cdd8..bad86f6 100644 --- a/todo.md +++ b/todo.md @@ -1,29 +1,12 @@ -Okay I think I did everything, so to clean up: +## For release: + - [ ] Create a crate::Result that is Result, and have Ok(status_code) + - [ ] Consider dropping more dependencies, using hyper or some lower level stuff for async, and then http for blocking + - [ ] A way to view the file contents (UTF-8 valid) would be cool + - [ ] Add functions that will list files with their metadata (perhaps a simple R2File type?) + - [ ] Clear out all print statements and consider logging (this is a library, after all) -# Fatal Bug -When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header. - -Check and see if the `aws_signing` function `create_canonical_request_headers`'s output is getting tossed. -The output of that function includes request and headers with the host headers in there. 
-I imagine that, instead of parsing that string to turn into a header map or whatever the `signature` function returns, it is creating it's own "content-type" and "x-amz-date" headers and pushing them to it's own vector -I needed to come here to think cause I'm tired and would've fucked it up if I just brute forced it instead of critically thought about it - -The expected solution is taking the "request", and turning in into a "HeaderMap", since we aren't low enough level to pass unsafe strings by ourselves. -Technically the output of the canonical_request is correct: -Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"` -However it's not in the format of the vector of strings, and also I don't think there should be a new line after put, but there should be two before the content type. Not sure. -But that's what the request looks like, then the headers that we could parse as well that the same function returns are as follows: -Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"` -Which seems to match up, however the final signature that the main `signature` function returns is: -Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]` -So my assumption is that the `signature` function is making it's own headers and dropping the canonical ones instead of parsing one of the returned strings into one. 
-This is happening before the "authorization" headers (the only headers that the main function should be adding itself) are pushed to the current vector -I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as it's own library or anything. - -Okay after saying all that, surely I could've just fixed it by now and will remember it in the later. - -- [X] Update the sync library -- [X] Make a .env with test-bucket creds -- [ ] Actually test the damn thing -- [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?) +## Dev (since we're so back): + - [X] Update the sync library + - [X] Make a .env with test-bucket creds + - [X] Actually test the damn thing