diff --git a/Cargo.toml b/Cargo.toml index 091590b..f2d3db4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ urlencoding = "2.1.3" http = "1.3.1" [dev-dependencies] -tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] } dotenv = "0.15" [features] diff --git a/src/_async/r2bucket.rs b/src/_async/r2bucket.rs index fcd3cae..ec4a471 100644 --- a/src/_async/r2bucket.rs +++ b/src/_async/r2bucket.rs @@ -1,14 +1,21 @@ use crate::R2Client; use crate::R2Error; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Bucket { bucket: String, pub client: R2Client, } impl R2Bucket { - pub fn new(bucket: String, client: R2Client) -> Self { + pub fn new(bucket: String) -> Self { + Self { + bucket, + client: R2Client::new(), + } + } + + pub fn from_client(bucket: String, client: R2Client) -> Self { Self { bucket, client } } @@ -35,7 +42,7 @@ impl R2Bucket { pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client - .download_file(&self.bucket, r2_file_key, local_path) + .download_file(&self.bucket, r2_file_key, local_path, None) .await } @@ -53,19 +60,10 @@ impl R2Bucket { #[cfg(test)] mod tests { use super::*; - use crate::R2Client; - use std::env; fn get_test_bucket() -> R2Bucket { dotenv::dotenv().ok(); - let access_key = - env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); - let secret_key = - env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); - let endpoint = env::var("R2_ENDPOINT") - .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); - let client = R2Client::from_credentials(access_key, secret_key, endpoint); - R2Bucket::new("test-bucket".to_string(), client) + R2Bucket::new("test-bucket".to_string()) } #[test] diff --git a/src/_async/r2client.rs b/src/_async/r2client.rs index 0b5c722..5f3e67c 100644 --- a/src/_async/r2client.rs +++ b/src/_async/r2client.rs @@ -1,18 +1,16 @@ +use crate::R2Error; +use crate::aws_signing::Sigv4Client; use crate::mimetypes::Mime; -use crate::{R2Error, aws_signing}; use http::Method; -use reqwest::header::{self, HeaderName, HeaderValue}; -use sha2::{Digest, Sha256}; +use reqwest::header::HeaderMap; use std::collections::HashMap; use std::str::FromStr; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Client { - access_key: String, - secret_key: String, + sigv4: Sigv4Client, endpoint: String, } - impl R2Client { fn get_env() -> Result<(String, String, String), R2Error> { let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; @@ -25,16 +23,14 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } @@ -44,40 +40,47 @@ impl R2Client { method: http::Method, bucket: &str, key: Option<&str>, - payload_hash: &str, + payload: impl AsRef<[u8]>, content_type: Option<&str>, - ) -> Result { + extra_headers: Option>, + ) -> Result { let uri = http::Uri::from_str(&self.build_url(bucket, key)) .expect("invalid uri rip (make sure the build_url function works as intended)"); - let mut headers = Vec::new(); - if method == Method::GET { - headers.push(( - "x-amz-content-sha256".to_string(), - 
"UNSIGNED-PAYLOAD".to_string(), - )) - } + let mut headers = extra_headers.unwrap_or_default(); + headers.push(( + "host".to_string(), + uri.host().expect("Should have host in URI").to_owned(), + )); if let Some(content_type) = content_type { headers.push(("content-type".to_string(), content_type.to_owned())) } - let (_, headers) = aws_signing::signature( - method, - uri, - headers, - payload_hash, - "s3", - "us-east-1", - &self.secret_key, - &self.access_key, - ); - let mut header_map = header::HeaderMap::new(); - for header in headers { - header_map.insert( - HeaderName::from_lowercase(&header.0.to_lowercase().as_bytes()) - .expect("shit tragic"), - HeaderValue::from_str(&header.1).expect("shit more tragic"), - ); - } + let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); + // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap + // directly. + // // Granted this impl had much better handling, fuck it we ball. + // let mut header_map = header::HeaderMap::new(); + // for header in headers { + // header_map.insert( + // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header name out of {}", + // header.0.to_lowercase() + // ) + // }) + // .unwrap(), + // HeaderValue::from_str(&header.1) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", + // header.1, + // header.0.to_lowercase(), + // ) + // }) + // .unwrap(), + // ); + // } Ok(header_map) } @@ -88,9 +91,12 @@ impl R2Client { r2_file_key: &str, content_type: Option<&str>, ) -> Result<(), R2Error> { - // --- Hash Payload -- - let file_data = std::fs::read(local_file_path)?; - let payload_hash = hex::encode(Sha256::digest(&file_data)); + // Payload (file data) + let payload = std::fs::read(local_file_path)?; + println!( + "[upload_file] Payload hash for signing: {}", + crate::aws_signing::hash(&payload) + ); // Set HTTP Headers let content_type = if let Some(content_type) = content_type { @@ -102,15 +108,17 @@ impl R2Client { Method::PUT, bucket, Some(r2_file_key), - &payload_hash, + &payload, content_type, + None, )?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); + println!("[upload_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(r2_file_key)); let client = reqwest::Client::new(); let resp = client .put(&file_url) .headers(headers) - .body(file_data) + .body(payload) .send() .await?; let status = resp.status(); @@ -118,10 +126,13 @@ impl R2Client { if status.is_success() { Ok(()) } else { - Err(R2Error::Other(format!( - "Upload failed with status {}: {}", - status, text - ))) + Err(R2Error::FailedRequest( + format!( + "upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\"" + ), + status, + text, + )) } } pub async fn download_file( @@ -129,36 +140,35 @@ impl R2Client { bucket: &str, key: &str, local_path: &str, + extra_headers: Option>, ) -> Result<(), R2Error> { - let payload_hash = hex::encode(Sha256::digest("")); - let content_type = Mime::get_mimetype_from_fp(local_path); - let headers = self.create_headers( - Method::GET, - bucket, - Some(key), - &payload_hash, - Some(content_type), - )?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); + // 
https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. + // I don't know if I should trust it though, I don't see public impls with this. + let payload = ""; + println!("[download_file] Payload for signing: (empty)"); + let headers = + self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; + println!("[download_file] Headers sent to request: {headers:#?}"); + let file_url = self.build_url(bucket, Some(key)); let client = reqwest::Client::new(); let resp = client.get(&file_url).headers(headers).send().await?; let status = resp.status(); - let content = resp.bytes().await?; if status.is_success() { - std::fs::write(local_path, &content)?; + std::fs::write(local_path, resp.bytes().await?)?; Ok(()) } else { - Err(R2Error::Other(format!( - "Download failed with status {}", - status - ))) + Err(R2Error::FailedRequest( + format!("dowloading file \"{key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) } } async fn get_bucket_listing(&self, bucket: &str) -> Result { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = "application/xml"; - let headers = - self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; + let payload = ""; + println!("[get_bucket_listing] Payload for signing: (empty)"); + let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; + println!("[get_bucket_listing] Headers sent to request: {headers:#?}"); let url = self.build_url(bucket, None); let client = reqwest::Client::new(); let resp = client @@ -169,9 +179,13 @@ impl R2Client { .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - resp.text().await.map_err(R2Error::from) + Ok(resp.text().await.map_err(R2Error::from)?) 
} else { - Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + Err(R2Error::FailedRequest( + String::from("list bucket...folders or something idfk"), + status, + resp.text().await.map_err(R2Error::from)?, + )) } } @@ -220,20 +234,17 @@ impl R2Client { fn build_url(&self, bucket: &str, key: Option<&str>) -> String { match key { - Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), + Some(k) => { + let encoded_key = crate::aws_signing::url_encode(k); + format!("{}/{}/{}", self.endpoint, bucket, encoded_key) + } None => format!("{}/{}/", self.endpoint, bucket), } } } impl Default for R2Client { fn default() -> Self { - let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); - - Self { - access_key, - secret_key, - endpoint, - } + Self::new() } } @@ -254,11 +265,12 @@ mod tests { fn r2client_env() { let r2client = r2client_from_env(); - assert_eq!(r2client.access_key, "AKIAEXAMPLE"); - assert_eq!( - r2client.secret_key, - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" - ); + // Sorry but I don't know if I should have the keys on the sigv4 pub or not yet + // assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + // assert_eq!( + // r2client.secret_key, + // "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + // ); assert_eq!( r2client.endpoint, "https://example.r2.cloudflarestorage.com" @@ -279,6 +291,7 @@ mod tests { Some("key"), "deadbeef", Some("application/octet-stream"), + None, ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/src/aws_signing.rs b/src/aws_signing.rs index b3b5333..5c792b1 100644 --- a/src/aws_signing.rs +++ b/src/aws_signing.rs @@ -4,6 +4,8 @@ use sha2::{Digest, Sha256}; type Hmac256 = Hmac; +const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; + // --- Utility functions --- fn lowercase(string: &str) -> String { string.to_lowercase() @@ -27,11 +29,11 @@ fn trim(string: &str) -> String { string.trim().to_string() } -fn hash>(payload: T) -> String { +pub fn hash>(payload: T) -> String { hex(sha256hash(payload)) } -fn url_encode(url: &str) -> String { +pub fn url_encode(url: &str) -> String { let mut url = urlencoding::encode(url).into_owned(); let encoded_to_replacement: [(&str, &str); 4] = [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")]; @@ -41,13 +43,16 @@ fn url_encode(url: &str) -> String { url } +// --- Signing Functions --- +// These don't use any parts of the Sigv4Client, so they are external + // --- Canonical request --- fn create_canonical_request( method: http::Method, uri: http::Uri, mut headers: Vec<(String, String)>, hashed_payload: &str, -) -> (String, String, String) { +) -> (String, Vec<(String, String)>, String) { // HTTPMethod let http_method = method.to_string(); @@ -70,14 +75,14 @@ fn create_canonical_request( pairs.sort_by(|a, b| a.0.cmp(&b.0)); pairs .into_iter() - .map(|(k, v)| format!("{}={}", k, v)) + .map(|(k, v)| format!("{k}={v}")) .collect::>() .join("&") } else { String::new() }; - // Ensure required headers (host and x-amz-date) are present + // checks for proper host headers let host = uri .host() .expect("uri passed without a proper host") @@ -86,6 +91,16 @@ fn create_canonical_request( headers.push(("host".to_string(), host)); } + if !headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256")) + { + headers.push(( + "x-amz-content-sha256".to_string(), + hashed_payload.to_owned(), + )) + } + // CanonicalHeaders + SignedHeaders let mut http_headers = headers .iter() @@ -95,7 +110,7 @@ fn create_canonical_request( let 
canonical_headers: String = http_headers .iter() - .map(|(k, v)| format!("{}:{}\n", k, v)) + .map(|(k, v)| format!("{k}:{v}\n")) .collect(); let signed_headers: String = http_headers @@ -106,92 +121,174 @@ fn create_canonical_request( // Final canonical request let canonical_request = format!( - "{}\n{}\n{}\n{}\n{}\n{}", - http_method, - canonical_uri, - canonical_query_string, - canonical_headers, - signed_headers, - hashed_payload + "{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}" ); - (canonical_request, signed_headers, canonical_headers) + (canonical_request, http_headers, signed_headers) } - -fn credential_scope(date: &str, region: &str, service: &str) -> String { - format!( - "{}/{}/{}/aws4_request", - date, - lowercase(region), - lowercase(service) - ) -} - -fn string_to_sign(scope: &str, amz_date: &str, canonical_request: &str) -> String { - format!( - "{}\n{}\n{}\n{}", - "AWS4-HMAC-SHA256", - amz_date, - scope, - hex(sha256hash(canonical_request)) - ) -} - -fn derive_signing_key(key: &str, date: &str, region: &str, service: &str) -> Vec { - let secret_key = format!("AWS4{}", key); - let date_key = hmac_sha256(secret_key.as_bytes(), date); - let date_region_key = hmac_sha256(&date_key, region); - let date_region_service_key = hmac_sha256(&date_region_key, service); - hmac_sha256(&date_region_service_key, "aws4_request") -} - fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec { hmac_sha256(signing_key, string_to_sign) } -// --- API --- -pub fn signature( - method: http::Method, - uri: http::Uri, - mut headers: Vec<(String, String)>, - hashed_payload: &str, - service: &str, - region: &str, - secret_key: &str, - access_key: &str, -) -> (String, Vec<(String, String)>) { - let now = Utc::now(); - let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); - let date_stamp = now.format("%Y%m%d").to_string(); - - // Add x-amz-date header if not already present - if !headers - .iter() - .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) - { - headers.push(("x-amz-date".to_string(), amz_date.clone())); +fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String { + format!( + "{}\n{}\n{}\n{}", + "AWS4-HMAC-SHA256", amz_date, scope, hashed_canonical_request + ) +} +/// Structure containing all the data relevant for an AWS Service utilizing SigV4. +/// Service: String containing the AWS service (e.g. "ec2" or "s3") +/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1") +/// Access Key: The "Access Key" to use with the AWS service (crazy, ik) +/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah) +/// +/// ``` +/// +/// +/// use aws_signing::Sigv4Client; +/// +/// let s3_client = Sigv4Client::new( +/// "s3", +/// "us-east-1", +/// std::env::var("S3_ACCESS_KEY").unwrap(), +/// std::env::var("S3_SECRET_KEY").unwrap(), +/// ) +/// let (_, request_headers) = s3client.signature( +/// http::Method::GET, +/// http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"), +/// vec![("content-type", "text/plain")], +/// "" // Since it's a GET request, the payload is "" +/// ) +/// ``` +#[derive(Debug)] +// A more mature client would also have session_key: Option, but not my problem +pub struct Sigv4Client { + // Would it makes more sense for these to be type generics + // with trait param ToString? + // Either that or just &str or String...wait, union? 
+ // Nah there has to be a better way to do it than...that + // but I don't wanna enum!!! + service: String, + region: String, + // Would it makes more sense for these to be type generics + // with trait param AsRef<[u8]>? + access_key: String, + secret_key: String, +} +/// NOTE: This only impliments functions that require one of the Sigv4Client fields. +/// For other functions related to the signing proccess, they are defined above, including the +/// prequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html +impl Sigv4Client { + /// Creates a new instance of the Sigv4Client for a particular service, in a region, with your + /// private and public access keys. + /// + /// For some reason this function will take any values that impl Into, so you can pass + /// &str, String, or something else if you decide to get freaky. + pub fn new( + service: impl Into, + region: impl Into, + pub_key: impl Into, + priv_key: impl Into, + ) -> Self { + Self { + service: service.into(), + region: region.into(), + access_key: pub_key.into(), + secret_key: priv_key.into(), + } } - // Canonical request - let (canonical_request, signed_headers, _canonical_headers) = - create_canonical_request(method, uri, headers.clone(), hashed_payload); + // In a more mature client, this might be an enum of AWSRegions + // I also don't even know if this could ever be useful lol, wouldn't you have individual + // clients for each region or use "auto" for AWS to figure it out for you? whatever. + pub fn set_region(&mut self, region: impl Into) { + self.region = region.into() + } - // String to sign - let scope = credential_scope(&date_stamp, region, service); - let string_to_sign = string_to_sign(&scope, &amz_date, &canonical_request); + fn credential_scope(&self, date: &str) -> String { + format!( + "{}/{}/{}/aws4_request", + date, + lowercase(&self.region), + lowercase(&self.service) + ) + } - // Signing key + signature - let signing_key = derive_signing_key(secret_key, &date_stamp, region, service); - let signature = hex(calculate_signature(&signing_key, &string_to_sign)); + fn derive_signing_key(&self, date: &str) -> Vec { + let secret_key = &self.secret_key; + let key = format!("AWS4{secret_key}"); + let date_key = hmac_sha256(key.as_bytes(), date); + let date_region_key = hmac_sha256(&date_key, &self.region); + let date_region_service_key = hmac_sha256(&date_region_key, &self.service); + hmac_sha256(&date_region_service_key, "aws4_request") + } - // Authorization header - let credential = format!("{}/{}", access_key, scope); - let auth_header = format!( - "{} Credential={}, SignedHeaders={}, Signature={}", - "AWS4-HMAC-SHA256", credential, signed_headers, signature - ); + // --- API --- + /// This is the only function to use <3 + pub fn signature>( + &self, + method: http::Method, + uri: http::Uri, + // Should probably make this a header map, then turn it into a Vec(String, String) to sort + // by header name cause Amazon said so. 
+ mut headers: Vec<(String, String)>, + payload: T, + ) -> (String, http::HeaderMap) { + let auth_algorithm = "AWS4-HMAC-SHA256"; + let now = Utc::now(); + let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string(); + let date = now.format("%Y%m%d").to_string(); + let payload_as_bytes = payload.as_ref(); + let payload_hash = if payload_as_bytes.is_empty() { + EMPTY_PAYLOAD_HASH.to_string() + } else { + hash(payload_as_bytes) + }; - headers.push(("authorization".to_string(), auth_header)); + // Add x-amz-date header if not already present + if !headers + .iter() + .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date")) + { + headers.push(("x-amz-date".to_string(), amz_date.clone())); + } - (signature, headers) + // Canonical request + let (canonical_request, mut headers, signed_headers) = + create_canonical_request(method, uri, headers, &payload_hash); + + // String to sign + let scope = self.credential_scope(&date); + let hashed_canonical_request = hash(&canonical_request); + let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request); + + // Signing key + signature + let signing_key = self.derive_signing_key(&date); + let signature = hex(calculate_signature(&signing_key, &string_to_sign)); + + // Authorization header + let access_key = &self.access_key; + let credential = format!("{access_key}/{scope}"); + let auth_header = format!( + "{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}" + ); + + println!("\n--- AWS SigV4 Debug ---"); + println!("1. CanonicalRequest:\n---\n{canonical_request}\n---"); + println!("2. StringToSign:\n---\n{string_to_sign}\n---"); + println!("3. SigningKey:\n---\n{}\n---", hex(&signing_key)); + println!("4. Signature:\n---\n{signature}\n---"); + println!("5. Authorization Header:\n---\n{auth_header}\n---"); + + headers.push(("authorization".to_string(), auth_header)); + + let mut header_map: http::HeaderMap = http::HeaderMap::new(); + for (header, value) in headers.clone() { + header_map.insert( + http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(), + http::HeaderValue::from_str(&value).unwrap(), + ); + } + (signature, header_map) + } } diff --git a/src/error.rs b/src/error.rs index 38cd4b9..9438385 100644 --- a/src/error.rs +++ b/src/error.rs @@ -10,6 +10,6 @@ pub enum R2Error { Xml(#[from] xmltree::ParseError), #[error("Missing environment varibles: {0}")] Env(String), - #[error("Other: {0}")] - Other(String), + #[error("Request failed during operation {0}: {1}\n{2}")] + FailedRequest(String, http::StatusCode, String), } diff --git a/src/sync/r2bucket.rs b/src/sync/r2bucket.rs index b8d6e36..ec4a471 100644 --- a/src/sync/r2bucket.rs +++ b/src/sync/r2bucket.rs @@ -1,14 +1,21 @@ +use crate::R2Client; use crate::R2Error; -use crate::sync::R2Client; -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Bucket { bucket: String, pub client: R2Client, } impl R2Bucket { - pub fn new(bucket: String, client: R2Client) -> Self { + pub fn new(bucket: String) -> Self { + Self { + bucket, + client: R2Client::new(), + } + } + + pub fn from_client(bucket: String, client: R2Client) -> Self { Self { bucket, client } } @@ -22,41 +29,41 @@ impl R2Bucket { Self { bucket, client } } - pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> { + pub async fn upload_file( + &self, + local_file_path: &str, + r2_file_key: &str, + ) -> Result<(), R2Error> { self.client - .upload_file(&self.bucket, local_file_path, r2_file_key) + // I'm pasing None to let the R2Client 
derive the content type from the local_file_path + .upload_file(&self.bucket, local_file_path, r2_file_key, None) + .await } - pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { + pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { self.client - .download_file(&self.bucket, r2_file_key, local_path) + .download_file(&self.bucket, r2_file_key, local_path, None) + .await } - pub fn list_files(&self) -> Result>, R2Error> { - self.client.list_files(&self.bucket) + pub async fn list_files( + &self, + ) -> Result>, R2Error> { + self.client.list_files(&self.bucket).await } - pub fn list_folders(&self) -> Result, R2Error> { - self.client.list_folders(&self.bucket) + pub async fn list_folders(&self) -> Result, R2Error> { + self.client.list_folders(&self.bucket).await } } #[cfg(test)] mod tests { use super::*; - use crate::sync::R2Bucket; - use std::env; fn get_test_bucket() -> R2Bucket { dotenv::dotenv().ok(); - let access_key = - env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string()); - let secret_key = - env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string()); - let endpoint = env::var("R2_ENDPOINT") - .unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string()); - let client = R2Client::from_credentials(access_key, secret_key, endpoint); - R2Bucket::new("test-bucket".to_string(), client) + R2Bucket::new("test-bucket".to_string()) } #[test] @@ -64,4 +71,12 @@ mod tests { let bucket = get_test_bucket(); assert_eq!(bucket.bucket, "test-bucket"); } + + // Example async test (requires a runtime, so ignored by default) + // #[tokio::test] + // async fn test_upload_file() { + // let bucket = get_test_bucket(); + // let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await; + // assert!(result.is_ok()); + // } } diff --git a/src/sync/r2client.rs b/src/sync/r2client.rs index 36470a3..c87abdc 100644 --- a/src/sync/r2client.rs +++ b/src/sync/r2client.rs @@ -1,20 +1,16 @@ use crate::R2Error; +use crate::aws_signing::Sigv4Client; use crate::mimetypes::Mime; -use chrono::Utc; -use hmac::{Hmac, Mac}; -use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue}; -use sha2::{Digest, Sha256}; +use http::Method; +use reqwest::header::HeaderMap; use std::collections::HashMap; +use std::str::FromStr; -type HmacSHA256 = Hmac; - -#[derive(Clone, Debug)] +#[derive(Debug)] pub struct R2Client { - access_key: String, - secret_key: String, + sigv4: Sigv4Client, endpoint: String, } - impl R2Client { fn get_env() -> Result<(String, String, String), R2Error> { let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"]; @@ -27,234 +23,165 @@ impl R2Client { let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { Self { - access_key, - secret_key, + sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), endpoint, } } - fn sign(&self, key: &[u8], msg: &str) -> Vec { - let mut mac = HmacSHA256::new_from_slice(key).expect("Invalid key length"); - mac.update(msg.as_bytes()); - mac.finalize().into_bytes().to_vec() - } - - fn get_signature_key(&self, date_stamp: &str, region: &str, service: &str) -> Vec { - let aws4_secret: String = format!("AWS4{}", self.secret_key); - let k_date = self.sign(aws4_secret.as_bytes(), date_stamp); - let k_region = 
self.sign(&k_date, region); - let k_service = self.sign(&k_region, service); - self.sign(&k_service, "aws4_request") - } - fn create_headers( &self, - method: &str, + method: http::Method, bucket: &str, - key: &str, - payload_hash: &str, - content_type: &str, - ) -> Result { - // Robustly extract host from endpoint - let endpoint = self.endpoint.trim_end_matches('/'); - // Not proud of this, it is really dumb and hard to read, but it'll work I suppose...I think... - let host = endpoint - .split("//") - .nth(1) - .unwrap_or(endpoint) - .split('/') - .next() - .unwrap_or(endpoint) - .split(':') - .next() - .unwrap_or(endpoint) - .trim(); - if host.is_empty() { - return Err(R2Error::Other( - "Host header could not be determined from endpoint".to_string(), - )); + key: Option<&str>, + payload: impl AsRef<[u8]>, + content_type: Option<&str>, + ) -> Result { + let uri = http::Uri::from_str(&self.build_url(bucket, key)) + .expect("invalid uri rip (make sure the build_url function works as intended)"); + let mut headers = Vec::new(); + headers.push(( + "host".to_string(), + uri.host().expect("Should have host in URI").to_owned(), + )); + if let Some(content_type) = content_type { + headers.push(("content-type".to_string(), content_type.to_owned())) } - let t = Utc::now(); - let amz_date = t.format("%Y%m%dT%H%M%SZ").to_string(); - let date_stamp = t.format("%Y%m%d").to_string(); - let mut headers_vec = [ - ("host", host), - ("x-amz-date", &amz_date), - ("x-amz-content-sha256", payload_hash), - ("content-type", content_type), - ]; - headers_vec.sort_by(|a, b| a.0.cmp(b.0)); - - let signed_headers = headers_vec - .iter() - .map(|(k, _)| *k) - .collect::>() - .join(";"); - let canonical_headers = headers_vec - .iter() - .map(|(k, v)| format!("{}:{}\n", k.to_lowercase(), v)) - .collect::(); - - let canonical_uri = format!("/{}/{}", bucket, key); - let canonical_request = format!( - "{method}\n{uri}\n\n{headers}\n{signed_headers}\n{payload_hash}", - method = method, - uri = canonical_uri, - headers = canonical_headers, - signed_headers = signed_headers, - payload_hash = payload_hash - ); - let credential_scope = format!("{}/{}/s3/aws4_request", date_stamp, "auto"); - let hashed_request = hex::encode(Sha256::digest(canonical_request.as_bytes())); - let string_to_sign = format!( - "AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}", - amz_date = amz_date, - credential_scope = credential_scope, - hashed_request = hashed_request - ); - let signing_key = self.get_signature_key(&date_stamp, "auto", "s3"); - let signature = hex::encode(self.sign(&signing_key, &string_to_sign)); - let authorization = format!( - "AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}", - self.access_key, credential_scope, signed_headers, signature - ); - - // Print all headers for debugging - println!("[r2client] DEBUG: Built headers:"); - println!(" host: {}", host); - println!(" x-amz-date: {}", amz_date); - println!(" x-amz-content-sha256: {}", payload_hash); - println!(" content-type: {}", content_type); - println!(" authorization: {}", authorization); - println!(" signed_headers: {}", signed_headers); - println!( - " canonical_headers: {}", - canonical_headers.replace("\n", "\\n") - ); - println!( - " canonical_request: {}", - canonical_request.replace("\n", "\\n") - ); - println!(" string_to_sign: {}", string_to_sign.replace("\n", "\\n")); - println!(" signature: {}", signature); - - let mut header_map = HeaderMap::new(); - header_map.insert( - HeaderName::from_static("x-amz-date"), - 
HeaderValue::from_str(&amz_date) - .map_err(|e| R2Error::Other(format!("Invalid x-amz-date: {e}")))?, - ); - header_map.insert( - HeaderName::from_static("x-amz-content-sha256"), - HeaderValue::from_str(payload_hash).map_err(|e| { - R2Error::Other(format!( - "Invalid x-amz-content-sha256: {payload_hash:?}: {e}" - )) - })?, - ); - header_map.insert( - HeaderName::from_static("authorization"), - HeaderValue::from_str(&authorization).map_err(|e| { - R2Error::Other(format!( - "Invalid authorization: {e}\nValue: {authorization}" - )) - })?, - ); - header_map.insert( - HeaderName::from_static("content-type"), - HeaderValue::from_str(content_type) - .map_err(|e| R2Error::Other(format!("Invalid content-type: {e}")))?, - ); - header_map.insert( - HeaderName::from_static("host"), - HeaderValue::from_str(host) - .map_err(|e| R2Error::Other(format!("Invalid host: {e}")))?, - ); + let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); + // // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap + // directly. + // // Granted this impl had much better handling, fuck it we ball. + // let mut header_map = header::HeaderMap::new(); + // for header in headers { + // header_map.insert( + // HeaderName::from_lowercase(header.0.to_lowercase().as_bytes()) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header name out of {}", + // header.0.to_lowercase() + // ) + // }) + // .unwrap(), + // HeaderValue::from_str(&header.1) + // .inspect_err(|e| { + // eprint!( + // "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}", + // header.1, + // header.0.to_lowercase(), + // ) + // }) + // .unwrap(), + // ); + // } Ok(header_map) } - pub fn upload_file( + pub async fn upload_file( &self, bucket: &str, local_file_path: &str, r2_file_key: &str, + content_type: Option<&str>, ) -> Result<(), R2Error> { - let file_data = std::fs::read(local_file_path)?; - let mut hasher = Sha256::new(); - hasher.update(&file_data); - let payload_hash = hex::encode(hasher.finalize()); - // let content_type = "application/octet-stream"; - let content_type = Mime::get_mimetype_from_fp(local_file_path); - let headers = - self.create_headers("PUT", bucket, r2_file_key, &payload_hash, content_type)?; - let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key); - let client = reqwest::blocking::Client::new(); + // Payload (file data) + let payload = std::fs::read(local_file_path)?; + + // Set HTTP Headers + let content_type = if let Some(content_type) = content_type { + Some(content_type) + } else { + Some(Mime::get_mimetype_from_fp(local_file_path)) + }; + let headers = self.create_headers( + Method::PUT, + bucket, + Some(r2_file_key), + &payload, + content_type, + )?; + let file_url = self.build_url(bucket, Some(r2_file_key)); + let client = reqwest::Client::new(); let resp = client .put(&file_url) .headers(headers) - .body(file_data) - .send()?; + .body(payload) + .send() + .await?; let status = resp.status(); - let text = resp.text()?; + let text = resp.text().await?; if status.is_success() { Ok(()) } else { - Err(R2Error::Other(format!( - "Upload failed with status {}: {}", - status, text - ))) + Err(R2Error::FailedRequest( + format!( + "upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\"" + ), + status, + text, + )) } } - pub fn download_file(&self, bucket: &str, key: &str, local_path: &str) -> Result<(), R2Error> { - let payload_hash = "UNSIGNED-PAYLOAD"; - let content_type = 
"application/octet-stream"; - let headers = self.create_headers("GET", bucket, key, payload_hash, content_type)?; + pub async fn download_file( + &self, + bucket: &str, + key: &str, + local_path: &str, + ) -> Result<(), R2Error> { + // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. + // I don't know if I should trust it though, I don't see public impls with this. + let payload = "UNSIGNED-PAYLOAD"; + let content_type = Mime::get_mimetype_from_fp(local_path); + let headers = + self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?; let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); - let client = reqwest::blocking::Client::new(); - let resp = client.get(&file_url).headers(headers).send()?; + let client = reqwest::Client::new(); + let resp = client.get(&file_url).headers(headers).send().await?; let status = resp.status(); - let content = resp.bytes()?; if status.is_success() { - std::fs::write(local_path, &content)?; + std::fs::write(local_path, resp.bytes().await?)?; Ok(()) } else { - Err(R2Error::Other(format!( - "Download failed with status {}", - status - ))) + Err(R2Error::FailedRequest( + format!("dowloading file \"{key}\" from bucket \"{bucket}\""), + status, + resp.text().await?, + )) } } - fn get_bucket_listing(&self, bucket: &str) -> Result { + async fn get_bucket_listing(&self, bucket: &str) -> Result { let payload_hash = "UNSIGNED-PAYLOAD"; let content_type = "application/xml"; - let headers = self.create_headers("GET", bucket, "", payload_hash, content_type)?; + let headers = + self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; let url = self.build_url(bucket, None); - let client = reqwest::blocking::Client::new(); + let client = reqwest::Client::new(); let resp = client .get(&url) .headers(headers) .send() + .await .map_err(R2Error::from)?; let status = resp.status(); if status.is_success() { - resp.text().map_err(R2Error::from) + Ok(resp.text().await.map_err(R2Error::from)?) } else { - Err(R2Error::Other(format!("Failed to list bucket: {}", status))) + Err(R2Error::FailedRequest( + String::from("list bucket...folders or something idfk"), + status, + resp.text().await.map_err(R2Error::from)?, + )) } } - /// List all files in the specified bucket. Returns a HashMap of folder -> `Vec`. - pub fn list_files(&self, bucket: &str) -> Result>, R2Error> { - let xml = self.get_bucket_listing(bucket)?; + pub async fn list_files(&self, bucket: &str) -> Result>, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; let mut files_dict: HashMap> = HashMap::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -276,9 +203,8 @@ impl R2Client { Ok(files_dict) } - /// List all folders in the specified bucket. Returns a Vec of folder names. 
- pub fn list_folders(&self, bucket: &str) -> Result, R2Error> { - let xml = self.get_bucket_listing(bucket)?; + pub async fn list_folders(&self, bucket: &str) -> Result, R2Error> { + let xml = self.get_bucket_listing(bucket).await?; let mut folders = std::collections::HashSet::new(); let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; for content in root @@ -306,13 +232,7 @@ impl R2Client { } impl Default for R2Client { fn default() -> Self { - let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); - - Self { - access_key, - secret_key, - endpoint, - } + Self::new() } } @@ -333,36 +253,18 @@ mod tests { fn r2client_env() { let r2client = r2client_from_env(); - assert_eq!(r2client.access_key, "AKIAEXAMPLE"); - assert_eq!( - r2client.secret_key, - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" - ); + // Sorry but I don't know if I should have the keys on the sigv4 pub or not yet + // assert_eq!(r2client.access_key, "AKIAEXAMPLE"); + // assert_eq!( + // r2client.secret_key, + // "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY" + // ); assert_eq!( r2client.endpoint, "https://example.r2.cloudflarestorage.com" ); } - #[test] - fn test_sign_and_signature_key() { - let client = R2Client::from_credentials( - "AKIAEXAMPLE".to_string(), - "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(), - "https://example.r2.cloudflarestorage.com".to_string(), - ); - let key = b"testkey"; - let msg = "testmsg"; - let sig = client.sign(key, msg); - assert_eq!(sig.len(), 32); // HMAC-SHA256 output is 32 bytes - - let date = "20250101"; - let region = "auto"; - let service = "s3"; - let signing_key = client.get_signature_key(date, region, service); - assert_eq!(signing_key.len(), 32); - } - #[test] fn test_create_headers() { let client = R2Client::from_credentials( @@ -372,11 +274,11 @@ mod tests { ); let headers = client .create_headers( - "PUT", + Method::PUT, "bucket", - "key", + Some("key"), "deadbeef", - "application/octet-stream", + Some("application/octet-stream"), ) .unwrap(); assert!(headers.contains_key("x-amz-date")); diff --git a/tests/r2_tests.rs b/tests/r2_tests.rs new file mode 100644 index 0000000..73f1693 --- /dev/null +++ b/tests/r2_tests.rs @@ -0,0 +1,137 @@ +use std::env; +use std::fs; +use std::io::Write; + +fn create_test_file(path: &str, content: &str) { + let mut file = fs::File::create(path).unwrap(); + file.write_all(content.as_bytes()).unwrap(); +} + +#[cfg(feature = "sync")] +mod sync_tests { + use super::create_test_file; + use r2client::sync::R2Bucket; + use std::env; + use std::fs; + + fn setup_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests"); + let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set"); + let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set"); + let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set"); + R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint) + } + + #[test] + fn test_sync_e2e() { + let bucket = setup_bucket(); + let test_content = "Hello, R2 sync world!"; + let local_upload_path = "test_upload_sync.txt"; + let r2_file_key = "test/test_upload_sync.txt"; + let local_download_path = "test_download_sync.txt"; + + create_test_file(local_upload_path, test_content); + + // 1. Upload file + bucket + .upload_file(local_upload_path, r2_file_key) + .expect("Sync upload failed"); + + // 2. 
List files and check if it exists + let files = bucket.list_files().expect("Sync list_files failed"); + assert!( + files + .get("test") + .unwrap() + .contains(&"test_upload_sync.txt".to_string()) + ); + + // 3. List folders and check if it exists + let folders = bucket.list_folders().expect("Sync list_folders failed"); + assert!(folders.contains(&"test".to_string())); + + // 4. Download file + bucket + .download_file(r2_file_key, local_download_path) + .expect("Sync download failed"); + + // 5. Verify content + let downloaded_content = fs::read_to_string(local_download_path).unwrap(); + assert_eq!(test_content, downloaded_content); + + // Cleanup + fs::remove_file(local_upload_path).unwrap(); + fs::remove_file(local_download_path).unwrap(); + // NOTE: The test file on R2 is not deleted as there is no delete API yet. + } +} + +#[cfg(feature = "async")] +mod async_tests { + use super::create_test_file; + use r2client::R2Bucket; + use std::env; + use std::fs; + + fn setup_bucket() -> R2Bucket { + dotenv::dotenv().ok(); + let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests"); + let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set"); + let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set"); + let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set"); + R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint) + } + + #[tokio::test] + async fn test_async_e2e() { + let bucket = setup_bucket(); + let test_content = "Hello, R2 async world!"; + let local_upload_path = "test_upload_async.txt"; + let r2_file_key = "test/test_upload_async.txt"; + let local_download_path = "test_download_async.txt"; + + create_test_file(local_upload_path, test_content); + + // 0. List files to see if a get request will go through lol + let files = bucket.list_files().await.expect("Async list_files failed"); + println!("{files:#?}"); + + // 1. Upload file + bucket + .upload_file(local_upload_path, r2_file_key) + .await + .expect("Async upload failed"); + + // 2. List files and check if it exists + let files = bucket.list_files().await.expect("Async list_files failed"); + assert!( + files + .get("test") + .unwrap() + .contains(&"test_upload_async.txt".to_string()) + ); + + // 3. List folders and check if it exists + let folders = bucket + .list_folders() + .await + .expect("Async list_folders failed"); + assert!(folders.contains(&"test".to_string())); + + // 4. Download file + bucket + .download_file(r2_file_key, local_download_path) + .await + .expect("Async download failed"); + + // 5. Verify content + let downloaded_content = fs::read_to_string(local_download_path).unwrap(); + assert_eq!(test_content, downloaded_content); + + // Cleanup + fs::remove_file(local_upload_path).unwrap(); + fs::remove_file(local_download_path).unwrap(); + // NOTE: The test file on R2 is not deleted as there is no delete API yet. + } +} diff --git a/todo.md b/todo.md index b94c17b..562cdd8 100644 --- a/todo.md +++ b/todo.md @@ -1,6 +1,29 @@ Okay I think I did everything, so to clean up: -- [ ] Update the sync library +# Fatal Bug +When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header. + +Check and see if the `aws_signing` function `create_canonical_request_headers`'s output is getting tossed. +The output of that function includes request and headers with the host headers in there. 
+I imagine that, instead of parsing that string to turn into a header map or whatever the `signature` function returns, it is creating its own "content-type" and "x-amz-date" headers and pushing them onto its own vector.
+I needed to come here to think because I'm tired and would've fucked it up if I just brute-forced it instead of thinking it through.
+
+The expected solution is taking the "request" and turning it into a "HeaderMap", since we aren't low-level enough to pass raw strings around ourselves.
+Technically the output of the canonical_request is correct:
+Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"`
+However it's not in the format of the vector of strings, and also I don't think there should be a newline after PUT, but there should be two before the content type. Not sure.
+That's what the request looks like; the headers that the same function returns, and that we could parse, are as follows:
+Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"`
+Which seems to match up; however, the final headers that the main `signature` function returns are:
+Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]`
+So my assumption is that the `signature` function is making its own headers and dropping the canonical ones instead of parsing one of the returned strings.
+This is happening before the "authorization" header (the only header that the main function should be adding itself) is pushed to the current vector.
+I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as its own library or anything. (Rough sketch of what I mean at the bottom of this file.)
+
+Okay, after saying all that, surely I could've just fixed it by now and will remember it later.
+
+- [X] Update the sync library
 - [X] Make a .env with test-bucket creds
 - [ ] Actually test the damn thing
 - [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?)
+
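+
+Rough, untested sketch of that last point (this is *not* the final code, just what "parse the signed headers into a HeaderMap" means; `headers_to_map` is a made-up helper name, and it assumes the signing step hands back its (name, value) pairs with `host` still in them):
+
+```rust
+use http::{HeaderMap, HeaderName, HeaderValue};
+
+fn headers_to_map(headers: &[(String, String)]) -> HeaderMap {
+    let mut map = HeaderMap::new();
+    for (name, value) in headers {
+        // Names are lowercased for SigV4 signing anyway, so from_lowercase is fine here;
+        // panic loudly if a name/value somehow isn't a valid HTTP header.
+        let name = HeaderName::from_lowercase(name.to_lowercase().as_bytes())
+            .expect("invalid header name");
+        let value = HeaderValue::from_str(value).expect("invalid header value");
+        map.insert(name, value);
+    }
+    map
+}
+```
+That keeps `host` (and whatever else `create_canonical_request` signed) in the final map, so the only thing `signature` still appends itself is the `authorization` header.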