Giving up for now, probably won't revisit, I'm quite literally getting skill-diffed by some documentation and examples.

This commit is contained in:
foreverpyrite
2025-09-10 00:23:57 -04:00
parent 4e3075baad
commit 8b3cc6cb3e
9 changed files with 608 additions and 423 deletions

View File

@@ -22,7 +22,7 @@ urlencoding = "2.1.3"
http = "1.3.1"
[dev-dependencies]
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] }
dotenv = "0.15"
[features]

View File

@@ -1,14 +1,21 @@
use crate::R2Client;
use crate::R2Error;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct R2Bucket {
bucket: String,
pub client: R2Client,
}
impl R2Bucket {
pub fn new(bucket: String, client: R2Client) -> Self {
pub fn new(bucket: String) -> Self {
Self {
bucket,
client: R2Client::new(),
}
}
pub fn from_client(bucket: String, client: R2Client) -> Self {
Self { bucket, client }
}
@@ -35,7 +42,7 @@ impl R2Bucket {
pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> {
self.client
.download_file(&self.bucket, r2_file_key, local_path)
.download_file(&self.bucket, r2_file_key, local_path, None)
.await
}
@@ -53,19 +60,10 @@ impl R2Bucket {
#[cfg(test)]
mod tests {
use super::*;
use crate::R2Client;
use std::env;
fn get_test_bucket() -> R2Bucket {
dotenv::dotenv().ok();
let access_key =
env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string());
let secret_key =
env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string());
let endpoint = env::var("R2_ENDPOINT")
.unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string());
let client = R2Client::from_credentials(access_key, secret_key, endpoint);
R2Bucket::new("test-bucket".to_string(), client)
R2Bucket::new("test-bucket".to_string())
}
#[test]

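A minimal usage sketch of the constructor split shown in the R2Bucket diff above, assuming the crate is `r2client` and that both `R2Bucket` and `R2Client` are re-exported at the crate root; `R2Bucket::new` builds its own client from the R2_* environment variables (and panics via the internal `unwrap` if they're unset), while `from_client` reuses a client you already have:

```rust
// Sketch only; the bucket name and credentials here are placeholders.
use r2client::{R2Bucket, R2Client};

fn build_buckets() -> (R2Bucket, R2Bucket) {
    // Reads R2_ACCESS_KEY / R2_SECRET_KEY / R2_ENDPOINT internally.
    let from_env = R2Bucket::new("my-bucket".to_string());

    // Reuse an explicitly constructed client instead.
    let client = R2Client::from_credentials(
        "AKIAEXAMPLE".to_string(),
        "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(),
        "https://example.r2.cloudflarestorage.com".to_string(),
    );
    let shared = R2Bucket::from_client("my-bucket".to_string(), client);

    (from_env, shared)
}
```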
View File

@@ -1,18 +1,16 @@
use crate::R2Error;
use crate::aws_signing::Sigv4Client;
use crate::mimetypes::Mime;
use crate::{R2Error, aws_signing};
use http::Method;
use reqwest::header::{self, HeaderName, HeaderValue};
use sha2::{Digest, Sha256};
use reqwest::header::HeaderMap;
use std::collections::HashMap;
use std::str::FromStr;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct R2Client {
access_key: String,
secret_key: String,
sigv4: Sigv4Client,
endpoint: String,
}
impl R2Client {
fn get_env() -> Result<(String, String, String), R2Error> {
let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"];
@@ -25,16 +23,14 @@ impl R2Client {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self {
access_key,
secret_key,
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key),
endpoint,
}
}
pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self {
Self {
access_key,
secret_key,
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key),
endpoint,
}
}
@@ -44,40 +40,47 @@ impl R2Client {
method: http::Method,
bucket: &str,
key: Option<&str>,
payload_hash: &str,
payload: impl AsRef<[u8]>,
content_type: Option<&str>,
) -> Result<header::HeaderMap, R2Error> {
extra_headers: Option<Vec<(String, String)>>,
) -> Result<HeaderMap, R2Error> {
let uri = http::Uri::from_str(&self.build_url(bucket, key))
.expect("invalid uri rip (make sure the build_url function works as intended)");
let mut headers = Vec::new();
if method == Method::GET {
let mut headers = extra_headers.unwrap_or_default();
headers.push((
"x-amz-content-sha256".to_string(),
"UNSIGNED-PAYLOAD".to_string(),
))
}
"host".to_string(),
uri.host().expect("Should have host in URI").to_owned(),
));
if let Some(content_type) = content_type {
headers.push(("content-type".to_string(), content_type.to_owned()))
}
let (_, headers) = aws_signing::signature(
method,
uri,
headers,
payload_hash,
"s3",
"us-east-1",
&self.secret_key,
&self.access_key,
);
let mut header_map = header::HeaderMap::new();
for header in headers {
header_map.insert(
HeaderName::from_lowercase(&header.0.to_lowercase().as_bytes())
.expect("shit tragic"),
HeaderValue::from_str(&header.1).expect("shit more tragic"),
);
}
let (_, header_map) = self.sigv4.signature(method, uri, headers, payload);
// // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap
// directly.
// // Granted this impl had much better handling, fuck it we ball.
// let mut header_map = header::HeaderMap::new();
// for header in headers {
// header_map.insert(
// HeaderName::from_lowercase(header.0.to_lowercase().as_bytes())
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header name out of {}",
// header.0.to_lowercase()
// )
// })
// .unwrap(),
// HeaderValue::from_str(&header.1)
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}",
// header.1,
// header.0.to_lowercase(),
// )
// })
// .unwrap(),
// );
// }
Ok(header_map)
}
@@ -88,9 +91,12 @@ impl R2Client {
r2_file_key: &str,
content_type: Option<&str>,
) -> Result<(), R2Error> {
// --- Hash Payload --
let file_data = std::fs::read(local_file_path)?;
let payload_hash = hex::encode(Sha256::digest(&file_data));
// Payload (file data)
let payload = std::fs::read(local_file_path)?;
println!(
"[upload_file] Payload hash for signing: {}",
crate::aws_signing::hash(&payload)
);
// Set HTTP Headers
let content_type = if let Some(content_type) = content_type {
@@ -102,15 +108,17 @@ impl R2Client {
Method::PUT,
bucket,
Some(r2_file_key),
&payload_hash,
&payload,
content_type,
None,
)?;
let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key);
println!("[upload_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(r2_file_key));
let client = reqwest::Client::new();
let resp = client
.put(&file_url)
.headers(headers)
.body(file_data)
.body(payload)
.send()
.await?;
let status = resp.status();
@@ -118,10 +126,13 @@ impl R2Client {
if status.is_success() {
Ok(())
} else {
Err(R2Error::Other(format!(
"Upload failed with status {}: {}",
status, text
)))
Err(R2Error::FailedRequest(
format!(
"upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\""
),
status,
text,
))
}
}
pub async fn download_file(
@@ -129,36 +140,35 @@ impl R2Client {
bucket: &str,
key: &str,
local_path: &str,
extra_headers: Option<Vec<(String, String)>>,
) -> Result<(), R2Error> {
let payload_hash = hex::encode(Sha256::digest(""));
let content_type = Mime::get_mimetype_from_fp(local_path);
let headers = self.create_headers(
Method::GET,
bucket,
Some(key),
&payload_hash,
Some(content_type),
)?;
let file_url = format!("{}/{}/{}", self.endpoint, bucket, key);
// https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request.
// I don't know if I should trust it though, I don't see public impls with this.
let payload = "";
println!("[download_file] Payload for signing: (empty)");
let headers =
self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?;
println!("[download_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(key));
let client = reqwest::Client::new();
let resp = client.get(&file_url).headers(headers).send().await?;
let status = resp.status();
let content = resp.bytes().await?;
if status.is_success() {
std::fs::write(local_path, &content)?;
std::fs::write(local_path, resp.bytes().await?)?;
Ok(())
} else {
Err(R2Error::Other(format!(
"Download failed with status {}",
status
)))
Err(R2Error::FailedRequest(
format!("dowloading file \"{key}\" from bucket \"{bucket}\""),
status,
resp.text().await?,
))
}
}
async fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> {
let payload_hash = "UNSIGNED-PAYLOAD";
let content_type = "application/xml";
let headers =
self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?;
let payload = "";
println!("[get_bucket_listing] Payload for signing: (empty)");
let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?;
println!("[get_bucket_listing] Headers sent to request: {headers:#?}");
let url = self.build_url(bucket, None);
let client = reqwest::Client::new();
let resp = client
@@ -169,9 +179,13 @@ impl R2Client {
.map_err(R2Error::from)?;
let status = resp.status();
if status.is_success() {
resp.text().await.map_err(R2Error::from)
Ok(resp.text().await.map_err(R2Error::from)?)
} else {
Err(R2Error::Other(format!("Failed to list bucket: {}", status)))
Err(R2Error::FailedRequest(
String::from("list bucket...folders or something idfk"),
status,
resp.text().await.map_err(R2Error::from)?,
))
}
}
@@ -220,20 +234,17 @@ impl R2Client {
fn build_url(&self, bucket: &str, key: Option<&str>) -> String {
match key {
Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k),
Some(k) => {
let encoded_key = crate::aws_signing::url_encode(k);
format!("{}/{}/{}", self.endpoint, bucket, encoded_key)
}
None => format!("{}/{}/", self.endpoint, bucket),
}
}
}
impl Default for R2Client {
fn default() -> Self {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self {
access_key,
secret_key,
endpoint,
}
Self::new()
}
}
@@ -254,11 +265,12 @@ mod tests {
fn r2client_env() {
let r2client = r2client_from_env();
assert_eq!(r2client.access_key, "AKIAEXAMPLE");
assert_eq!(
r2client.secret_key,
"wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"
);
// Sorry, but I don't know yet whether the keys on the sigv4 client should be pub
// assert_eq!(r2client.access_key, "AKIAEXAMPLE");
// assert_eq!(
// r2client.secret_key,
// "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"
// );
assert_eq!(
r2client.endpoint,
"https://example.r2.cloudflarestorage.com"
@@ -279,6 +291,7 @@ mod tests {
Some("key"),
"deadbeef",
Some("application/octet-stream"),
None,
)
.unwrap();
assert!(headers.contains_key("x-amz-date"));
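A hedged sketch of the new `extra_headers` parameter on the async client above: extra signed headers are passed as lowercase name/value pairs (the `range` header is just an illustration, not something the crate requires), assuming `R2Client` and `R2Error` are re-exported at the crate root:

```rust
// Illustration only: fetch part of an object by signing an extra `range` header.
use r2client::{R2Client, R2Error};

async fn fetch_head(client: &R2Client) -> Result<(), R2Error> {
    client
        .download_file(
            "my-bucket",
            "logs/app.log",
            "app.head.log",
            Some(vec![("range".to_string(), "bytes=0-1023".to_string())]),
        )
        .await
}
```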

View File

@@ -4,6 +4,8 @@ use sha2::{Digest, Sha256};
type Hmac256 = Hmac<Sha256>;
const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
// --- Utility functions ---
fn lowercase(string: &str) -> String {
string.to_lowercase()
@@ -27,11 +29,11 @@ fn trim(string: &str) -> String {
string.trim().to_string()
}
fn hash<T: AsRef<[u8]>>(payload: T) -> String {
pub fn hash<T: AsRef<[u8]>>(payload: T) -> String {
hex(sha256hash(payload))
}
fn url_encode(url: &str) -> String {
pub fn url_encode(url: &str) -> String {
let mut url = urlencoding::encode(url).into_owned();
let encoded_to_replacement: [(&str, &str); 4] =
[("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")];
@@ -41,13 +43,16 @@ fn url_encode(url: &str) -> String {
url
}
// --- Signing Functions ---
// These don't use any parts of the Sigv4Client, so they are external
// --- Canonical request ---
fn create_canonical_request(
method: http::Method,
uri: http::Uri,
mut headers: Vec<(String, String)>,
hashed_payload: &str,
) -> (String, String, String) {
) -> (String, Vec<(String, String)>, String) {
// HTTPMethod
let http_method = method.to_string();
@@ -70,14 +75,14 @@ fn create_canonical_request(
pairs.sort_by(|a, b| a.0.cmp(&b.0));
pairs
.into_iter()
.map(|(k, v)| format!("{}={}", k, v))
.map(|(k, v)| format!("{k}={v}"))
.collect::<Vec<_>>()
.join("&")
} else {
String::new()
};
// Ensure required headers (host and x-amz-date) are present
// checks for proper host headers
let host = uri
.host()
.expect("uri passed without a proper host")
@@ -86,6 +91,16 @@ fn create_canonical_request(
headers.push(("host".to_string(), host));
}
if !headers
.iter()
.any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256"))
{
headers.push((
"x-amz-content-sha256".to_string(),
hashed_payload.to_owned(),
))
}
// CanonicalHeaders + SignedHeaders
let mut http_headers = headers
.iter()
@@ -95,7 +110,7 @@ fn create_canonical_request(
let canonical_headers: String = http_headers
.iter()
.map(|(k, v)| format!("{}:{}\n", k, v))
.map(|(k, v)| format!("{k}:{v}\n"))
.collect();
let signed_headers: String = http_headers
@@ -106,63 +121,129 @@ fn create_canonical_request(
// Final canonical request
let canonical_request = format!(
"{}\n{}\n{}\n{}\n{}\n{}",
http_method,
canonical_uri,
canonical_query_string,
canonical_headers,
signed_headers,
hashed_payload
"{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}"
);
(canonical_request, signed_headers, canonical_headers)
(canonical_request, http_headers, signed_headers)
}
fn credential_scope(date: &str, region: &str, service: &str) -> String {
format!(
"{}/{}/{}/aws4_request",
date,
lowercase(region),
lowercase(service)
)
}
fn string_to_sign(scope: &str, amz_date: &str, canonical_request: &str) -> String {
format!(
"{}\n{}\n{}\n{}",
"AWS4-HMAC-SHA256",
amz_date,
scope,
hex(sha256hash(canonical_request))
)
}
fn derive_signing_key(key: &str, date: &str, region: &str, service: &str) -> Vec<u8> {
let secret_key = format!("AWS4{}", key);
let date_key = hmac_sha256(secret_key.as_bytes(), date);
let date_region_key = hmac_sha256(&date_key, region);
let date_region_service_key = hmac_sha256(&date_region_key, service);
hmac_sha256(&date_region_service_key, "aws4_request")
}
fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec<u8> {
hmac_sha256(signing_key, string_to_sign)
}
fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String {
format!(
"{}\n{}\n{}\n{}",
"AWS4-HMAC-SHA256", amz_date, scope, hashed_canonical_request
)
}
/// Structure containing all the data relevant for an AWS Service utilizing SigV4.
/// Service: String containing the AWS service (e.g. "ec2" or "s3")
/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1")
/// Access Key: The "Access Key" to use with the AWS service (crazy, ik)
/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah)
///
/// ```
/// use aws_signing::Sigv4Client;
///
/// let s3_client = Sigv4Client::new(
/// "s3",
/// "us-east-1",
/// std::env::var("S3_ACCESS_KEY").unwrap(),
/// std::env::var("S3_SECRET_KEY").unwrap(),
/// );
/// let (_, request_headers) = s3_client.signature(
/// http::Method::GET,
/// http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"),
/// vec![("content-type".to_string(), "text/plain".to_string())],
/// "", // Since it's a GET request, the payload is empty
/// );
/// ```
#[derive(Debug)]
// A more mature client would also have session_key: Option<String>, but not my problem
pub struct Sigv4Client {
// Would it make more sense for these to be type generics
// with trait param ToString?
// Either that or just &str or String...wait, union?
// Nah there has to be a better way to do it than...that
// but I don't wanna enum!!!
service: String,
region: String,
// Would it make more sense for these to be type generics
// with trait param AsRef<[u8]>?
access_key: String,
secret_key: String,
}
/// NOTE: This only implements functions that require one of the Sigv4Client fields.
/// Other functions related to the signing process are defined above, including the
/// prerequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html
impl Sigv4Client {
/// Creates a new instance of the Sigv4Client for a particular service, in a region, with your
/// private and public access keys.
///
/// For some reason this function will take any values that impl Into<String>, so you can pass
/// &str, String, or something else if you decide to get freaky.
pub fn new(
service: impl Into<String>,
region: impl Into<String>,
pub_key: impl Into<String>,
priv_key: impl Into<String>,
) -> Self {
Self {
service: service.into(),
region: region.into(),
access_key: pub_key.into(),
secret_key: priv_key.into(),
}
}
// In a more mature client, this might be an enum of AWSRegions
// I also don't even know if this could ever be useful lol, wouldn't you have individual
// clients for each region or use "auto" for AWS to figure it out for you? whatever.
pub fn set_region(&mut self, region: impl Into<String>) {
self.region = region.into()
}
fn credential_scope(&self, date: &str) -> String {
format!(
"{}/{}/{}/aws4_request",
date,
lowercase(&self.region),
lowercase(&self.service)
)
}
fn derive_signing_key(&self, date: &str) -> Vec<u8> {
let secret_key = &self.secret_key;
let key = format!("AWS4{secret_key}");
let date_key = hmac_sha256(key.as_bytes(), date);
let date_region_key = hmac_sha256(&date_key, &self.region);
let date_region_service_key = hmac_sha256(&date_region_key, &self.service);
hmac_sha256(&date_region_service_key, "aws4_request")
}
// --- API ---
pub fn signature(
/// This is the only function to use <3
pub fn signature<T: AsRef<[u8]>>(
&self,
method: http::Method,
uri: http::Uri,
// Should probably make this a header map, then turn it into a Vec<(String, String)> to sort
// by header name cause Amazon said so.
mut headers: Vec<(String, String)>,
hashed_payload: &str,
service: &str,
region: &str,
secret_key: &str,
access_key: &str,
) -> (String, Vec<(String, String)>) {
payload: T,
) -> (String, http::HeaderMap) {
let auth_algorithm = "AWS4-HMAC-SHA256";
let now = Utc::now();
let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string();
let date_stamp = now.format("%Y%m%d").to_string();
let date = now.format("%Y%m%d").to_string();
let payload_as_bytes = payload.as_ref();
let payload_hash = if payload_as_bytes.is_empty() {
EMPTY_PAYLOAD_HASH.to_string()
} else {
hash(payload_as_bytes)
};
// Add x-amz-date header if not already present
if !headers
@@ -173,25 +254,41 @@ pub fn signature(
}
// Canonical request
let (canonical_request, signed_headers, _canonical_headers) =
create_canonical_request(method, uri, headers.clone(), hashed_payload);
let (canonical_request, mut headers, signed_headers) =
create_canonical_request(method, uri, headers, &payload_hash);
// String to sign
let scope = credential_scope(&date_stamp, region, service);
let string_to_sign = string_to_sign(&scope, &amz_date, &canonical_request);
let scope = self.credential_scope(&date);
let hashed_canonical_request = hash(&canonical_request);
let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request);
// Signing key + signature
let signing_key = derive_signing_key(secret_key, &date_stamp, region, service);
let signing_key = self.derive_signing_key(&date);
let signature = hex(calculate_signature(&signing_key, &string_to_sign));
// Authorization header
let credential = format!("{}/{}", access_key, scope);
let access_key = &self.access_key;
let credential = format!("{access_key}/{scope}");
let auth_header = format!(
"{} Credential={}, SignedHeaders={}, Signature={}",
"AWS4-HMAC-SHA256", credential, signed_headers, signature
"{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}"
);
println!("\n--- AWS SigV4 Debug ---");
println!("1. CanonicalRequest:\n---\n{canonical_request}\n---");
println!("2. StringToSign:\n---\n{string_to_sign}\n---");
println!("3. SigningKey:\n---\n{}\n---", hex(&signing_key));
println!("4. Signature:\n---\n{signature}\n---");
println!("5. Authorization Header:\n---\n{auth_header}\n---");
headers.push(("authorization".to_string(), auth_header));
(signature, headers)
let mut header_map: http::HeaderMap = http::HeaderMap::new();
for (header, value) in headers.clone() {
header_map.insert(
http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(),
http::HeaderValue::from_str(&value).unwrap(),
);
}
(signature, header_map)
}
}
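Because `Sigv4Client::signature` now hands back an `http::HeaderMap` directly, the result can go straight into reqwest, whose `HeaderMap` is the `http` crate's type (assuming the two crates agree on the `http` version). A sketch under those assumptions, also assuming the `aws_signing` module is publicly reachable:

```rust
// Sketch only: signing and sending a GET with the new signature() return type.
use http::{Method, Uri};
use r2client::aws_signing::Sigv4Client; // assumed public path

async fn signed_get(sigv4: &Sigv4Client) -> reqwest::Result<String> {
    let uri = Uri::from_static("https://example.r2.cloudflarestorage.com/my-bucket/hello.txt");
    // Empty payload: signature() substitutes the precomputed empty-payload hash.
    let (_sig, headers) = sigv4.signature(Method::GET, uri.clone(), Vec::new(), "");

    reqwest::Client::new()
        .get(uri.to_string())
        .headers(headers)
        .send()
        .await?
        .text()
        .await
}
```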

View File

@@ -10,6 +10,6 @@ pub enum R2Error {
Xml(#[from] xmltree::ParseError),
#[error("Missing environment varibles: {0}")]
Env(String),
#[error("Other: {0}")]
Other(String),
#[error("Request failed during operation {0}: {1}\n{2}")]
FailedRequest(String, http::StatusCode, String),
}
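Since `FailedRequest` carries the operation description, the HTTP status, and the response body, callers can branch on the status instead of parsing an `Other` string. A small sketch (the retry policy is my own illustration, not crate behavior), assuming `R2Error` is re-exported at the crate root:

```rust
use r2client::R2Error;

// Illustration: only retry on 5xx responses reported by FailedRequest.
fn should_retry(err: &R2Error) -> bool {
    match err {
        R2Error::FailedRequest(_operation, status, _body) => status.is_server_error(),
        _ => false,
    }
}
```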

View File

@@ -1,14 +1,21 @@
use crate::R2Client;
use crate::R2Error;
use crate::sync::R2Client;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct R2Bucket {
bucket: String,
pub client: R2Client,
}
impl R2Bucket {
pub fn new(bucket: String, client: R2Client) -> Self {
pub fn new(bucket: String) -> Self {
Self {
bucket,
client: R2Client::new(),
}
}
pub fn from_client(bucket: String, client: R2Client) -> Self {
Self { bucket, client }
}
@@ -22,41 +29,41 @@ impl R2Bucket {
Self { bucket, client }
}
pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> {
pub async fn upload_file(
&self,
local_file_path: &str,
r2_file_key: &str,
) -> Result<(), R2Error> {
self.client
.upload_file(&self.bucket, local_file_path, r2_file_key)
// I'm passing None to let the R2Client derive the content type from the local_file_path
.upload_file(&self.bucket, local_file_path, r2_file_key, None)
.await
}
pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> {
pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> {
self.client
.download_file(&self.bucket, r2_file_key, local_path)
.download_file(&self.bucket, r2_file_key, local_path, None)
.await
}
pub fn list_files(&self) -> Result<std::collections::HashMap<String, Vec<String>>, R2Error> {
self.client.list_files(&self.bucket)
pub async fn list_files(
&self,
) -> Result<std::collections::HashMap<String, Vec<String>>, R2Error> {
self.client.list_files(&self.bucket).await
}
pub fn list_folders(&self) -> Result<Vec<String>, R2Error> {
self.client.list_folders(&self.bucket)
pub async fn list_folders(&self) -> Result<Vec<String>, R2Error> {
self.client.list_folders(&self.bucket).await
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::sync::R2Bucket;
use std::env;
fn get_test_bucket() -> R2Bucket {
dotenv::dotenv().ok();
let access_key =
env::var("R2_ACCESS_KEY").unwrap_or_else(|_| "test_access_key".to_string());
let secret_key =
env::var("R2_SECRET_KEY").unwrap_or_else(|_| "test_secret_key".to_string());
let endpoint = env::var("R2_ENDPOINT")
.unwrap_or_else(|_| "https://example.r2.cloudflarestorage.com".to_string());
let client = R2Client::from_credentials(access_key, secret_key, endpoint);
R2Bucket::new("test-bucket".to_string(), client)
R2Bucket::new("test-bucket".to_string())
}
#[test]
@@ -64,4 +71,12 @@ mod tests {
let bucket = get_test_bucket();
assert_eq!(bucket.bucket, "test-bucket");
}
// Example async test (requires a runtime, so ignored by default)
// #[tokio::test]
// async fn test_upload_file() {
// let bucket = get_test_bucket();
// let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await;
// assert!(result.is_ok());
// }
}

View File

@@ -1,20 +1,16 @@
use crate::R2Error;
use crate::aws_signing::Sigv4Client;
use crate::mimetypes::Mime;
use chrono::Utc;
use hmac::{Hmac, Mac};
use reqwest::header::{self, HeaderMap, HeaderName, HeaderValue};
use sha2::{Digest, Sha256};
use http::Method;
use reqwest::header::HeaderMap;
use std::collections::HashMap;
use std::str::FromStr;
type HmacSHA256 = Hmac<Sha256>;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct R2Client {
access_key: String,
secret_key: String,
sigv4: Sigv4Client,
endpoint: String,
}
impl R2Client {
fn get_env() -> Result<(String, String, String), R2Error> {
let keys = ["R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_ENDPOINT"];
@@ -27,234 +23,165 @@ impl R2Client {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self {
access_key,
secret_key,
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key),
endpoint,
}
}
pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self {
Self {
access_key,
secret_key,
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key),
endpoint,
}
}
fn sign(&self, key: &[u8], msg: &str) -> Vec<u8> {
let mut mac = HmacSHA256::new_from_slice(key).expect("Invalid key length");
mac.update(msg.as_bytes());
mac.finalize().into_bytes().to_vec()
}
fn get_signature_key(&self, date_stamp: &str, region: &str, service: &str) -> Vec<u8> {
let aws4_secret: String = format!("AWS4{}", self.secret_key);
let k_date = self.sign(aws4_secret.as_bytes(), date_stamp);
let k_region = self.sign(&k_date, region);
let k_service = self.sign(&k_region, service);
self.sign(&k_service, "aws4_request")
}
fn create_headers(
&self,
method: &str,
method: http::Method,
bucket: &str,
key: &str,
payload_hash: &str,
content_type: &str,
) -> Result<header::HeaderMap, R2Error> {
// Robustly extract host from endpoint
let endpoint = self.endpoint.trim_end_matches('/');
// Not proud of this, it is really dumb and hard to read, but it'll work I suppose...I think...
let host = endpoint
.split("//")
.nth(1)
.unwrap_or(endpoint)
.split('/')
.next()
.unwrap_or(endpoint)
.split(':')
.next()
.unwrap_or(endpoint)
.trim();
if host.is_empty() {
return Err(R2Error::Other(
"Host header could not be determined from endpoint".to_string(),
key: Option<&str>,
payload: impl AsRef<[u8]>,
content_type: Option<&str>,
) -> Result<HeaderMap, R2Error> {
let uri = http::Uri::from_str(&self.build_url(bucket, key))
.expect("invalid uri rip (make sure the build_url function works as intended)");
let mut headers = Vec::new();
headers.push((
"host".to_string(),
uri.host().expect("Should have host in URI").to_owned(),
));
if let Some(content_type) = content_type {
headers.push(("content-type".to_string(), content_type.to_owned()))
}
let t = Utc::now();
let amz_date = t.format("%Y%m%dT%H%M%SZ").to_string();
let date_stamp = t.format("%Y%m%d").to_string();
let mut headers_vec = [
("host", host),
("x-amz-date", &amz_date),
("x-amz-content-sha256", payload_hash),
("content-type", content_type),
];
headers_vec.sort_by(|a, b| a.0.cmp(b.0));
let signed_headers = headers_vec
.iter()
.map(|(k, _)| *k)
.collect::<Vec<_>>()
.join(";");
let canonical_headers = headers_vec
.iter()
.map(|(k, v)| format!("{}:{}\n", k.to_lowercase(), v))
.collect::<String>();
let canonical_uri = format!("/{}/{}", bucket, key);
let canonical_request = format!(
"{method}\n{uri}\n\n{headers}\n{signed_headers}\n{payload_hash}",
method = method,
uri = canonical_uri,
headers = canonical_headers,
signed_headers = signed_headers,
payload_hash = payload_hash
);
let credential_scope = format!("{}/{}/s3/aws4_request", date_stamp, "auto");
let hashed_request = hex::encode(Sha256::digest(canonical_request.as_bytes()));
let string_to_sign = format!(
"AWS4-HMAC-SHA256\n{amz_date}\n{credential_scope}\n{hashed_request}",
amz_date = amz_date,
credential_scope = credential_scope,
hashed_request = hashed_request
);
let signing_key = self.get_signature_key(&date_stamp, "auto", "s3");
let signature = hex::encode(self.sign(&signing_key, &string_to_sign));
let authorization = format!(
"AWS4-HMAC-SHA256 Credential={}/{}, SignedHeaders={}, Signature={}",
self.access_key, credential_scope, signed_headers, signature
);
// Print all headers for debugging
println!("[r2client] DEBUG: Built headers:");
println!(" host: {}", host);
println!(" x-amz-date: {}", amz_date);
println!(" x-amz-content-sha256: {}", payload_hash);
println!(" content-type: {}", content_type);
println!(" authorization: {}", authorization);
println!(" signed_headers: {}", signed_headers);
println!(
" canonical_headers: {}",
canonical_headers.replace("\n", "\\n")
);
println!(
" canonical_request: {}",
canonical_request.replace("\n", "\\n")
);
println!(" string_to_sign: {}", string_to_sign.replace("\n", "\\n"));
println!(" signature: {}", signature);
let mut header_map = HeaderMap::new();
header_map.insert(
HeaderName::from_static("x-amz-date"),
HeaderValue::from_str(&amz_date)
.map_err(|e| R2Error::Other(format!("Invalid x-amz-date: {e}")))?,
);
header_map.insert(
HeaderName::from_static("x-amz-content-sha256"),
HeaderValue::from_str(payload_hash).map_err(|e| {
R2Error::Other(format!(
"Invalid x-amz-content-sha256: {payload_hash:?}: {e}"
))
})?,
);
header_map.insert(
HeaderName::from_static("authorization"),
HeaderValue::from_str(&authorization).map_err(|e| {
R2Error::Other(format!(
"Invalid authorization: {e}\nValue: {authorization}"
))
})?,
);
header_map.insert(
HeaderName::from_static("content-type"),
HeaderValue::from_str(content_type)
.map_err(|e| R2Error::Other(format!("Invalid content-type: {e}")))?,
);
header_map.insert(
HeaderName::from_static("host"),
HeaderValue::from_str(host)
.map_err(|e| R2Error::Other(format!("Invalid host: {e}")))?,
);
let (_, header_map) = self.sigv4.signature(method, uri, headers, payload);
// // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap
// directly.
// // Granted this impl had much better handling, fuck it we ball.
// let mut header_map = header::HeaderMap::new();
// for header in headers {
// header_map.insert(
// HeaderName::from_lowercase(header.0.to_lowercase().as_bytes())
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header name out of {}",
// header.0.to_lowercase()
// )
// })
// .unwrap(),
// HeaderValue::from_str(&header.1)
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}",
// header.1,
// header.0.to_lowercase(),
// )
// })
// .unwrap(),
// );
// }
Ok(header_map)
}
pub fn upload_file(
pub async fn upload_file(
&self,
bucket: &str,
local_file_path: &str,
r2_file_key: &str,
content_type: Option<&str>,
) -> Result<(), R2Error> {
let file_data = std::fs::read(local_file_path)?;
let mut hasher = Sha256::new();
hasher.update(&file_data);
let payload_hash = hex::encode(hasher.finalize());
// let content_type = "application/octet-stream";
let content_type = Mime::get_mimetype_from_fp(local_file_path);
let headers =
self.create_headers("PUT", bucket, r2_file_key, &payload_hash, content_type)?;
let file_url = format!("{}/{}/{}", self.endpoint, bucket, r2_file_key);
let client = reqwest::blocking::Client::new();
// Payload (file data)
let payload = std::fs::read(local_file_path)?;
// Set HTTP Headers
let content_type = if let Some(content_type) = content_type {
Some(content_type)
} else {
Some(Mime::get_mimetype_from_fp(local_file_path))
};
let headers = self.create_headers(
Method::PUT,
bucket,
Some(r2_file_key),
&payload,
content_type,
)?;
let file_url = self.build_url(bucket, Some(r2_file_key));
let client = reqwest::Client::new();
let resp = client
.put(&file_url)
.headers(headers)
.body(file_data)
.send()?;
.body(payload)
.send()
.await?;
let status = resp.status();
let text = resp.text()?;
let text = resp.text().await?;
if status.is_success() {
Ok(())
} else {
Err(R2Error::Other(format!(
"Upload failed with status {}: {}",
status, text
)))
Err(R2Error::FailedRequest(
format!(
"upload file {local_file_path} to bucket \"{bucket}\" under file key \"{r2_file_key}\""
),
status,
text,
))
}
}
pub fn download_file(&self, bucket: &str, key: &str, local_path: &str) -> Result<(), R2Error> {
let payload_hash = "UNSIGNED-PAYLOAD";
let content_type = "application/octet-stream";
let headers = self.create_headers("GET", bucket, key, payload_hash, content_type)?;
pub async fn download_file(
&self,
bucket: &str,
key: &str,
local_path: &str,
) -> Result<(), R2Error> {
// https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request.
// I don't know if I should trust it though, I don't see public impls with this.
let payload = "UNSIGNED-PAYLOAD";
let content_type = Mime::get_mimetype_from_fp(local_path);
let headers =
self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?;
let file_url = format!("{}/{}/{}", self.endpoint, bucket, key);
let client = reqwest::blocking::Client::new();
let resp = client.get(&file_url).headers(headers).send()?;
let client = reqwest::Client::new();
let resp = client.get(&file_url).headers(headers).send().await?;
let status = resp.status();
let content = resp.bytes()?;
if status.is_success() {
std::fs::write(local_path, &content)?;
std::fs::write(local_path, resp.bytes().await?)?;
Ok(())
} else {
Err(R2Error::Other(format!(
"Download failed with status {}",
status
)))
Err(R2Error::FailedRequest(
format!("dowloading file \"{key}\" from bucket \"{bucket}\""),
status,
resp.text().await?,
))
}
}
fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> {
async fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> {
let payload_hash = "UNSIGNED-PAYLOAD";
let content_type = "application/xml";
let headers = self.create_headers("GET", bucket, "", payload_hash, content_type)?;
let headers =
self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?;
let url = self.build_url(bucket, None);
let client = reqwest::blocking::Client::new();
let client = reqwest::Client::new();
let resp = client
.get(&url)
.headers(headers)
.send()
.await
.map_err(R2Error::from)?;
let status = resp.status();
if status.is_success() {
resp.text().map_err(R2Error::from)
Ok(resp.text().await.map_err(R2Error::from)?)
} else {
Err(R2Error::Other(format!("Failed to list bucket: {}", status)))
Err(R2Error::FailedRequest(
String::from("list bucket...folders or something idfk"),
status,
resp.text().await.map_err(R2Error::from)?,
))
}
}
/// List all files in the specified bucket. Returns a HashMap of folder -> `Vec<file>`.
pub fn list_files(&self, bucket: &str) -> Result<HashMap<String, Vec<String>>, R2Error> {
let xml = self.get_bucket_listing(bucket)?;
pub async fn list_files(&self, bucket: &str) -> Result<HashMap<String, Vec<String>>, R2Error> {
let xml = self.get_bucket_listing(bucket).await?;
let mut files_dict: HashMap<String, Vec<String>> = HashMap::new();
let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?;
for content in root
@@ -276,9 +203,8 @@ impl R2Client {
Ok(files_dict)
}
/// List all folders in the specified bucket. Returns a Vec of folder names.
pub fn list_folders(&self, bucket: &str) -> Result<Vec<String>, R2Error> {
let xml = self.get_bucket_listing(bucket)?;
pub async fn list_folders(&self, bucket: &str) -> Result<Vec<String>, R2Error> {
let xml = self.get_bucket_listing(bucket).await?;
let mut folders = std::collections::HashSet::new();
let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?;
for content in root
@@ -306,13 +232,7 @@ impl R2Client {
}
impl Default for R2Client {
fn default() -> Self {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self {
access_key,
secret_key,
endpoint,
}
Self::new()
}
}
@@ -333,36 +253,18 @@ mod tests {
fn r2client_env() {
let r2client = r2client_from_env();
assert_eq!(r2client.access_key, "AKIAEXAMPLE");
assert_eq!(
r2client.secret_key,
"wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"
);
// Sorry, but I don't know yet whether the keys on the sigv4 client should be pub
// assert_eq!(r2client.access_key, "AKIAEXAMPLE");
// assert_eq!(
// r2client.secret_key,
// "wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY"
// );
assert_eq!(
r2client.endpoint,
"https://example.r2.cloudflarestorage.com"
);
}
#[test]
fn test_sign_and_signature_key() {
let client = R2Client::from_credentials(
"AKIAEXAMPLE".to_string(),
"wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY".to_string(),
"https://example.r2.cloudflarestorage.com".to_string(),
);
let key = b"testkey";
let msg = "testmsg";
let sig = client.sign(key, msg);
assert_eq!(sig.len(), 32); // HMAC-SHA256 output is 32 bytes
let date = "20250101";
let region = "auto";
let service = "s3";
let signing_key = client.get_signature_key(date, region, service);
assert_eq!(signing_key.len(), 32);
}
#[test]
fn test_create_headers() {
let client = R2Client::from_credentials(
@@ -372,11 +274,11 @@ mod tests {
);
let headers = client
.create_headers(
"PUT",
Method::PUT,
"bucket",
"key",
Some("key"),
"deadbeef",
"application/octet-stream",
Some("application/octet-stream"),
)
.unwrap();
assert!(headers.contains_key("x-amz-date"));

137
tests/r2_tests.rs Normal file
View File

@@ -0,0 +1,137 @@
use std::env;
use std::fs;
use std::io::Write;
fn create_test_file(path: &str, content: &str) {
let mut file = fs::File::create(path).unwrap();
file.write_all(content.as_bytes()).unwrap();
}
#[cfg(feature = "sync")]
mod sync_tests {
use super::create_test_file;
use r2client::sync::R2Bucket;
use std::env;
use std::fs;
fn setup_bucket() -> R2Bucket {
dotenv::dotenv().ok();
let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests");
let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set");
let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set");
let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set");
R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint)
}
#[test]
fn test_sync_e2e() {
let bucket = setup_bucket();
let test_content = "Hello, R2 sync world!";
let local_upload_path = "test_upload_sync.txt";
let r2_file_key = "test/test_upload_sync.txt";
let local_download_path = "test_download_sync.txt";
create_test_file(local_upload_path, test_content);
// 1. Upload file
bucket
.upload_file(local_upload_path, r2_file_key)
.expect("Sync upload failed");
// 2. List files and check if it exists
let files = bucket.list_files().expect("Sync list_files failed");
assert!(
files
.get("test")
.unwrap()
.contains(&"test_upload_sync.txt".to_string())
);
// 3. List folders and check if it exists
let folders = bucket.list_folders().expect("Sync list_folders failed");
assert!(folders.contains(&"test".to_string()));
// 4. Download file
bucket
.download_file(r2_file_key, local_download_path)
.expect("Sync download failed");
// 5. Verify content
let downloaded_content = fs::read_to_string(local_download_path).unwrap();
assert_eq!(test_content, downloaded_content);
// Cleanup
fs::remove_file(local_upload_path).unwrap();
fs::remove_file(local_download_path).unwrap();
// NOTE: The test file on R2 is not deleted as there is no delete API yet.
}
}
#[cfg(feature = "async")]
mod async_tests {
use super::create_test_file;
use r2client::R2Bucket;
use std::env;
use std::fs;
fn setup_bucket() -> R2Bucket {
dotenv::dotenv().ok();
let bucket = env::var("R2_BUCKET").expect("R2_BUCKET not set for integration tests");
let access_key = env::var("R2_ACCESS_KEY").expect("R2_ACCESS_KEY not set");
let secret_key = env::var("R2_SECRET_KEY").expect("R2_SECRET_KEY not set");
let endpoint = env::var("R2_ENDPOINT").expect("R2_ENDPOINT not set");
R2Bucket::from_credentials(bucket, access_key, secret_key, endpoint)
}
#[tokio::test]
async fn test_async_e2e() {
let bucket = setup_bucket();
let test_content = "Hello, R2 async world!";
let local_upload_path = "test_upload_async.txt";
let r2_file_key = "test/test_upload_async.txt";
let local_download_path = "test_download_async.txt";
create_test_file(local_upload_path, test_content);
// 0. List files to see if a get request will go through lol
let files = bucket.list_files().await.expect("Async list_files failed");
println!("{files:#?}");
// 1. Upload file
bucket
.upload_file(local_upload_path, r2_file_key)
.await
.expect("Async upload failed");
// 2. List files and check if it exists
let files = bucket.list_files().await.expect("Async list_files failed");
assert!(
files
.get("test")
.unwrap()
.contains(&"test_upload_async.txt".to_string())
);
// 3. List folders and check if it exists
let folders = bucket
.list_folders()
.await
.expect("Async list_folders failed");
assert!(folders.contains(&"test".to_string()));
// 4. Download file
bucket
.download_file(r2_file_key, local_download_path)
.await
.expect("Async download failed");
// 5. Verify content
let downloaded_content = fs::read_to_string(local_download_path).unwrap();
assert_eq!(test_content, downloaded_content);
// Cleanup
fs::remove_file(local_upload_path).unwrap();
fs::remove_file(local_download_path).unwrap();
// NOTE: The test file on R2 is not deleted as there is no delete API yet.
}
}

25
todo.md
View File

@@ -1,6 +1,29 @@
Okay I think I did everything, so to clean up:
- [ ] Update the sync library
# Fatal Bug
When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header.
Check whether the output of the `aws_signing` function `create_canonical_request_headers` is getting tossed.
The output of that function includes the request and the headers, with the host header in there.
I imagine that, instead of parsing that string into a header map or whatever the `signature` function returns, it is creating its own "content-type" and "x-amz-date" headers and pushing them to its own vector.
I needed to come here to think cause I'm tired and would've fucked it up if I'd just brute forced it instead of thinking critically about it.
The expected solution is taking the "request" and turning it into a "HeaderMap", since we aren't low enough level to pass unsafe strings by ourselves.
Technically the output of the canonical_request is correct:
Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"`
However it's not in the format of the vector of strings, and also I don't think there should be a newline after PUT, but there should be two before the content-type. Not sure.
But that's what the request looks like, then the headers that we could parse as well that the same function returns are as follows:
Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"`
Which seems to match up, however the final signature that the main `signature` function returns is:
Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]`
So my assumption is that the `signature` function is making its own headers and dropping the canonical ones instead of parsing one of the returned strings into one.
This is happening before the "authorization" header (the only header that the main function should be adding itself) is pushed to the current vector.
I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as its own library or anything.
Okay, after saying all that, surely I could've just fixed it by now and will remember it later.
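As a sketch of that parsing step (my own illustration, not code from this commit), turning the lowercase name/value pairs into an `http::HeaderMap` so the host header can't get dropped along the way:

```rust
use http::{HeaderMap, HeaderName, HeaderValue};

// Illustration only: assumes the signing code keeps headers as lowercase
// (name, value) String pairs before this conversion.
fn headers_to_map(headers: &[(String, String)]) -> HeaderMap {
    let mut map = HeaderMap::new();
    for (name, value) in headers {
        map.insert(
            HeaderName::from_lowercase(name.to_lowercase().as_bytes())
                .expect("header name should be valid ASCII"),
            HeaderValue::from_str(value).expect("header value should be visible ASCII"),
        );
    }
    map
}
```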
- [X] Update the sync library
- [X] Make a .env with test-bucket creds
- [ ] Actually test the damn thing
- [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?)