Pre-release commit

Anything below this first part is definitely me being astounded
This is the commit where it is in a working state that isn't exactly
primitive, but it's not comprehensive, fully fleshed out, or polished
either. But I like where it's going

You wouldn't believe it.
I came back to this after a MASSIVE crash out with how HUGE and CLUNKY
trying to use the AWS SDK was.
Well
Guess what
GUESS
Literally I just went to test it this morning and I freaking KID YOU NOT
it just worked.
no changes.
it just worked.
YOU WANNA KNOW HOW I CAN TELL?
Look at this FREAKING COMMIT and see that ALL THAT CHANGED was the
todo.md FILE
literally
what
the
fart

I shouldn't complain but OH MY GOODNESS
I spent SO LONG working on it and it turned out it WASN'T all for naught
because I guess something was just...stuck. Bad credentials somehow? I
don't know anymore.
I'm just glad it's over.
This commit is contained in:
ForeverPyrite
2025-09-19 21:02:12 -04:00
parent 8b3cc6cb3e
commit 76cc06d7e1
10 changed files with 252 additions and 605 deletions

View File

@@ -1,32 +1,23 @@
[package] [package]
name = "r2client" name = "r2client"
version = "0.1.0" version = "0.2.0"
edition = "2024" edition = "2024"
[lib] [lib]
[dependencies] [dependencies]
sha2 = "0.10.9" reqwest = "0.12.19"
bytes = "1.10.1"
reqwest = { version = "0.12.19", features = ["blocking"] }
chrono = "0.4.41"
hex = "0.4.3"
hmac = "0.12.1"
xmltree = "0.11.0" xmltree = "0.11.0"
thiserror = "2" thiserror = "2"
async-trait = "0.1.89"
async-std = { version = "1.0", optional = true }
tokio = { version = "1.0", features = ["rt-multi-thread"], optional = true }
futures-executor = { version = "0.3", optional = true }
urlencoding = "2.1.3"
http = "1.3.1" http = "1.3.1"
aws_sigv4 = { path = "../aws_sigv4/" }
log = "0.4.28"
[dev-dependencies] [dev-dependencies]
tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] } tokio = { version = "1", features = ["full", "macros", "rt-multi-thread"] }
dotenv = "0.15" dotenv = "0.15"
[features] [features]
async = ["tokio"] async = []
default = ["async"] default = ["async"]
sync = ["tokio/rt-multi-thread", "futures-executor"] sync = ["reqwest/blocking"]
async-std-runtime = ["async-std"]

View File

@@ -1,4 +1,4 @@
use crate::R2Client; use crate::_async::R2Client;
use crate::R2Error; use crate::R2Error;
#[derive(Debug)] #[derive(Debug)]
@@ -55,28 +55,8 @@ impl R2Bucket {
pub async fn list_folders(&self) -> Result<Vec<String>, R2Error> { pub async fn list_folders(&self) -> Result<Vec<String>, R2Error> {
self.client.list_folders(&self.bucket).await self.client.list_folders(&self.bucket).await
} }
}
#[cfg(test)] pub async fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> {
mod tests { self.client.delete(&self.bucket, r2_file_key).await
use super::*;
fn get_test_bucket() -> R2Bucket {
dotenv::dotenv().ok();
R2Bucket::new("test-bucket".to_string())
} }
#[test]
fn test_bucket_construction() {
let bucket = get_test_bucket();
assert_eq!(bucket.bucket, "test-bucket");
}
// Example async test (requires a runtime, so ignored by default)
// #[tokio::test]
// async fn test_upload_file() {
// let bucket = get_test_bucket();
// let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await;
// assert!(result.is_ok());
// }
} }

View File

@@ -1,14 +1,15 @@
use crate::R2Error; use crate::R2Error;
use crate::aws_signing::Sigv4Client; use crate::mimetypes::get_mimetype_from_fp;
use crate::mimetypes::Mime; use aws_sigv4::SigV4Credentials;
use http::Method; use http::Method;
use log::trace;
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
#[derive(Debug)] #[derive(Debug)]
pub struct R2Client { pub struct R2Client {
sigv4: Sigv4Client, sigv4: SigV4Credentials,
endpoint: String, endpoint: String,
} }
impl R2Client { impl R2Client {
@@ -23,14 +24,14 @@ impl R2Client {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self { Self {
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key),
endpoint, endpoint,
} }
} }
pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self {
Self { Self {
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key),
endpoint, endpoint,
} }
} }
@@ -56,31 +57,6 @@ impl R2Client {
} }
let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); let (_, header_map) = self.sigv4.signature(method, uri, headers, payload);
// // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap
// directly.
// // Granted this impl had much better handling, fuck it we ball.
// let mut header_map = header::HeaderMap::new();
// for header in headers {
// header_map.insert(
// HeaderName::from_lowercase(header.0.to_lowercase().as_bytes())
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header name out of {}",
// header.0.to_lowercase()
// )
// })
// .unwrap(),
// HeaderValue::from_str(&header.1)
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}",
// header.1,
// header.0.to_lowercase(),
// )
// })
// .unwrap(),
// );
// }
Ok(header_map) Ok(header_map)
} }
@@ -93,16 +69,16 @@ impl R2Client {
) -> Result<(), R2Error> { ) -> Result<(), R2Error> {
// Payload (file data) // Payload (file data)
let payload = std::fs::read(local_file_path)?; let payload = std::fs::read(local_file_path)?;
println!( trace!(
"[upload_file] Payload hash for signing: {}", "[upload_file] Payload hash for signing: {}",
crate::aws_signing::hash(&payload) aws_sigv4::hash(&payload)
); );
// Set HTTP Headers // Set HTTP Headers
let content_type = if let Some(content_type) = content_type { let content_type = if let Some(content_type) = content_type {
Some(content_type) Some(content_type)
} else { } else {
Some(Mime::get_mimetype_from_fp(local_file_path)) Some(get_mimetype_from_fp(local_file_path))
}; };
let headers = self.create_headers( let headers = self.create_headers(
Method::PUT, Method::PUT,
@@ -112,7 +88,7 @@ impl R2Client {
content_type, content_type,
None, None,
)?; )?;
println!("[upload_file] Headers sent to request: {headers:#?}"); trace!("[upload_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(r2_file_key)); let file_url = self.build_url(bucket, Some(r2_file_key));
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let resp = client let resp = client
@@ -145,10 +121,10 @@ impl R2Client {
// https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request.
// I don't know if I should trust it though, I don't see public impls with this. // I don't know if I should trust it though, I don't see public impls with this.
let payload = ""; let payload = "";
println!("[download_file] Payload for signing: (empty)"); trace!("[download_file] Payload for signing: (empty)");
let headers = let headers =
self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?; self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?;
println!("[download_file] Headers sent to request: {headers:#?}"); trace!("[download_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(key)); let file_url = self.build_url(bucket, Some(key));
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let resp = client.get(&file_url).headers(headers).send().await?; let resp = client.get(&file_url).headers(headers).send().await?;
@@ -164,11 +140,37 @@ impl R2Client {
)) ))
} }
} }
pub async fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> {
let payload = "";
trace!("[delete_file] Payload for signing: (empty)");
let headers = self.create_headers(
Method::DELETE,
bucket,
Some(remote_key),
payload,
None,
None,
)?;
trace!("[delete_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(remote_key));
let client = reqwest::Client::new();
let resp = client.delete(&file_url).headers(headers).send().await?;
let status = resp.status();
if status.is_success() {
Ok(())
} else {
Err(R2Error::FailedRequest(
format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""),
status,
resp.text().await?,
))
}
}
async fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> { async fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> {
let payload = ""; let payload = "";
println!("[get_bucket_listing] Payload for signing: (empty)"); trace!("[get_bucket_listing] Payload for signing: (empty)");
let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?; let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?;
println!("[get_bucket_listing] Headers sent to request: {headers:#?}"); trace!("[get_bucket_listing] Headers sent to request: {headers:#?}");
let url = self.build_url(bucket, None); let url = self.build_url(bucket, None);
let client = reqwest::Client::new(); let client = reqwest::Client::new();
let resp = client let resp = client
@@ -223,19 +225,19 @@ impl R2Client {
.filter(|e| e.name == "Contents") .filter(|e| e.name == "Contents")
{ {
let key_elem = content.get_child("Key").and_then(|k| k.get_text()); let key_elem = content.get_child("Key").and_then(|k| k.get_text());
if let Some(file_key) = key_elem { if let Some(file_key) = key_elem
if let Some(idx) = file_key.find('/') { && let Some(idx) = file_key.find('/')
{
folders.insert(file_key[..idx].to_string()); folders.insert(file_key[..idx].to_string());
} }
} }
}
Ok(folders.into_iter().collect()) Ok(folders.into_iter().collect())
} }
fn build_url(&self, bucket: &str, key: Option<&str>) -> String { fn build_url(&self, bucket: &str, key: Option<&str>) -> String {
match key { match key {
Some(k) => { Some(k) => {
let encoded_key = crate::aws_signing::url_encode(k); let encoded_key = aws_sigv4::url_encode(k);
format!("{}/{}/{}", self.endpoint, bucket, encoded_key) format!("{}/{}/{}", self.endpoint, bucket, encoded_key)
} }
None => format!("{}/{}/", self.endpoint, bucket), None => format!("{}/{}/", self.endpoint, bucket),

View File

@@ -1,294 +0,0 @@
use chrono::Utc;
use hmac::{Hmac, Mac};
use sha2::{Digest, Sha256};
// Shorthand for HMAC keyed over SHA-256, the only MAC SigV4 uses.
type Hmac256 = Hmac<Sha256>;
// SHA-256 digest of the empty string; substituted as the payload hash for
// body-less requests (see `Sigv4Client::signature`) so we skip re-hashing "".
const EMPTY_PAYLOAD_HASH: &str = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
// --- Utility functions ---
/// Lower-cases `string`, returning an owned `String`.
/// Thin alias kept so call sites read like the SigV4 spec's pseudocode.
fn lowercase(string: &str) -> String {
    str::to_lowercase(string)
}
// Hex-encodes arbitrary bytes via the `hex` crate; SigV4 wants lowercase hex
// for payload hashes and signatures.
fn hex<T: AsRef<[u8]>>(data: T) -> String {
    hex::encode(data)
}
// SHA-256 digest of `data`, returned as the raw 32-byte array
// (hex-encode with `hash`/`hex` when a string form is needed).
fn sha256hash<T: AsRef<[u8]>>(data: T) -> [u8; 32] {
    Sha256::digest(data).into()
}
// HMAC-SHA256 of `message` keyed by `signing_key`, as an owned byte vector so
// the result can be fed back in as the next key in the SigV4 key-derivation
// chain (see `Sigv4Client::derive_signing_key`).
// NOTE(review): HMAC should accept keys of any length, making this `expect`
// unreachable in practice — confirm against the `hmac` crate docs.
fn hmac_sha256(signing_key: &[u8], message: &str) -> Vec<u8> {
    let mut mac = Hmac256::new_from_slice(signing_key).expect("bad key :pensive:");
    mac.update(message.as_bytes());
    mac.finalize().into_bytes().to_vec()
}
/// Strips leading and trailing whitespace from `string` into an owned
/// `String` — SigV4 requires trimmed header values in the canonical request.
fn trim(string: &str) -> String {
    String::from(string.trim())
}
// Convenience: lowercase hex string of the SHA-256 digest of `payload` —
// the form SigV4 uses for `x-amz-content-sha256` and the hashed canonical
// request.
pub fn hash<T: AsRef<[u8]>>(payload: T) -> String {
    hex(sha256hash(payload))
}
// Percent-encodes `url` for use in an S3/SigV4 canonical URI, then adjusts
// `urlencoding::encode`'s output toward the AWS flavor of RFC 3986:
// '~' stays literal and '/' path separators are restored (S3 object keys keep
// their slashes unencoded in the canonical path).
// NOTE(review): the ("+","%20") and ("*","%2A") entries look like no-ops if
// `urlencoding::encode` never emits '+' and already percent-encodes '*' —
// confirm against the urlencoding crate docs before removing them.
pub fn url_encode(url: &str) -> String {
    let mut url = urlencoding::encode(url).into_owned();
    let encoded_to_replacement: [(&str, &str); 4] =
        [("+", "%20"), ("*", "%2A"), ("%7E", "~"), ("%2F", "/")];
    for (encoded_chars_pattern, replacement) in encoded_to_replacement {
        url = url.replace(encoded_chars_pattern, replacement)
    }
    url
}
// --- Signing Functions ---
// These don't use any parts of the Sigv4Client, so they are external
// --- Canonical request ---
// Builds the SigV4 canonical request from the method, URI, headers, and the
// pre-hashed payload.
//
// Returns `(canonical_request, headers, signed_headers)`:
// - `canonical_request`: the newline-joined canonical form that is later
//   hashed into the string-to-sign
// - `headers`: the input headers lowercased/trimmed and sorted by name, with
//   "host" and "x-amz-content-sha256" inserted if missing
// - `signed_headers`: the ";"-joined list of signed header names
fn create_canonical_request(
    method: http::Method,
    uri: http::Uri,
    mut headers: Vec<(String, String)>,
    hashed_payload: &str,
) -> (String, Vec<(String, String)>, String) {
    // HTTPMethod
    let http_method = method.to_string();
    // CanonicalURI = *path only* (spec forbids scheme+host here)
    let canonical_uri = if uri.path().is_empty() {
        "/".to_string()
    } else {
        uri.path().to_string()
    };
    // CanonicalQueryString (URL-encoded, sorted by key)
    let canonical_query_string = if let Some(query_string) = uri.query() {
        let mut pairs = query_string
            .split('&')
            .map(|query| {
                // Key-only parameters (no '=') get an empty value.
                let (k, v) = query.split_once('=').unwrap_or((query, ""));
                (url_encode(k), url_encode(v))
            })
            .collect::<Vec<_>>();
        pairs.sort_by(|a, b| a.0.cmp(&b.0));
        pairs
            .into_iter()
            .map(|(k, v)| format!("{k}={v}"))
            .collect::<Vec<_>>()
            .join("&")
    } else {
        String::new()
    };
    // checks for proper host headers
    let host = uri
        .host()
        .expect("uri passed without a proper host")
        .to_string();
    if !headers.iter().any(|(k, _)| k.eq_ignore_ascii_case("host")) {
        headers.push(("host".to_string(), host));
    }
    // x-amz-content-sha256 must carry the same hash that goes into the
    // canonical request, or the service-side signature check fails.
    if !headers
        .iter()
        .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-content-sha256"))
    {
        headers.push((
            "x-amz-content-sha256".to_string(),
            hashed_payload.to_owned(),
        ))
    }
    // CanonicalHeaders + SignedHeaders: lowercase names, trim values, sort by
    // name, because Amazon said so.
    let mut http_headers = headers
        .iter()
        .map(|(name, value)| (lowercase(name), trim(value)))
        .collect::<Vec<_>>();
    http_headers.sort_by(|(k1, _), (k2, _)| k1.cmp(k2));
    let canonical_headers: String = http_headers
        .iter()
        .map(|(k, v)| format!("{k}:{v}\n"))
        .collect();
    let signed_headers: String = http_headers
        .iter()
        .map(|(k, _)| k.clone())
        .collect::<Vec<_>>()
        .join(";");
    // Final canonical request
    let canonical_request = format!(
        "{http_method}\n{canonical_uri}\n{canonical_query_string}\n{canonical_headers}\n{signed_headers}\n{hashed_payload}"
    );
    (canonical_request, http_headers, signed_headers)
}
// Final SigV4 signing step: HMAC-SHA256 of the string-to-sign with the key
// derived in `Sigv4Client::derive_signing_key`. Named alias so the call site
// mirrors the spec's terminology.
fn calculate_signature(signing_key: &[u8], string_to_sign: &str) -> Vec<u8> {
    hmac_sha256(signing_key, string_to_sign)
}
/// Assembles the SigV4 "string to sign": the fixed algorithm name, the
/// request timestamp, the credential scope, and the hashed canonical
/// request, joined by newlines in that order.
fn string_to_sign(scope: &str, amz_date: &str, hashed_canonical_request: &str) -> String {
    format!("AWS4-HMAC-SHA256\n{amz_date}\n{scope}\n{hashed_canonical_request}")
}
/// Structure containing all the data relevant for an AWS Service utilizing SigV4.
/// Service: String containing the AWS service (e.g. "ec2" or "s3")
/// Region: String containing the AWS region you're working in (e.g. "auto" or "us-east-1")
/// Access Key: The "Access Key" to use with the AWS service (crazy, ik)
/// Secret Key: The "Secret Key" that is used for cryptographic signing for the AWS Service (woah)
///
/// ```ignore
/// use aws_signing::Sigv4Client;
///
/// let s3_client = Sigv4Client::new(
///     "s3",
///     "us-east-1",
///     std::env::var("S3_ACCESS_KEY").unwrap(),
///     std::env::var("S3_SECRET_KEY").unwrap(),
/// );
/// let (_, request_headers) = s3_client.signature(
///     http::Method::GET,
///     http::Uri::from_static("https://s3.us-east-1.amazonaws.com/example-bucket/file.txt"),
///     vec![("content-type".to_string(), "text/plain".to_string())],
///     "", // Since it's a GET request, the payload is ""
/// );
/// ```
#[derive(Debug)]
// A more mature client would also have session_key: Option<String>, but not my problem
pub struct Sigv4Client {
    // Would it make more sense for these to be type generics
    // with trait param ToString?
    // Either that or just &str or String...wait, union?
    // Nah there has to be a better way to do it than...that
    // but I don't wanna enum!!!
    service: String,
    region: String,
    // Would it make more sense for these to be type generics
    // with trait param AsRef<[u8]>?
    access_key: String,
    secret_key: String,
}
/// NOTE: This only implements functions that require one of the Sigv4Client fields.
/// For other functions related to the signing process, they are defined above, including the
/// prerequisite functions defined at https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html
impl Sigv4Client {
    /// Creates a new instance of the Sigv4Client for a particular service, in a region, with your
    /// private and public access keys.
    ///
    /// This function will take any values that impl Into<String>, so you can pass
    /// &str, String, or something else if you decide to get freaky.
    pub fn new(
        service: impl Into<String>,
        region: impl Into<String>,
        pub_key: impl Into<String>,
        priv_key: impl Into<String>,
    ) -> Self {
        Self {
            service: service.into(),
            region: region.into(),
            access_key: pub_key.into(),
            secret_key: priv_key.into(),
        }
    }

    // In a more mature client, this might be an enum of AWSRegions.
    // I also don't even know if this could ever be useful lol, wouldn't you have individual
    // clients for each region or use "auto" for AWS to figure it out for you? whatever.
    pub fn set_region(&mut self, region: impl Into<String>) {
        self.region = region.into()
    }

    /// SigV4 "credential scope": `<date>/<region>/<service>/aws4_request`,
    /// with region and service lowercased.
    fn credential_scope(&self, date: &str) -> String {
        format!(
            "{}/{}/{}/aws4_request",
            date,
            lowercase(&self.region),
            lowercase(&self.service)
        )
    }

    /// Derives the per-date signing key by chaining HMAC-SHA256 over
    /// "AWS4" + secret key, then the date, region, service, and the literal
    /// "aws4_request", per the SigV4 key-derivation procedure.
    fn derive_signing_key(&self, date: &str) -> Vec<u8> {
        let secret_key = &self.secret_key;
        let key = format!("AWS4{secret_key}");
        let date_key = hmac_sha256(key.as_bytes(), date);
        let date_region_key = hmac_sha256(&date_key, &self.region);
        let date_region_service_key = hmac_sha256(&date_region_key, &self.service);
        hmac_sha256(&date_region_service_key, "aws4_request")
    }

    // --- API ---
    /// This is the only function to use <3
    ///
    /// Signs a request described by `method`, `uri`, `headers`, and `payload`
    /// at the current UTC time, returning the hex signature together with a
    /// ready-to-send `HeaderMap` containing the canonicalized headers
    /// (including `x-amz-date`, `x-amz-content-sha256`, and `host` if they
    /// were missing) plus the final `authorization` header.
    pub fn signature<T: AsRef<[u8]>>(
        &self,
        method: http::Method,
        uri: http::Uri,
        // Should probably make this a header map, then turn it into a Vec(String, String) to sort
        // by header name cause Amazon said so.
        mut headers: Vec<(String, String)>,
        payload: T,
    ) -> (String, http::HeaderMap) {
        let auth_algorithm = "AWS4-HMAC-SHA256";
        let now = Utc::now();
        let amz_date = now.format("%Y%m%dT%H%M%SZ").to_string();
        let date = now.format("%Y%m%d").to_string();
        let payload_as_bytes = payload.as_ref();
        // Empty payloads reuse the precomputed empty-string hash rather than
        // running the digest again.
        let payload_hash = if payload_as_bytes.is_empty() {
            EMPTY_PAYLOAD_HASH.to_string()
        } else {
            hash(payload_as_bytes)
        };
        // Add x-amz-date header if not already present
        if !headers
            .iter()
            .any(|(k, _)| k.eq_ignore_ascii_case("x-amz-date"))
        {
            headers.push(("x-amz-date".to_string(), amz_date.clone()));
        }
        // Canonical request
        let (canonical_request, mut headers, signed_headers) =
            create_canonical_request(method, uri, headers, &payload_hash);
        // String to sign
        let scope = self.credential_scope(&date);
        let hashed_canonical_request = hash(&canonical_request);
        let string_to_sign = string_to_sign(&scope, &amz_date, &hashed_canonical_request);
        // Signing key + signature
        let signing_key = self.derive_signing_key(&date);
        let signature = hex(calculate_signature(&signing_key, &string_to_sign));
        // Authorization header
        let access_key = &self.access_key;
        let credential = format!("{access_key}/{scope}");
        let auth_header = format!(
            "{auth_algorithm} Credential={credential}, SignedHeaders={signed_headers}, Signature={signature}"
        );
        // SECURITY: the previous debug block println!'d the canonical request,
        // string-to-sign, derived signing key, and signature to stdout. That
        // leaks key material into logs, so it has been removed.
        headers.push(("authorization".to_string(), auth_header));
        let mut header_map: http::HeaderMap = http::HeaderMap::new();
        // Consume `headers` directly; it is not used again, so the former
        // `.clone()` here was a redundant allocation.
        for (header, value) in headers {
            header_map.insert(
                http::HeaderName::from_lowercase(header.to_lowercase().as_bytes()).unwrap(),
                http::HeaderValue::from_str(&value).unwrap(),
            );
        }
        (signature, header_map)
    }
}

View File

@@ -1,4 +1,3 @@
mod aws_signing;
mod error; mod error;
mod mimetypes; mod mimetypes;
pub use error::R2Error; pub use error::R2Error;
@@ -9,11 +8,3 @@ pub use _async::{R2Bucket, R2Client};
#[cfg(feature = "sync")] #[cfg(feature = "sync")]
pub mod sync; pub mod sync;
#[cfg(test)]
mod test {
// use crate::{R2Bucket, R2Client, sync};
#[test]
fn test() {}
}

View File

@@ -1,8 +1,3 @@
pub enum Mime{}
impl Mime {
pub fn get_mimetype(key: &str) -> &'static str { pub fn get_mimetype(key: &str) -> &'static str {
match key { match key {
// Image formats // Image formats
@@ -64,15 +59,16 @@ impl Mime {
// be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg") // be the file extension (e.g. "~/.config/test.jpeg" becomes "jpeg")
// This is formated back to ".jpeg" because it's how the match statement is working. // This is formated back to ".jpeg" because it's how the match statement is working.
// I could very easily change it but idk it was an interesting thing. // I could very easily change it but idk it was an interesting thing.
Self::get_mimetype( //
&format!( // Hey, so maybe you should change the match statement to not care about the '.'?
".{}", file_path.rsplit(".") // Then again this is just being used for this project, so I guess it doesn't really matter
get_mimetype(&format!(
".{}",
file_path
.rsplit(".")
.next() .next()
.unwrap_or("time_to_be_an_octet_stream_lmao") .unwrap_or("time_to_be_an_octet_stream_lmao")
) ))
)
}
} }
#[cfg(test)] #[cfg(test)]
@@ -81,23 +77,36 @@ mod tests {
#[test] #[test]
fn match_mime_test() { fn match_mime_test() {
assert_eq!(Mime::get_mimetype(".tar"), "application/x-tar"); assert_eq!(get_mimetype(".tar"), "application/x-tar");
} }
#[test] #[test]
fn default_mime_test() { fn default_mime_test() {
assert_eq!(Mime::get_mimetype(".bf"), "application/octet-stream"); assert_eq!(get_mimetype(".bf"), "application/octet-stream");
} }
#[test] #[test]
fn mime_from_file() { fn mime_from_file() {
assert_eq!(Mime::get_mimetype_from_fp("test.ico"), "image/x-icon"); assert_eq!(get_mimetype_from_fp("test.ico"), "image/x-icon");
} }
#[test] #[test]
fn mime_from_file_path() { fn mime_from_file_path() {
assert_eq!(Mime::get_mimetype_from_fp("/home/testuser/Documents/test.pdf"), "application/pdf"); assert_eq!(
assert_eq!(Mime::get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"), "text/plain") get_mimetype_from_fp("/home/testuser/Documents/test.pdf"),
"application/pdf"
);
assert_eq!(
get_mimetype_from_fp("./bucket_test/bucket_test_upload.txt"),
"text/plain"
)
} }
#[test]
fn no_ext() {
assert_eq!(
get_mimetype_from_fp("edge_case_lmao"),
"application/octet-stream"
)
}
} }

View File

@@ -1,4 +1,4 @@
use crate::R2Client; use crate::sync::R2Client;
use crate::R2Error; use crate::R2Error;
#[derive(Debug)] #[derive(Debug)]
@@ -29,54 +29,26 @@ impl R2Bucket {
Self { bucket, client } Self { bucket, client }
} }
pub async fn upload_file( pub fn upload_file(&self, local_file_path: &str, r2_file_key: &str) -> Result<(), R2Error> {
&self,
local_file_path: &str,
r2_file_key: &str,
) -> Result<(), R2Error> {
self.client self.client
// I'm pasing None to let the R2Client derive the content type from the local_file_path // I'm pasing None to let the R2Client derive the content type from the local_file_path
.upload_file(&self.bucket, local_file_path, r2_file_key, None) .upload_file(&self.bucket, local_file_path, r2_file_key, None)
.await
} }
pub async fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> { pub fn download_file(&self, r2_file_key: &str, local_path: &str) -> Result<(), R2Error> {
self.client self.client
.download_file(&self.bucket, r2_file_key, local_path, None) .download_file(&self.bucket, r2_file_key, local_path, None)
.await
} }
pub async fn list_files( pub fn list_files(&self) -> Result<std::collections::HashMap<String, Vec<String>>, R2Error> {
&self, self.client.list_files(&self.bucket)
) -> Result<std::collections::HashMap<String, Vec<String>>, R2Error> {
self.client.list_files(&self.bucket).await
} }
pub async fn list_folders(&self) -> Result<Vec<String>, R2Error> { pub fn list_folders(&self) -> Result<Vec<String>, R2Error> {
self.client.list_folders(&self.bucket).await self.client.list_folders(&self.bucket)
}
} }
#[cfg(test)] pub fn delete_file(&self, r2_file_key: &str) -> Result<(), R2Error> {
mod tests { self.client.delete(&self.bucket, r2_file_key)
use super::*;
fn get_test_bucket() -> R2Bucket {
dotenv::dotenv().ok();
R2Bucket::new("test-bucket".to_string())
} }
#[test]
fn test_bucket_construction() {
let bucket = get_test_bucket();
assert_eq!(bucket.bucket, "test-bucket");
}
// Example async test (requires a runtime, so ignored by default)
// #[tokio::test]
// async fn test_upload_file() {
// let bucket = get_test_bucket();
// let result = bucket.upload_file("Cargo.toml", "test-upload.toml").await;
// assert!(result.is_ok());
// }
} }

View File

@@ -1,14 +1,15 @@
use crate::R2Error; use crate::R2Error;
use crate::aws_signing::Sigv4Client; use crate::mimetypes::get_mimetype_from_fp;
use crate::mimetypes::Mime; use aws_sigv4::SigV4Credentials;
use http::Method; use http::Method;
use log::trace;
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::str::FromStr; use std::str::FromStr;
#[derive(Debug)] #[derive(Debug)]
pub struct R2Client { pub struct R2Client {
sigv4: Sigv4Client, sigv4: SigV4Credentials,
endpoint: String, endpoint: String,
} }
impl R2Client { impl R2Client {
@@ -23,14 +24,14 @@ impl R2Client {
let (access_key, secret_key, endpoint) = Self::get_env().unwrap(); let (access_key, secret_key, endpoint) = Self::get_env().unwrap();
Self { Self {
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key),
endpoint, endpoint,
} }
} }
pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self { pub fn from_credentials(access_key: String, secret_key: String, endpoint: String) -> Self {
Self { Self {
sigv4: Sigv4Client::new("s3", "auto", access_key, secret_key), sigv4: SigV4Credentials::new("s3", "auto", access_key, secret_key),
endpoint, endpoint,
} }
} }
@@ -42,10 +43,11 @@ impl R2Client {
key: Option<&str>, key: Option<&str>,
payload: impl AsRef<[u8]>, payload: impl AsRef<[u8]>,
content_type: Option<&str>, content_type: Option<&str>,
extra_headers: Option<Vec<(String, String)>>,
) -> Result<HeaderMap, R2Error> { ) -> Result<HeaderMap, R2Error> {
let uri = http::Uri::from_str(&self.build_url(bucket, key)) let uri = http::Uri::from_str(&self.build_url(bucket, key))
.expect("invalid uri rip (make sure the build_url function works as intended)"); .expect("invalid uri rip (make sure the build_url function works as intended)");
let mut headers = Vec::new(); let mut headers = extra_headers.unwrap_or_default();
headers.push(( headers.push((
"host".to_string(), "host".to_string(),
uri.host().expect("Should have host in URI").to_owned(), uri.host().expect("Should have host in URI").to_owned(),
@@ -55,35 +57,10 @@ impl R2Client {
} }
let (_, header_map) = self.sigv4.signature(method, uri, headers, payload); let (_, header_map) = self.sigv4.signature(method, uri, headers, payload);
// // I said fuck it but this is BS, I'm gonna try having signature return the HeaderMap
// directly.
// // Granted this impl had much better handling, fuck it we ball.
// let mut header_map = header::HeaderMap::new();
// for header in headers {
// header_map.insert(
// HeaderName::from_lowercase(header.0.to_lowercase().as_bytes())
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header name out of {}",
// header.0.to_lowercase()
// )
// })
// .unwrap(),
// HeaderValue::from_str(&header.1)
// .inspect_err(|e| {
// eprint!(
// "Sucks to suck: {e:?} from trying to get a header value out of {} for header {}",
// header.1,
// header.0.to_lowercase(),
// )
// })
// .unwrap(),
// );
// }
Ok(header_map) Ok(header_map)
} }
pub async fn upload_file( pub fn upload_file(
&self, &self,
bucket: &str, bucket: &str,
local_file_path: &str, local_file_path: &str,
@@ -92,12 +69,16 @@ impl R2Client {
) -> Result<(), R2Error> { ) -> Result<(), R2Error> {
// Payload (file data) // Payload (file data)
let payload = std::fs::read(local_file_path)?; let payload = std::fs::read(local_file_path)?;
trace!(
"[upload_file] Payload hash for signing: {}",
aws_sigv4::hash(&payload)
);
// Set HTTP Headers // Set HTTP Headers
let content_type = if let Some(content_type) = content_type { let content_type = if let Some(content_type) = content_type {
Some(content_type) Some(content_type)
} else { } else {
Some(Mime::get_mimetype_from_fp(local_file_path)) Some(get_mimetype_from_fp(local_file_path))
}; };
let headers = self.create_headers( let headers = self.create_headers(
Method::PUT, Method::PUT,
@@ -105,17 +86,18 @@ impl R2Client {
Some(r2_file_key), Some(r2_file_key),
&payload, &payload,
content_type, content_type,
None,
)?; )?;
trace!("[upload_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(r2_file_key)); let file_url = self.build_url(bucket, Some(r2_file_key));
let client = reqwest::Client::new(); let client = reqwest::blocking::Client::new();
let resp = client let resp = client
.put(&file_url) .put(&file_url)
.headers(headers) .headers(headers)
.body(payload) .body(payload)
.send() .send()?;
.await?;
let status = resp.status(); let status = resp.status();
let text = resp.text().await?; let text = resp.text()?;
if status.is_success() { if status.is_success() {
Ok(()) Ok(())
} else { } else {
@@ -128,60 +110,87 @@ impl R2Client {
)) ))
} }
} }
pub async fn download_file( pub fn download_file(
&self, &self,
bucket: &str, bucket: &str,
key: &str, key: &str,
local_path: &str, local_path: &str,
extra_headers: Option<Vec<(String, String)>>,
) -> Result<(), R2Error> { ) -> Result<(), R2Error> {
// https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request. // https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_sigv-create-signed-request.html#:~:text=For%20Amazon%20S3%2C%20include%20the%20literal%20string%20UNSIGNED%2DPAYLOAD%20when%20constructing%20a%20canonical%20request%2C%20and%20set%20the%20same%20value%20as%20the%20x%2Damz%2Dcontent%2Dsha256%20header%20value%20when%20sending%20the%20request.
// I don't know if I should trust it though, I don't see public impls with this. // I don't know if I should trust it though, I don't see public impls with this.
let payload = "UNSIGNED-PAYLOAD"; let payload = "";
let content_type = Mime::get_mimetype_from_fp(local_path); trace!("[download_file] Payload for signing: (empty)");
let headers = let headers =
self.create_headers(Method::GET, bucket, Some(key), payload, Some(content_type))?; self.create_headers(Method::GET, bucket, Some(key), payload, None, extra_headers)?;
let file_url = format!("{}/{}/{}", self.endpoint, bucket, key); trace!("[download_file] Headers sent to request: {headers:#?}");
let client = reqwest::Client::new(); let file_url = self.build_url(bucket, Some(key));
let resp = client.get(&file_url).headers(headers).send().await?; let client = reqwest::blocking::Client::new();
let resp = client.get(&file_url).headers(headers).send()?;
let status = resp.status(); let status = resp.status();
if status.is_success() { if status.is_success() {
std::fs::write(local_path, resp.bytes().await?)?; std::fs::write(local_path, resp.bytes()?)?;
Ok(()) Ok(())
} else { } else {
Err(R2Error::FailedRequest( Err(R2Error::FailedRequest(
format!("dowloading file \"{key}\" from bucket \"{bucket}\""), format!("dowloading file \"{key}\" from bucket \"{bucket}\""),
status, status,
resp.text().await?, resp.text()?,
)) ))
} }
} }
async fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> { pub fn delete(&self, bucket: &str, remote_key: &str) -> Result<(), R2Error> {
let payload_hash = "UNSIGNED-PAYLOAD"; let payload = "";
let content_type = "application/xml"; trace!("[delete_file] Payload for signing: (empty)");
let headers = let headers = self.create_headers(
self.create_headers(Method::GET, bucket, None, payload_hash, Some(content_type))?; Method::DELETE,
bucket,
Some(remote_key),
payload,
None,
None,
)?;
trace!("[delete_file] Headers sent to request: {headers:#?}");
let file_url = self.build_url(bucket, Some(remote_key));
let client = reqwest::blocking::Client::new();
let resp = client.delete(&file_url).headers(headers).send()?;
let status = resp.status();
if status.is_success() {
Ok(())
} else {
Err(R2Error::FailedRequest(
format!("deleting file \"{remote_key}\" from bucket \"{bucket}\""),
status,
resp.text()?,
))
}
}
fn get_bucket_listing(&self, bucket: &str) -> Result<String, R2Error> {
let payload = "";
trace!("[get_bucket_listing] Payload for signing: (empty)");
let headers = self.create_headers(Method::GET, bucket, None, payload, None, None)?;
trace!("[get_bucket_listing] Headers sent to request: {headers:#?}");
let url = self.build_url(bucket, None); let url = self.build_url(bucket, None);
let client = reqwest::Client::new(); let client = reqwest::blocking::Client::new();
let resp = client let resp = client
.get(&url) .get(&url)
.headers(headers) .headers(headers)
.send() .send()
.await
.map_err(R2Error::from)?; .map_err(R2Error::from)?;
let status = resp.status(); let status = resp.status();
if status.is_success() { if status.is_success() {
Ok(resp.text().await.map_err(R2Error::from)?) Ok(resp.text().map_err(R2Error::from)?)
} else { } else {
Err(R2Error::FailedRequest( Err(R2Error::FailedRequest(
String::from("list bucket...folders or something idfk"), String::from("list bucket...folders or something idfk"),
status, status,
resp.text().await.map_err(R2Error::from)?, resp.text().map_err(R2Error::from)?,
)) ))
} }
} }
pub async fn list_files(&self, bucket: &str) -> Result<HashMap<String, Vec<String>>, R2Error> { pub fn list_files(&self, bucket: &str) -> Result<HashMap<String, Vec<String>>, R2Error> {
let xml = self.get_bucket_listing(bucket).await?; let xml = self.get_bucket_listing(bucket)?;
let mut files_dict: HashMap<String, Vec<String>> = HashMap::new(); let mut files_dict: HashMap<String, Vec<String>> = HashMap::new();
let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?;
for content in root for content in root
@@ -203,8 +212,8 @@ impl R2Client {
Ok(files_dict) Ok(files_dict)
} }
pub async fn list_folders(&self, bucket: &str) -> Result<Vec<String>, R2Error> { pub fn list_folders(&self, bucket: &str) -> Result<Vec<String>, R2Error> {
let xml = self.get_bucket_listing(bucket).await?; let xml = self.get_bucket_listing(bucket)?;
let mut folders = std::collections::HashSet::new(); let mut folders = std::collections::HashSet::new();
let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?; let root = xmltree::Element::parse(xml.as_bytes()).map_err(R2Error::from)?;
for content in root for content in root
@@ -214,18 +223,21 @@ impl R2Client {
.filter(|e| e.name == "Contents") .filter(|e| e.name == "Contents")
{ {
let key_elem = content.get_child("Key").and_then(|k| k.get_text()); let key_elem = content.get_child("Key").and_then(|k| k.get_text());
if let Some(file_key) = key_elem { if let Some(file_key) = key_elem
if let Some(idx) = file_key.find('/') { && let Some(idx) = file_key.find('/')
{
folders.insert(file_key[..idx].to_string()); folders.insert(file_key[..idx].to_string());
} }
} }
}
Ok(folders.into_iter().collect()) Ok(folders.into_iter().collect())
} }
fn build_url(&self, bucket: &str, key: Option<&str>) -> String { fn build_url(&self, bucket: &str, key: Option<&str>) -> String {
match key { match key {
Some(k) => format!("{}/{}/{}", self.endpoint, bucket, k), Some(k) => {
let encoded_key = aws_sigv4::url_encode(k);
format!("{}/{}/{}", self.endpoint, bucket, encoded_key)
}
None => format!("{}/{}/", self.endpoint, bucket), None => format!("{}/{}/", self.endpoint, bucket),
} }
} }
@@ -279,6 +291,7 @@ mod tests {
Some("key"), Some("key"),
"deadbeef", "deadbeef",
Some("application/octet-stream"), Some("application/octet-stream"),
None,
) )
.unwrap(); .unwrap();
assert!(headers.contains_key("x-amz-date")); assert!(headers.contains_key("x-amz-date"));

View File

@@ -1,4 +1,3 @@
use std::env;
use std::fs; use std::fs;
use std::io::Write; use std::io::Write;
@@ -63,7 +62,6 @@ mod sync_tests {
// Cleanup // Cleanup
fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_upload_path).unwrap();
fs::remove_file(local_download_path).unwrap(); fs::remove_file(local_download_path).unwrap();
// NOTE: The test file on R2 is not deleted as there is no delete API yet.
} }
} }
@@ -132,6 +130,8 @@ mod async_tests {
// Cleanup // Cleanup
fs::remove_file(local_upload_path).unwrap(); fs::remove_file(local_upload_path).unwrap();
fs::remove_file(local_download_path).unwrap(); fs::remove_file(local_download_path).unwrap();
// NOTE: The test file on R2 is not deleted as there is no delete API yet.
// 6. Delete file
bucket.delete_file(r2_file_key).await.unwrap();
} }
} }

33
todo.md
View File

@@ -1,29 +1,12 @@
Okay I think I did everything, so to clean up: ## For release:
- [ ] Create a crate::Result that is Result<u8, R2Error>, and have Ok(status_code)
# Fatal Bug - [ ] Consider dropping more dependencies, using hyper or some lower level stuff for async, and then http for blocking
When building the headers for the async request (_async::r2client::tests::test_create_headers), the final header list is missing the host header. - [ ] A way to view the file contents (UTF-8 valid) would be cool
- [ ] Add functions that will list files with their metadata (perhaps a simple R2File type?)
Check and see if the `aws_signing` function `create_canonical_request_headers`'s output is getting tossed. - [ ] Clear out all print statements and consider logging (this is a library, after all)
The output of that function includes request and headers with the host headers in there.
I imagine that, instead of parsing that string to turn into a header map or whatever the `signature` function returns, it is creating its own "content-type" and "x-amz-date" headers and pushing them to its own vector
I needed to come here to think cause I'm tired and would've fucked it up if I just brute forced it instead of critically thought about it
The expected solution is taking the "request", and turning it into a "HeaderMap", since we aren't low enough level to pass unsafe strings by ourselves.
Technically the output of the canonical_request is correct:
Request: `"PUT\n/bucket/key\n\ncontent-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n\ncontent-type;host;x-amz-date\ndeadbeef"`
However it's not in the format of the vector of strings, and also I don't think there should be a new line after put, but there should be two before the content type. Not sure.
But that's what the request looks like, then the headers that we could parse as well that the same function returns are as follows:
Headers: `"content-type:application/octet-stream\nhost:example.r2.cloudflarestorage.com\nx-amz-date:20250907T043710Z\n"`
Which seems to match up, however the final signature that the main `signature` function returns is:
Headers: `[("content-type", "application/octet-stream"), ("x-amz-date", "20250907T043710Z"), ("authorization", "AWS4-HMAC-SHA256 Credential=AKIAEXAMPLE/20250907/us-east-1/s3/aws4_request, SignedHeaders=content-type;host;x-amz-date, Signature=cfbed04d8dbd8610b9f621c72d7e86f0dae2ffac3174835b4a1ff23ad072893f")]`
So my assumption is that the `signature` function is making its own headers and dropping the canonical ones instead of parsing one of the returned strings into one.
This is happening before the "authorization" headers (the only headers that the main function should be adding itself) are pushed to the current vector
I think the headers should be parsed into a Vec<(String, String)>, because that would allow the R2Client to turn the request into whatever header map it wants, or a string, which would hopefully make this AWS signing thing work for other services too, although I won't advertise it as its own library or anything.
Okay after saying all that, surely I could've just fixed it by now and will remember it in the later.
## Dev (since we're so back):
- [X] Update the sync library - [X] Update the sync library
- [X] Make a .env with test-bucket creds - [X] Make a .env with test-bucket creds
- [ ] Actually test the damn thing - [X] Actually test the damn thing
- [ ] Cry (ALL OF THAT WORK, FOR WHAT!? A SINGLE `main.rs` ON GITHUB!?)