mirror of
https://codeberg.org/JasterV/s3_bucket_download.git
synced 2026-04-27 02:15:47 +00:00
first commit, port from other repo
This commit is contained in:
commit
376989e540
15 changed files with 1668 additions and 0 deletions
8
.env.example
Normal file
8
.env.example
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
AWS_ACCESS_KEY_ID=
|
||||||
|
AWS_SECRET_ACCESS_KEY=
|
||||||
|
AWS_URL=
|
||||||
|
AWS_DEFAULT_REGION=
|
||||||
|
AWS_BUCKET=
|
||||||
|
AWS_OBJECTS_PREFIX=
|
||||||
|
DOWNLOAD_PATH=
|
||||||
|
|
||||||
13
.github/workflows/main.yml
vendored
Normal file
13
.github/workflows/main.yml
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
name: Publish Docker
|
||||||
|
on: [push]
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@master
|
||||||
|
- name: Publish to Registry
|
||||||
|
uses: elgohr/Publish-Docker-Github-Action@master
|
||||||
|
with:
|
||||||
|
name: solvethex/s3-bucket-download
|
||||||
|
username: ${{ secrets.DOCKER_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
.env
|
||||||
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
{
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "(Windows) Launch",
|
||||||
|
"type": "cppvsdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceRoot}/target/debug/s3-download",
|
||||||
|
"args": [],
|
||||||
|
"stopAtEntry": false,
|
||||||
|
"cwd": "${workspaceRoot}",
|
||||||
|
"environment": [],
|
||||||
|
"externalConsole": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "(OSX) Launch",
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceRoot}/target/debug/s3-download",
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceRoot}",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
1326
Cargo.lock
generated
Normal file
1326
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
18
Cargo.toml
Normal file
18
Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
[package]
|
||||||
|
name = "s3-download"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
rusoto_s3 = "0.46.0"
|
||||||
|
rusoto_signature = "0.46.0"
|
||||||
|
tokio = { version = "1.5.0", features = ["fs"] }
|
||||||
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
envy = "0.4"
|
||||||
|
dotenv = "0.15.0"
|
||||||
|
lazy_static = "1.4.0"
|
||||||
|
anyhow = "1.0.40"
|
||||||
|
thiserror = "1.0.24"
|
||||||
|
async-recursion = "0.3.2"
|
||||||
25
Dockerfile
Normal file
25
Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
FROM rust:latest as build
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
COPY ./Cargo.toml/ ./Cargo.toml
|
||||||
|
COPY ./Cargo.lock ./Cargo.lock
|
||||||
|
COPY ./src/ ./src/
|
||||||
|
|
||||||
|
RUN cargo build --release
|
||||||
|
|
||||||
|
FROM ubuntu:latest
|
||||||
|
|
||||||
|
ENV DEBIAN_FRONTEND=noninteractive
|
||||||
|
|
||||||
|
RUN mkdir /data
|
||||||
|
|
||||||
|
ENV DOWNLOAD_PATH "./data"
|
||||||
|
|
||||||
|
RUN apt-get -y update && \
|
||||||
|
apt-get -y upgrade && \
|
||||||
|
apt -y install ca-certificates libssl-dev libpq-dev
|
||||||
|
|
||||||
|
COPY --from=build /app/target/release/s3-download /usr/local/bin
|
||||||
|
|
||||||
|
CMD ["s3-download"]
|
||||||
28
README.md
Normal file
28
README.md
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# S3 Objects downloader
|
||||||
|
|
||||||
|
> An script built with Rust using Rusoto & Tokio
|
||||||
|
|
||||||
|
## Environment variables
|
||||||
|
|
||||||
|
+ AWS_ACCESS_KEY_ID
|
||||||
|
+ AWS_SECRET_ACCESS_KEY
|
||||||
|
+ AWS_URL
|
||||||
|
+ AWS_DEFAULT_REGION
|
||||||
|
+ AWS_BUCKET
|
||||||
|
+ AWS_OBJECTS_PREFIX
|
||||||
|
+ DOWNLOAD_PATH
|
||||||
|
|
||||||
|
## Build
|
||||||
|
|
||||||
|
```
|
||||||
|
cargo build [--release]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Run
|
||||||
|
|
||||||
|
```
|
||||||
|
cargo run
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
16
src/config/mod.rs
Normal file
16
src/config/mod.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
||||||
|
use serde::Deserialize;
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug)]
|
||||||
|
pub struct Config {
|
||||||
|
pub aws_access_key_id: String,
|
||||||
|
pub aws_url: String,
|
||||||
|
pub aws_secret_access_key: String,
|
||||||
|
pub aws_default_region: String,
|
||||||
|
pub aws_bucket: String,
|
||||||
|
pub download_path: String,
|
||||||
|
pub aws_objects_prefix: Option<String>
|
||||||
|
}
|
||||||
|
|
||||||
|
lazy_static! {
|
||||||
|
pub static ref CONFIG: Config = envy::from_env::<Config>().unwrap();
|
||||||
|
}
|
||||||
15
src/errors.rs
Normal file
15
src/errors.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum AppError {
|
||||||
|
#[error("Corrupted file")]
|
||||||
|
CorruptedFile,
|
||||||
|
#[error("{path:?} not found")]
|
||||||
|
IOError {
|
||||||
|
path: String,
|
||||||
|
#[source]
|
||||||
|
source: io::Error
|
||||||
|
}
|
||||||
|
}
|
||||||
30
src/main.rs
Normal file
30
src/main.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#[macro_use]
|
||||||
|
extern crate lazy_static;
|
||||||
|
#[macro_use]
|
||||||
|
extern crate async_recursion;
|
||||||
|
extern crate tokio;
|
||||||
|
|
||||||
|
mod config;
|
||||||
|
mod errors;
|
||||||
|
mod models;
|
||||||
|
mod services;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use config::CONFIG;
|
||||||
|
use dotenv::dotenv;
|
||||||
|
use models::s3_client::S3Client;
|
||||||
|
use services::S3BucketDownloader;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() -> Result<()> {
|
||||||
|
dotenv().ok();
|
||||||
|
let client = S3Client::new(&CONFIG.aws_default_region, &CONFIG.aws_url);
|
||||||
|
let downloader = S3BucketDownloader::new(client);
|
||||||
|
downloader
|
||||||
|
.download(
|
||||||
|
&config::CONFIG.aws_bucket,
|
||||||
|
&config::CONFIG.aws_objects_prefix,
|
||||||
|
&config::CONFIG.download_path,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
1
src/models/mod.rs
Normal file
1
src/models/mod.rs
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
pub mod s3_client;
|
||||||
68
src/models/s3_client.rs
Normal file
68
src/models/s3_client.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
use anyhow::{Result, Error};
|
||||||
|
use rusoto_s3::{GetObjectRequest, ListObjectsV2Request, S3, S3Client as Client, StreamingBody};
|
||||||
|
use rusoto_signature::Region;
|
||||||
|
use crate::errors::AppError::*;
|
||||||
|
|
||||||
|
pub struct S3Client {
|
||||||
|
s3: Client
|
||||||
|
}
|
||||||
|
|
||||||
|
impl S3Client {
|
||||||
|
pub fn new(region: &str, endpoint: &str) -> Self {
|
||||||
|
let s3_client = Client::new(Region::Custom {
|
||||||
|
name: region.to_owned(),
|
||||||
|
endpoint: endpoint.to_owned(),
|
||||||
|
});
|
||||||
|
S3Client {
|
||||||
|
s3: s3_client
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn fetch_file_content(&self, key: &str, bucket: &str) -> Result<StreamingBody> {
|
||||||
|
let result = self
|
||||||
|
.s3
|
||||||
|
.get_object(GetObjectRequest {
|
||||||
|
bucket: bucket.into(),
|
||||||
|
key: key.into(),
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
result.body.ok_or(Error::from(CorruptedFile))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_recursion]
|
||||||
|
pub async fn list_files(
|
||||||
|
&self,
|
||||||
|
bucket: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
start_after: Option<String>,
|
||||||
|
) -> Result<Vec<String>> {
|
||||||
|
let mut keys = self.list_objects(bucket, prefix, start_after).await?;
|
||||||
|
if !keys.is_empty() {
|
||||||
|
let last_key = keys.last().unwrap().to_owned();
|
||||||
|
keys.extend(self.list_files(bucket, prefix, Some(last_key)).await?);
|
||||||
|
}
|
||||||
|
Ok(keys)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn list_objects(
|
||||||
|
&self,
|
||||||
|
bucket: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
start_after: Option<String>,
|
||||||
|
) -> Result<Vec<String>> {
|
||||||
|
let keys = self
|
||||||
|
.s3
|
||||||
|
.list_objects_v2(ListObjectsV2Request {
|
||||||
|
bucket: bucket.into(),
|
||||||
|
prefix: prefix.to_owned(),
|
||||||
|
start_after,
|
||||||
|
..Default::default()
|
||||||
|
})
|
||||||
|
.await?
|
||||||
|
.contents
|
||||||
|
.map(|contents| contents.into_iter().filter_map(|obj| obj.key).collect())
|
||||||
|
.unwrap_or(vec![]);
|
||||||
|
Ok(keys)
|
||||||
|
}
|
||||||
|
}
|
||||||
3
src/services/mod.rs
Normal file
3
src/services/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
mod s3_bucket_downloader;
|
||||||
|
|
||||||
|
pub use s3_bucket_downloader::S3BucketDownloader;
|
||||||
91
src/services/s3_bucket_downloader.rs
Normal file
91
src/services/s3_bucket_downloader.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
||||||
|
use crate::{errors::AppError::*, models::s3_client::S3Client};
|
||||||
|
use anyhow::Result;
|
||||||
|
use rusoto_s3::StreamingBody;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use tokio::fs::File;
|
||||||
|
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||||
|
|
||||||
|
pub struct S3BucketDownloader {
|
||||||
|
client: S3Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl S3BucketDownloader {
|
||||||
|
pub fn new(client: S3Client) -> Self {
|
||||||
|
S3BucketDownloader { client }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn download(
|
||||||
|
&self,
|
||||||
|
bucket_name: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
out_path: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let files = self.client.list_files(bucket_name, prefix, None).await?;
|
||||||
|
println!("Total files: {}", files.len());
|
||||||
|
self.download_files(&files, bucket_name, prefix, out_path)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn download_files(
|
||||||
|
&self,
|
||||||
|
files: &Vec<String>,
|
||||||
|
bucket: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
out_path: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
for file in files.iter() {
|
||||||
|
if !Path::new(out_path).join(file).exists() {
|
||||||
|
self.download_file(file, bucket, prefix, out_path).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn download_file(
|
||||||
|
&self,
|
||||||
|
file: &str,
|
||||||
|
bucket: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
out_path: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let content = self.client.fetch_file_content(file, bucket).await?;
|
||||||
|
self.write_file(content, file, prefix, out_path).await
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_file(
|
||||||
|
&self,
|
||||||
|
content: StreamingBody,
|
||||||
|
key: &str,
|
||||||
|
prefix: &Option<String>,
|
||||||
|
folder_path: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
let key = prefix
|
||||||
|
.as_ref()
|
||||||
|
.map_or(key, |prefix| key.trim_start_matches(prefix))
|
||||||
|
.trim_matches('/');
|
||||||
|
let body = self.read_content(content).await?;
|
||||||
|
let file_path = Path::new(folder_path).join(key);
|
||||||
|
Ok(self.write_content(&body, &file_path).await?)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn read_content(&self, content: StreamingBody) -> Result<Vec<u8>> {
|
||||||
|
let mut stream = content.into_async_read();
|
||||||
|
let mut body = Vec::new();
|
||||||
|
stream.read_to_end(&mut body).await?;
|
||||||
|
Ok(body)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn write_content(&self, body: &[u8], file_path: &PathBuf) -> Result<()> {
|
||||||
|
let str_file_path = file_path.to_string_lossy().to_string();
|
||||||
|
|
||||||
|
let mut file = File::create(file_path).await.map_err(|err| IOError {
|
||||||
|
path: str_file_path.clone(),
|
||||||
|
source: err,
|
||||||
|
})?;
|
||||||
|
|
||||||
|
Ok(file.write_all(&body).await.map_err(|err| IOError {
|
||||||
|
path: str_file_path.clone(),
|
||||||
|
source: err,
|
||||||
|
})?)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue