mirror of
https://codeberg.org/JasterV/s3_bucket_download.git
synced 2026-04-26 18:10:09 +00:00
first commit, port from other repo
This commit is contained in:
commit
376989e540
15 changed files with 1668 additions and 0 deletions
8
.env.example
Normal file
8
.env.example
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
AWS_ACCESS_KEY_ID=
|
||||
AWS_SECRET_ACCESS_KEY=
|
||||
AWS_URL=
|
||||
AWS_DEFAULT_REGION=
|
||||
AWS_BUCKET=
|
||||
AWS_OBJECTS_PREFIX=
|
||||
DOWNLOAD_PATH=
|
||||
|
||||
13
.github/workflows/main.yml
vendored
Normal file
13
.github/workflows/main.yml
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
name: Publish Docker
|
||||
on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@master
|
||||
- name: Publish to Registry
|
||||
uses: elgohr/Publish-Docker-Github-Action@master
|
||||
with:
|
||||
name: solvethex/s3-bucket-download
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
/target
|
||||
.env
|
||||
24
.vscode/launch.json
vendored
Normal file
24
.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "(Windows) Launch",
|
||||
"type": "cppvsdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/target/debug/s3-download",
|
||||
"args": [],
|
||||
"stopAtEntry": false,
|
||||
"cwd": "${workspaceRoot}",
|
||||
"environment": [],
|
||||
"externalConsole": true
|
||||
},
|
||||
{
|
||||
"name": "(OSX) Launch",
|
||||
"type": "lldb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/target/debug/s3-download",
|
||||
"args": [],
|
||||
"cwd": "${workspaceRoot}",
|
||||
}
|
||||
]
|
||||
}
|
||||
1326
Cargo.lock
generated
Normal file
1326
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
18
Cargo.toml
Normal file
18
Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "s3-download"
|
||||
version = "0.1.0"
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rusoto_s3 = "0.46.0"
|
||||
rusoto_signature = "0.46.0"
|
||||
tokio = { version = "1.5.0", features = ["fs"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
envy = "0.4"
|
||||
dotenv = "0.15.0"
|
||||
lazy_static = "1.4.0"
|
||||
anyhow = "1.0.40"
|
||||
thiserror = "1.0.24"
|
||||
async-recursion = "0.3.2"
|
||||
25
Dockerfile
Normal file
25
Dockerfile
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
FROM rust:latest as build
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY ./Cargo.toml/ ./Cargo.toml
|
||||
COPY ./Cargo.lock ./Cargo.lock
|
||||
COPY ./src/ ./src/
|
||||
|
||||
RUN cargo build --release
|
||||
|
||||
FROM ubuntu:latest
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN mkdir /data
|
||||
|
||||
ENV DOWNLOAD_PATH "./data"
|
||||
|
||||
RUN apt-get -y update && \
|
||||
apt-get -y upgrade && \
|
||||
apt -y install ca-certificates libssl-dev libpq-dev
|
||||
|
||||
COPY --from=build /app/target/release/s3-download /usr/local/bin
|
||||
|
||||
CMD ["s3-download"]
|
||||
28
README.md
Normal file
28
README.md
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# S3 Objects downloader
|
||||
|
||||
> An script built with Rust using Rusoto & Tokio
|
||||
|
||||
## Environment variables
|
||||
|
||||
+ AWS_ACCESS_KEY_ID
|
||||
+ AWS_SECRET_ACCESS_KEY
|
||||
+ AWS_URL
|
||||
+ AWS_DEFAULT_REGION
|
||||
+ AWS_BUCKET
|
||||
+ AWS_OBJECTS_PREFIX
|
||||
+ DOWNLOAD_PATH
|
||||
|
||||
## Build
|
||||
|
||||
```
|
||||
cargo build [--release]
|
||||
```
|
||||
|
||||
## Run
|
||||
|
||||
```
|
||||
cargo run
|
||||
```
|
||||
|
||||
|
||||
|
||||
16
src/config/mod.rs
Normal file
16
src/config/mod.rs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
use serde::Deserialize;
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct Config {
|
||||
pub aws_access_key_id: String,
|
||||
pub aws_url: String,
|
||||
pub aws_secret_access_key: String,
|
||||
pub aws_default_region: String,
|
||||
pub aws_bucket: String,
|
||||
pub download_path: String,
|
||||
pub aws_objects_prefix: Option<String>
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
pub static ref CONFIG: Config = envy::from_env::<Config>().unwrap();
|
||||
}
|
||||
15
src/errors.rs
Normal file
15
src/errors.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
use std::io;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum AppError {
|
||||
#[error("Corrupted file")]
|
||||
CorruptedFile,
|
||||
#[error("{path:?} not found")]
|
||||
IOError {
|
||||
path: String,
|
||||
#[source]
|
||||
source: io::Error
|
||||
}
|
||||
}
|
||||
30
src/main.rs
Normal file
30
src/main.rs
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate async_recursion;
|
||||
extern crate tokio;
|
||||
|
||||
mod config;
|
||||
mod errors;
|
||||
mod models;
|
||||
mod services;
|
||||
|
||||
use anyhow::Result;
|
||||
use config::CONFIG;
|
||||
use dotenv::dotenv;
|
||||
use models::s3_client::S3Client;
|
||||
use services::S3BucketDownloader;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv().ok();
|
||||
let client = S3Client::new(&CONFIG.aws_default_region, &CONFIG.aws_url);
|
||||
let downloader = S3BucketDownloader::new(client);
|
||||
downloader
|
||||
.download(
|
||||
&config::CONFIG.aws_bucket,
|
||||
&config::CONFIG.aws_objects_prefix,
|
||||
&config::CONFIG.download_path,
|
||||
)
|
||||
.await
|
||||
}
|
||||
1
src/models/mod.rs
Normal file
1
src/models/mod.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub mod s3_client;
|
||||
68
src/models/s3_client.rs
Normal file
68
src/models/s3_client.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
use anyhow::{Result, Error};
|
||||
use rusoto_s3::{GetObjectRequest, ListObjectsV2Request, S3, S3Client as Client, StreamingBody};
|
||||
use rusoto_signature::Region;
|
||||
use crate::errors::AppError::*;
|
||||
|
||||
pub struct S3Client {
|
||||
s3: Client
|
||||
}
|
||||
|
||||
impl S3Client {
|
||||
pub fn new(region: &str, endpoint: &str) -> Self {
|
||||
let s3_client = Client::new(Region::Custom {
|
||||
name: region.to_owned(),
|
||||
endpoint: endpoint.to_owned(),
|
||||
});
|
||||
S3Client {
|
||||
s3: s3_client
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn fetch_file_content(&self, key: &str, bucket: &str) -> Result<StreamingBody> {
|
||||
let result = self
|
||||
.s3
|
||||
.get_object(GetObjectRequest {
|
||||
bucket: bucket.into(),
|
||||
key: key.into(),
|
||||
..Default::default()
|
||||
})
|
||||
.await?;
|
||||
result.body.ok_or(Error::from(CorruptedFile))
|
||||
}
|
||||
|
||||
#[async_recursion]
|
||||
pub async fn list_files(
|
||||
&self,
|
||||
bucket: &str,
|
||||
prefix: &Option<String>,
|
||||
start_after: Option<String>,
|
||||
) -> Result<Vec<String>> {
|
||||
let mut keys = self.list_objects(bucket, prefix, start_after).await?;
|
||||
if !keys.is_empty() {
|
||||
let last_key = keys.last().unwrap().to_owned();
|
||||
keys.extend(self.list_files(bucket, prefix, Some(last_key)).await?);
|
||||
}
|
||||
Ok(keys)
|
||||
}
|
||||
|
||||
async fn list_objects(
|
||||
&self,
|
||||
bucket: &str,
|
||||
prefix: &Option<String>,
|
||||
start_after: Option<String>,
|
||||
) -> Result<Vec<String>> {
|
||||
let keys = self
|
||||
.s3
|
||||
.list_objects_v2(ListObjectsV2Request {
|
||||
bucket: bucket.into(),
|
||||
prefix: prefix.to_owned(),
|
||||
start_after,
|
||||
..Default::default()
|
||||
})
|
||||
.await?
|
||||
.contents
|
||||
.map(|contents| contents.into_iter().filter_map(|obj| obj.key).collect())
|
||||
.unwrap_or(vec![]);
|
||||
Ok(keys)
|
||||
}
|
||||
}
|
||||
3
src/services/mod.rs
Normal file
3
src/services/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
mod s3_bucket_downloader;
|
||||
|
||||
pub use s3_bucket_downloader::S3BucketDownloader;
|
||||
91
src/services/s3_bucket_downloader.rs
Normal file
91
src/services/s3_bucket_downloader.rs
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
use crate::{errors::AppError::*, models::s3_client::S3Client};
|
||||
use anyhow::Result;
|
||||
use rusoto_s3::StreamingBody;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncReadExt, AsyncWriteExt};
|
||||
|
||||
pub struct S3BucketDownloader {
|
||||
client: S3Client,
|
||||
}
|
||||
|
||||
impl S3BucketDownloader {
|
||||
pub fn new(client: S3Client) -> Self {
|
||||
S3BucketDownloader { client }
|
||||
}
|
||||
|
||||
pub async fn download(
|
||||
&self,
|
||||
bucket_name: &str,
|
||||
prefix: &Option<String>,
|
||||
out_path: &str,
|
||||
) -> Result<()> {
|
||||
let files = self.client.list_files(bucket_name, prefix, None).await?;
|
||||
println!("Total files: {}", files.len());
|
||||
self.download_files(&files, bucket_name, prefix, out_path)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn download_files(
|
||||
&self,
|
||||
files: &Vec<String>,
|
||||
bucket: &str,
|
||||
prefix: &Option<String>,
|
||||
out_path: &str,
|
||||
) -> Result<()> {
|
||||
for file in files.iter() {
|
||||
if !Path::new(out_path).join(file).exists() {
|
||||
self.download_file(file, bucket, prefix, out_path).await?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn download_file(
|
||||
&self,
|
||||
file: &str,
|
||||
bucket: &str,
|
||||
prefix: &Option<String>,
|
||||
out_path: &str,
|
||||
) -> Result<()> {
|
||||
let content = self.client.fetch_file_content(file, bucket).await?;
|
||||
self.write_file(content, file, prefix, out_path).await
|
||||
}
|
||||
|
||||
async fn write_file(
|
||||
&self,
|
||||
content: StreamingBody,
|
||||
key: &str,
|
||||
prefix: &Option<String>,
|
||||
folder_path: &str,
|
||||
) -> Result<()> {
|
||||
let key = prefix
|
||||
.as_ref()
|
||||
.map_or(key, |prefix| key.trim_start_matches(prefix))
|
||||
.trim_matches('/');
|
||||
let body = self.read_content(content).await?;
|
||||
let file_path = Path::new(folder_path).join(key);
|
||||
Ok(self.write_content(&body, &file_path).await?)
|
||||
}
|
||||
|
||||
async fn read_content(&self, content: StreamingBody) -> Result<Vec<u8>> {
|
||||
let mut stream = content.into_async_read();
|
||||
let mut body = Vec::new();
|
||||
stream.read_to_end(&mut body).await?;
|
||||
Ok(body)
|
||||
}
|
||||
|
||||
async fn write_content(&self, body: &[u8], file_path: &PathBuf) -> Result<()> {
|
||||
let str_file_path = file_path.to_string_lossy().to_string();
|
||||
|
||||
let mut file = File::create(file_path).await.map_err(|err| IOError {
|
||||
path: str_file_path.clone(),
|
||||
source: err,
|
||||
})?;
|
||||
|
||||
Ok(file.write_all(&body).await.map_err(|err| IOError {
|
||||
path: str_file_path.clone(),
|
||||
source: err,
|
||||
})?)
|
||||
}
|
||||
}
|
||||
Loading…
Reference in a new issue