first commit, port from other repo

This commit is contained in:
Víctor Martínez 2021-05-15 00:29:53 +02:00
commit 376989e540
15 changed files with 1668 additions and 0 deletions

8
.env.example Normal file
View file

@ -0,0 +1,8 @@
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_URL=
AWS_DEFAULT_REGION=
AWS_BUCKET=
AWS_OBJECTS_PREFIX=
DOWNLOAD_PATH=

13
.github/workflows/main.yml vendored Normal file
View file

@ -0,0 +1,13 @@
name: Publish Docker
on: [push]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@master
- name: Publish to Registry
uses: elgohr/Publish-Docker-Github-Action@master
with:
name: solvethex/s3-bucket-download
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
.env

24
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "(Windows) Launch",
"type": "cppvsdbg",
"request": "launch",
"program": "${workspaceRoot}/target/debug/s3-download",
"args": [],
"stopAtEntry": false,
"cwd": "${workspaceRoot}",
"environment": [],
"externalConsole": true
},
{
"name": "(OSX) Launch",
"type": "lldb",
"request": "launch",
"program": "${workspaceRoot}/target/debug/s3-download",
"args": [],
"cwd": "${workspaceRoot}"
}
]
}

1326
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

18
Cargo.toml Normal file
View file

@ -0,0 +1,18 @@
[package]
name = "s3-download"
version = "0.1.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rusoto_s3 = "0.46.0"
rusoto_signature = "0.46.0"
tokio = { version = "1.5.0", features = ["fs"] }
serde = { version = "1.0", features = ["derive"] }
envy = "0.4"
dotenv = "0.15.0"
lazy_static = "1.4.0"
anyhow = "1.0.40"
thiserror = "1.0.24"
async-recursion = "0.3.2"

25
Dockerfile Normal file
View file

@ -0,0 +1,25 @@
# ---- Build stage: compile the release binary with the full Rust toolchain ----
FROM rust:latest as build
WORKDIR /app
# NOTE: the source path is a file; the original "./Cargo.toml/" trailing slash
# was incorrect for a file source.
COPY ./Cargo.toml ./Cargo.toml
COPY ./Cargo.lock ./Cargo.lock
COPY ./src/ ./src/
RUN cargo build --release

# ---- Runtime stage: minimal Ubuntu with only the runtime libraries ----
FROM ubuntu:latest
ENV DEBIAN_FRONTEND=noninteractive
RUN mkdir /data
# Point at the directory created above by absolute path; the previous relative
# "./data" only resolved to /data because the default working directory is "/".
ENV DOWNLOAD_PATH "/data"
# Use apt-get (stable CLI) consistently instead of mixing in `apt`, and drop
# the package lists to keep the image small.
RUN apt-get -y update && \
    apt-get -y upgrade && \
    apt-get -y install ca-certificates libssl-dev libpq-dev && \
    rm -rf /var/lib/apt/lists/*
COPY --from=build /app/target/release/s3-download /usr/local/bin
CMD ["s3-download"]

28
README.md Normal file
View file

@ -0,0 +1,28 @@
# S3 Objects downloader
> A script built with Rust using Rusoto & Tokio
## Environment variables
+ AWS_ACCESS_KEY_ID
+ AWS_SECRET_ACCESS_KEY
+ AWS_URL
+ AWS_DEFAULT_REGION
+ AWS_BUCKET
+ AWS_OBJECTS_PREFIX
+ DOWNLOAD_PATH
## Build
```
cargo build [--release]
```
## Run
```
cargo run
```

16
src/config/mod.rs Normal file
View file

@ -0,0 +1,16 @@
use serde::Deserialize;
/// Application configuration, deserialized from process environment variables
/// via `envy` (each field maps to its upper-cased env var, e.g. `aws_bucket`
/// is read from `AWS_BUCKET` — see `.env.example`).
#[derive(Deserialize, Debug)]
pub struct Config {
    // Credentials used to sign S3 requests.
    pub aws_access_key_id: String,
    // Endpoint URL for the S3 service (allows S3-compatible backends).
    pub aws_url: String,
    pub aws_secret_access_key: String,
    // Region name paired with the custom endpoint above.
    pub aws_default_region: String,
    // Bucket whose objects are downloaded.
    pub aws_bucket: String,
    // Local directory that downloaded objects are written into.
    pub download_path: String,
    // Optional key prefix; when set, only objects under it are listed.
    pub aws_objects_prefix: Option<String>
}
lazy_static! {
    /// Global configuration, loaded lazily from the environment on first
    /// access. Panics at startup (fail-fast) when a required variable is
    /// missing or malformed; `expect` replaces the original bare `unwrap`
    /// so the panic message says *what* failed, and envy's error names the
    /// offending variable.
    pub static ref CONFIG: Config = envy::from_env::<Config>()
        .expect("failed to load configuration from environment variables");
}

15
src/errors.rs Normal file
View file

@ -0,0 +1,15 @@
use std::io;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum AppError {
#[error("Corrupted file")]
CorruptedFile,
#[error("{path:?} not found")]
IOError {
path: String,
#[source]
source: io::Error
}
}

30
src/main.rs Normal file
View file

@ -0,0 +1,30 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate async_recursion;
extern crate tokio;
mod config;
mod errors;
mod models;
mod services;
use anyhow::Result;
use config::CONFIG;
use dotenv::dotenv;
use models::s3_client::S3Client;
use services::S3BucketDownloader;
/// Entry point: loads `.env` (if present) into the process environment,
/// builds an S3 client from the global config, and downloads every object
/// under the configured prefix into the local download path.
#[tokio::main]
async fn main() -> Result<()> {
    // Missing .env file is fine — variables may come from the real environment.
    dotenv().ok();
    let client = S3Client::new(&CONFIG.aws_default_region, &CONFIG.aws_url);
    let downloader = S3BucketDownloader::new(client);
    // Use the imported CONFIG consistently instead of mixing in the
    // fully-qualified `config::CONFIG` path as the original did.
    downloader
        .download(
            &CONFIG.aws_bucket,
            &CONFIG.aws_objects_prefix,
            &CONFIG.download_path,
        )
        .await
}

1
src/models/mod.rs Normal file
View file

@ -0,0 +1 @@
pub mod s3_client;

68
src/models/s3_client.rs Normal file
View file

@ -0,0 +1,68 @@
use anyhow::{Result, Error};
use rusoto_s3::{GetObjectRequest, ListObjectsV2Request, S3, S3Client as Client, StreamingBody};
use rusoto_signature::Region;
use crate::errors::AppError::*;
pub struct S3Client {
s3: Client
}
impl S3Client {
pub fn new(region: &str, endpoint: &str) -> Self {
let s3_client = Client::new(Region::Custom {
name: region.to_owned(),
endpoint: endpoint.to_owned(),
});
S3Client {
s3: s3_client
}
}
pub async fn fetch_file_content(&self, key: &str, bucket: &str) -> Result<StreamingBody> {
let result = self
.s3
.get_object(GetObjectRequest {
bucket: bucket.into(),
key: key.into(),
..Default::default()
})
.await?;
result.body.ok_or(Error::from(CorruptedFile))
}
#[async_recursion]
pub async fn list_files(
&self,
bucket: &str,
prefix: &Option<String>,
start_after: Option<String>,
) -> Result<Vec<String>> {
let mut keys = self.list_objects(bucket, prefix, start_after).await?;
if !keys.is_empty() {
let last_key = keys.last().unwrap().to_owned();
keys.extend(self.list_files(bucket, prefix, Some(last_key)).await?);
}
Ok(keys)
}
async fn list_objects(
&self,
bucket: &str,
prefix: &Option<String>,
start_after: Option<String>,
) -> Result<Vec<String>> {
let keys = self
.s3
.list_objects_v2(ListObjectsV2Request {
bucket: bucket.into(),
prefix: prefix.to_owned(),
start_after,
..Default::default()
})
.await?
.contents
.map(|contents| contents.into_iter().filter_map(|obj| obj.key).collect())
.unwrap_or(vec![]);
Ok(keys)
}
}

3
src/services/mod.rs Normal file
View file

@ -0,0 +1,3 @@
mod s3_bucket_downloader;
pub use s3_bucket_downloader::S3BucketDownloader;

View file

@ -0,0 +1,91 @@
use crate::{errors::AppError::*, models::s3_client::S3Client};
use anyhow::Result;
use rusoto_s3::StreamingBody;
use std::path::{Path, PathBuf};
use tokio::fs::File;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
pub struct S3BucketDownloader {
client: S3Client,
}
impl S3BucketDownloader {
pub fn new(client: S3Client) -> Self {
S3BucketDownloader { client }
}
pub async fn download(
&self,
bucket_name: &str,
prefix: &Option<String>,
out_path: &str,
) -> Result<()> {
let files = self.client.list_files(bucket_name, prefix, None).await?;
println!("Total files: {}", files.len());
self.download_files(&files, bucket_name, prefix, out_path)
.await
}
async fn download_files(
&self,
files: &Vec<String>,
bucket: &str,
prefix: &Option<String>,
out_path: &str,
) -> Result<()> {
for file in files.iter() {
if !Path::new(out_path).join(file).exists() {
self.download_file(file, bucket, prefix, out_path).await?;
}
}
Ok(())
}
async fn download_file(
&self,
file: &str,
bucket: &str,
prefix: &Option<String>,
out_path: &str,
) -> Result<()> {
let content = self.client.fetch_file_content(file, bucket).await?;
self.write_file(content, file, prefix, out_path).await
}
async fn write_file(
&self,
content: StreamingBody,
key: &str,
prefix: &Option<String>,
folder_path: &str,
) -> Result<()> {
let key = prefix
.as_ref()
.map_or(key, |prefix| key.trim_start_matches(prefix))
.trim_matches('/');
let body = self.read_content(content).await?;
let file_path = Path::new(folder_path).join(key);
Ok(self.write_content(&body, &file_path).await?)
}
async fn read_content(&self, content: StreamingBody) -> Result<Vec<u8>> {
let mut stream = content.into_async_read();
let mut body = Vec::new();
stream.read_to_end(&mut body).await?;
Ok(body)
}
async fn write_content(&self, body: &[u8], file_path: &PathBuf) -> Result<()> {
let str_file_path = file_path.to_string_lossy().to_string();
let mut file = File::create(file_path).await.map_err(|err| IOError {
path: str_file_path.clone(),
source: err,
})?;
Ok(file.write_all(&body).await.map_err(|err| IOError {
path: str_file_path.clone(),
source: err,
})?)
}
}