From 1e31b5df8afbecf9928adff9abb05a762bc60659 Mon Sep 17 00:00:00 2001
From: Idriss Neumann <idriss.neumann@comwork.io>
Date: Tue, 18 Oct 2022 10:02:07 +0100
Subject: [PATCH] Improve the clean step with a retry when there is an extra folder level

---
 .gitlab-ci.yml |  2 +-
 README.md      |  2 ++
 entrypoint.sh  | 74 +++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7fe8a23..f1b3948 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -14,7 +14,7 @@ mirror:
 deliver:
   stage: deliver
   script:
-    - setsid ./ci/docker-deliver.sh "bucket-backup" "2.2"
+    - setsid ./ci/docker-deliver.sh "bucket-backup" "2.3"
   only:
     refs:
       - /^(main.*)$/
diff --git a/README.md b/README.md
index 2d1cadc..528d8f2 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,8 @@ You'll find a tutorial on how to use it with ansible on the [comwork cloud docum
 * `BUCKET_NAME` (optional): bucket name (if you're using a global endpoint that serve multiple buckets)
 * `DATE_FORMAT` (optional): backup date format (folder name). Default: `+%F`, which corresponds to `YYYY-MM-DD`
 * `MAX_RETENTION` (optional): number of days to keep backups. Default: `5` days.
+* `DISABLE_BACKUP` (optional): if set, disables the backup step
+* `DISABLE_CLEAN` (optional): if set, disables the clean step
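+
+For example, to run only the clean step, set `DISABLE_BACKUP` to any non-empty value (how the variable reaches the container depends on your setup):
+
+```shell
+export DISABLE_BACKUP=1
+```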
 
 Note: you can upload to multiple backup destinations using an incremental suffix (the suffixes must be consecutive and start from `1`):
 * `BUCKET_ENDPOINT_1`, `BUCKET_ACCESS_KEY_1`, `BUCKET_SECRET_KEY_1`, `BUCKET_NAME_1`
diff --git a/entrypoint.sh b/entrypoint.sh
index e63aa3a..4c2e543 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -2,6 +2,10 @@
 
 MC_BIN="/usr/bin/mc"
 
+# Feature flags
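+# Set DISABLE_BACKUP / DISABLE_CLEAN to any non-empty value to skip the corresponding step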
+[[ ! $DISABLE_BACKUP ]] && DISABLE_BACKUP=""
+[[ ! $DISABLE_CLEAN ]] && DISABLE_CLEAN=""
+
 # Dates to keep
 [[ ! $DATE_FORMAT ]] && DATE_FORMAT="+%F"
 TODAY="$(date "$DATE_FORMAT")"
@@ -13,24 +17,13 @@ for i in $(seq 1 "$MAX_RETENTION"); do
   DATES_TO_KEEP="$DATES_TO_KEEP\|$(date --date="$i day ago" "$DATE_FORMAT")"
 done
 
-bucket_backup() {
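+# Handle provider-specific quirks (Scaleway, OVH) and print "region::bucket_name::endpoint::bucket_subpath"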
+compute_bucket_name_and_path() {
   endpoint="${1}"
-  access_key="${2}"
-  secret_key="${3}"
-  dest="${4}"
-  bucket_name="${5}"
-
-  "${MC_BIN}" config host add "${dest}" "${endpoint}" "${access_key}" "${secret_key}"
+  bucket_name="${2}"
 
   bucket_subpath=""
   [[ $bucket_name ]] && bucket_subpath="${bucket_name}/"
 
-  echo "[bucket_backup] Copying backup file. ${BACKUP_LOCATION} -> ${dest}/${bucket_subpath}${BACKUP_FOLDER}/${TODAY}.${FILE_EXTENSION}"
-  "${MC_BIN}" cp "${BACKUP_LOCATION}" "${dest}/${bucket_subpath}${BACKUP_FOLDER}/${TODAY}.${FILE_EXTENSION}"
-
-  echo "[bucket_backup] Deleting old backup data"
-  echo "[bucket_backup] Deleting data older than $(date --date="${MAX_RETENTION} days ago" "${DATE_FORMAT}")"
-
   # If you use a scaleway endpoint, there's some specificities to handle
   if [[ $endpoint =~ https://.+s3.*.scw.cloud ]]; then
     bucket_name="$(echo $endpoint|sed "s/https:\/\/\(.*\)\.s3\..*\.scw\.cloud/\1/g")"
@@ -38,7 +31,6 @@ bucket_backup() {
     endpoint="https://s3.${region}.scw.cloud"
     bucket_subpath=""
     [[ $bucket_name ]] && bucket_subpath="${bucket_name}/"
-    echo "[bucket_backup][scaleway] region=${region}, bucket_name=${bucket_name}, endpoint=${endpoint}"
   fi
 
   # If you use ovh endpoint, pretty same things
@@ -48,11 +40,60 @@ bucket_backup() {
     endpoint="https://s3.${region}.perf.cloud.ovh.net"
     bucket_subpath=""
     [[ $bucket_name ]] && bucket_subpath="${bucket_name}/"
-    echo "[bucket_backup][ovh] region=${region}, bucket_name=${bucket_name}, endpoint=${endpoint}"
   fi
 
+  echo "${region}::${bucket_name}::${endpoint}::${bucket_subpath}"
+}
+
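+# Register the destination host and copy the backup file into the bucket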
+bucket_backup() {
+  endpoint="${1}"
+  access_key="${2}"
+  secret_key="${3}"
+  dest="${4}"
+  bucket_name="${5}"
+
+  "${MC_BIN}" config host add "${dest}" "${endpoint}" "${access_key}" "${secret_key}"
+
+  infos="$(compute_bucket_name_and_path "${endpoint}" "${bucket_name}")"
+  region="$(echo "${infos}"|awk -F "::" '{print $1}')"
+  bucket_name="$(echo "${infos}"|awk -F "::" '{print $2}')"
+  endpoint="$(echo "${infos}"|awk -F "::" '{print $3}')"
+  bucket_subpath="$(echo "${infos}"|awk -F "::" '{print $4}')"
+  echo "[bucket_backup] region=${region}, bucket_name=${bucket_name}, endpoint=${endpoint}, bucket_subpath=${bucket_subpath}"  
+
+  echo "[bucket_backup] Copying backup file. ${BACKUP_LOCATION} -> ${dest}/${bucket_subpath}${BACKUP_FOLDER}/${TODAY}.${FILE_EXTENSION}"
+  "${MC_BIN}" cp "${BACKUP_LOCATION}" "${dest}/${bucket_subpath}${BACKUP_FOLDER}/${TODAY}.${FILE_EXTENSION}"
+}
+
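+# Remove backups older than MAX_RETENTION days from the bucket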
+clean_backups() {
+  endpoint="${1}"
+  access_key="${2}"
+  secret_key="${3}"
+  dest="${4}"
+  bucket_name="${5}"
+
+  echo "[clean_backups] Deleting old backup data"
+  echo "[clean_backups] Deleting data older than $(date --date="${MAX_RETENTION} days ago" "${DATE_FORMAT}")"
+
+  infos="$(compute_bucket_name_and_path "${endpoint}" "${bucket_name}")"
+  region="$(echo "${infos}"|awk -F "::" '{print $1}')"
+  bucket_name="$(echo "${infos}"|awk -F "::" '{print $2}')"
+  endpoint="$(echo "${infos}"|awk -F "::" '{print $3}')"
+  bucket_subpath="$(echo "${infos}"|awk -F "::" '{print $4}')"
+  echo "[clean_backups] region=${region}, bucket_name=${bucket_name}, endpoint=${endpoint}, bucket_subpath=${bucket_subpath}"  
+
   "${MC_BIN}" config host add "r${dest}" "${endpoint}" "${access_key}" "${secret_key}"
   echo "${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/"
+
+  "${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/"
+  results=$("${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/")
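+  # Some providers nest the data under an extra "<bucket_name>/" folder; retry with it appended when nothing is listed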
+  if [[ ! $results ]]; then
+    bucket_subpath="${bucket_subpath}${bucket_name}/"
+    echo "[clean_backups] Retry with bucket_subpath=${bucket_subpath}"
+    echo "${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/"
+    "${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/"
+  fi
+
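+  # List the backups, keep the dates in DATES_TO_KEEP and delete the remaining (older) entries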
   "${MC_BIN}" ls -r "r${dest}/${bucket_subpath}${BACKUP_FOLDER}/" |
     awk '{print $6}' |
     grep -v -w "$DATES_TO_KEEP" |
@@ -76,7 +117,8 @@ apply_bucket_backup() {
 
   echo "[apply_bucket_backup] suffix=${suffix} endpoint=${endpoint} bucket_name=${bucket_name} dest=${dest}"
   if [[ $endpoint && $access_key && $secret_key ]]; then 
-    bucket_backup "${endpoint}" "${access_key}" "${secret_key}" "${dest}" "${bucket_name}"
+    [[ $DISABLE_BACKUP ]] || bucket_backup "${endpoint}" "${access_key}" "${secret_key}" "${dest}" "${bucket_name}"
+    [[ $DISABLE_CLEAN ]] || clean_backups "${endpoint}" "${access_key}" "${secret_key}" "${dest}" "${bucket_name}"
     return 0
   fi
 
-- 
GitLab