PinkHello
做一个快乐的程序猿
在 AWS 上将 MongoDB 备份到 S3

机器准备

  • 开启新机器时, 注意需要备份的数据量, 并据此选择系统盘或者数据盘的大小
  • 在老机器上, 如果磁盘需要在线扩容, 注意扩容后还要让设备分区和文件系统都识别到新的容量
    • 在云平台控制台上把需要扩容的磁盘扩大到所需大小(此时机器内还无法直接使用新增的空间)
    • 进入机器控制台
        # df -lh
        # lsblk
        # growpart 磁盘设备名称 1 
        # resize2fs 磁盘文件系统
        # df -lh
      

备份脚本准备

  • backup.sh 备份脚本
#!/bin/bash

# Shared helper: prints the current time as "[YYYY-mm-dd HH:MM:SS]".
# Used as the prefix of every log line written by this script.
# Outputs: the formatted timestamp on stdout.
get_date () {
    # The format is quoted so the shell never treats "[...]" as a glob
    # pattern and never splits it on the embedded space.
    date '+[%Y-%m-%d %H:%M:%S]'
}
# Archive file name: <prefix>-<timestamp>.tgz
# The prefix and the date format come from the environment; the fallbacks are
# the same defaults start.sh uses, so a manual run without the cron
# environment still produces a sensible name.
# The date format is quoted so a format containing spaces is passed to
# `date` as a single argument.
ARCHIVE_OUT="${BACKUP_FILENAME_PREFIX:-mongo_backup}-$(date "+${BACKUP_FILENAME_DATE_FORMAT:-%Y%m%d}").tgz"

echo "ARCHIVE_OUT=$ARCHIVE_OUT"

# Build the JSON body that is POSTed to the alert webhook.
# Globals:  S3_PATH, ARCHIVE_OUT (read) - concatenated into the "project" field
# Outputs:  the JSON document on stdout
generate_post_data () {
  local project="${S3_PATH}${ARCHIVE_OUT}"
  printf '%s\n' \
    '{' \
    '  "group":"alert-XXX",' \
    "  \"project\":\"${project}\"," \
    '  "alert_message":"备份mongo2s3错误"' \
    '}'
}

# Script
#
# Main flow of backup.sh:
#   1. mongodump into ./dump
#   2. verify that ./dump exists
#   3. pack ./dump into $ARCHIVE_OUT
#   4. upload the archive to s3://$S3_BUCKET/$S3_PATH$ARCHIVE_OUT
# Afterwards the oldest objects under the $S3_PATH prefix are deleted so that
# at most $S3_HISTORY_NUM objects remain.
#
# Reads:  MONGO_HOST, MONGO_DB_ARG, MONGO_USERNAME, MONGO_PASSWORD, ALERT_URL,
#         S3_BUCKET, S3_PATH, S3_HISTORY_NUM, ARCHIVE_OUT
# Exit status: 0 on success, 1 when any step fails.

AWS_CLI=/usr/local/bin/aws

# Log the reason, POST the alert payload to ALERT_URL and abort with status 1.
backup_failed () {
  echo "$(get_date) ERROR: $*"
  curl "$ALERT_URL" -X POST --data "$(generate_post_data)" --header "Content-Type: application/json"
  exit 1
}

echo "$(get_date) Mongo backup started"
# The password is masked here so it never ends up in the log file.
# NOTE(review): it is still passed on the mongodump command line and is
# therefore visible in the process list while the dump runs.
echo "$(get_date) [Step 1/4] Running mongodump: mongodump --forceTableScan -h $MONGO_HOST $MONGO_DB_ARG -u $MONGO_USERNAME -p ****** --authenticationDatabase admin"
# MONGO_DB_ARG is either empty or "--db <name>"; split it into separate
# words on purpose so mongodump sees two arguments.
read -r -a mongo_db_args <<< "${MONGO_DB_ARG:-}"
if ! mongodump --forceTableScan -h "$MONGO_HOST" "${mongo_db_args[@]}" -u "$MONGO_USERNAME" -p "$MONGO_PASSWORD" --authenticationDatabase admin; then
  # Never archive a partial dump.
  rm -rf dump/
  backup_failed "mongodump exited with an error"
fi

echo "$(get_date) [Step 2/4] check dump directory: curl feishu ..."
if [[ ! -d "dump" ]]; then
  echo "dump not exist"
  backup_failed "dump directory is missing"
fi

echo "$(get_date) [Step 3/4] Creating tar archive: tar -zcvf $ARCHIVE_OUT dump/"
if ! tar -zcvf "$ARCHIVE_OUT" dump/; then
  rm -rf dump/
  rm -f -- "$ARCHIVE_OUT"
  backup_failed "tar could not create $ARCHIVE_OUT"
fi
rm -rf dump/

# `aws s3 cp` is used instead of `aws s3api put-object` because the archive
# can be larger than 5G.
echo "$(get_date) [Step 4/4] Uploading archive(> 5G) to S3 : aws s3 cp $ARCHIVE_OUT s3://$S3_BUCKET/$S3_PATH$ARCHIVE_OUT"
upload_status=0
"$AWS_CLI" s3 cp "$ARCHIVE_OUT" "s3://$S3_BUCKET/$S3_PATH$ARCHIVE_OUT" || upload_status=$?
# The local archive is removed in both cases so failed runs cannot fill the disk.
rm -f -- "$ARCHIVE_OUT"
if (( upload_status != 0 )); then
  backup_failed "upload to s3://$S3_BUCKET/$S3_PATH$ARCHIVE_OUT failed"
fi

echo "$(get_date) Mongo backup completed successfully"

echo "$(get_date) S3 history check started"
# Keep only the number of history files configured through S3_HISTORY_NUM.
# Pruning is skipped unless the value is a positive integer, so a missing or
# malformed setting can never delete every backup.
if [[ "${S3_HISTORY_NUM:-}" =~ ^[1-9][0-9]*$ ]]; then
  echo "$(get_date) [Step 1/3] aws s3api list-objects --bucket $S3_BUCKET --prefix $S3_PATH > list.json"
  if ! "$AWS_CLI" s3api list-objects --bucket "$S3_BUCKET" --prefix "$S3_PATH" > list.json; then
    echo "$(get_date) ERROR: could not list s3://$S3_BUCKET/$S3_PATH"
    exit 1
  fi

  # `// []` keeps jq from failing when the listing has no Contents key.
  echo "$(get_date) [Step 2/3] jq -r '(.Contents // []) | .[] | .Key' list.json > list.txt"
  if ! jq -r '(.Contents // []) | .[] | .Key' list.json > list.txt; then
    echo "$(get_date) ERROR: could not parse list.json"
    exit 1
  fi

  echo "$(get_date) [Step 3/3] process..... s3 history...."
  CNT=$(wc -l < list.txt)
  VAL_CNT=$(( CNT - S3_HISTORY_NUM ))
  if (( VAL_CNT > 0 )); then
    # The first VAL_CNT keys of the listing are deleted.
    # NOTE(review): this assumes the listing order puts the oldest archives
    # first (true when the date format sorts chronologically) - confirm.
    i=0
    while IFS= read -r FILE_NAME; do
      i=$(( i + 1 ))
      echo "$(get_date) [Step 3/3/$i] aws s3api delete-object --bucket $S3_BUCKET --key $FILE_NAME"
      # stdin is detached so the CLI cannot consume the remaining key list.
      "$AWS_CLI" s3api delete-object --bucket "$S3_BUCKET" --key "$FILE_NAME" < /dev/null
    done < <(head -n "$VAL_CNT" list.txt)
  fi
else
  echo "$(get_date) S3_HISTORY_NUM is not a positive integer, history pruning skipped"
fi

echo "$(get_date) S3 history check completed."

  • start.sh 启动脚本
#!/bin/bash
#
# Container entrypoint.
# Builds a crontab that runs /script/backup.sh on CRON_SCHEDULE with the
# backup settings as crontab environment lines, starts cron, then follows
# the backup log so the container keeps running.
#
# Required env: ALERT_URL, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY,
#               MONGO_HOST, MONGO_USERNAME, MONGO_PASSWORD, S3_BUCKET
# Optional env: CRON_SCHEDULE, S3_PATH, S3_STORAGE_CLASS,
#               BACKUP_FILENAME_DATE_FORMAT, BACKUP_FILENAME_PREFIX,
#               S3_HISTORY_NUM, MONGO_DB, AWS_DEFAULT_REGION
# Exit status: non-zero when a required variable is missing or when the
#              crontab / cron daemon cannot be set up.

CRON_SCHEDULE=${CRON_SCHEDULE:-0 1 * * *}

# Environment lines placed at the top of the crontab.
# `${VAR:?msg}` aborts the script when a required variable is unset or empty.
CRON_ENVIRONMENT="
ALERT_URL=${ALERT_URL:?"env variable is required"}
AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:?"env variable is required"}
AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:?"env variable is required"}
MONGO_HOST=${MONGO_HOST:?"env variable is required"}
MONGO_USERNAME=${MONGO_USERNAME:?"env variable is required"}
MONGO_PASSWORD=${MONGO_PASSWORD:?"env variable is required"}
S3_BUCKET=${S3_BUCKET:?"env variable is required"}
S3_PATH=${S3_PATH:-}
S3_STORAGE_CLASS=${S3_STORAGE_CLASS:-STANDARD}
BACKUP_FILENAME_DATE_FORMAT=${BACKUP_FILENAME_DATE_FORMAT:-%Y%m%d}
BACKUP_FILENAME_PREFIX=${BACKUP_FILENAME_PREFIX:-mongo_backup}
S3_HISTORY_NUM=${S3_HISTORY_NUM:-20}
"

# Restrict the dump to one database only when MONGO_DB is given.
if [[ -n "$MONGO_DB" ]]; then
CRON_ENVIRONMENT="
$CRON_ENVIRONMENT
MONGO_DB_ARG=--db $MONGO_DB
"
fi

# Forward the AWS region only when it is set.
if [[ -n "$AWS_DEFAULT_REGION" ]]; then
CRON_ENVIRONMENT="
$CRON_ENVIRONMENT
AWS_DEFAULT_REGION=$AWS_DEFAULT_REGION
"
fi

# Log file of the backup job; it must exist before `tail -f` starts.
LOGFIFO='/var/log/backup_script.log'
[[ -e "$LOGFIFO" ]] || touch "$LOGFIFO"

# `>` means every run of the job overwrites the previous log.
CRON_COMMAND="/script/backup.sh > $LOGFIFO 2>&1"

# NOTE(review): the configuration dump below (and `crontab -l`) prints the
# AWS and Mongo credentials to the container's stdout - confirm this is wanted.
echo
echo "Configuration"
echo
echo "CRON_SCHEDULE"
echo
echo "$CRON_SCHEDULE"
echo
echo "CRON_ENVIRONMENT"
echo "$CRON_ENVIRONMENT"

# Fail loudly instead of idling in `tail -f` with no job scheduled.
if ! echo "$CRON_ENVIRONMENT$CRON_SCHEDULE $CRON_COMMAND" | crontab -; then
    echo "failed to install the crontab" >&2
    exit 1
fi

echo "crontab -l"
crontab -l

if ! cron; then
    echo "failed to start cron" >&2
    exit 1
fi
tail -f "$LOGFIFO"

Dockerfile 与 构建镜像

# Image for the scheduled MongoDB -> S3 backup job.
# NOTE(review): the base image is presumably relied on for mongodump and jq,
# which backup.sh calls but this file does not install - confirm for the tag in use.
FROM mongo

# Directory that start.sh writes the backup log into.
VOLUME /var/log/

# python3/pip are installed only to get the AWS CLI; cron runs the schedule
# and curl posts the failure alert.
RUN apt-get update \
  && apt-get install -y python3 python3-pip cron curl \
  && rm -rf /var/lib/apt/lists/* \
  && pip3 install awscli

WORKDIR /script
# COPY instead of ADD: only plain local files are copied, none of ADD's
# extra behaviour (archive extraction, remote URLs) is needed.
COPY *.sh /script/
RUN chmod +x /script/*.sh

ENTRYPOINT ["/script/start.sh"]
# Build the image; run this in the directory that holds the Dockerfile and the *.sh scripts.
docker build -t backup_mongo_data_to_s2:1.0 .

Running

备份 docker-compose.yml

---
# Compose file for the MongoDB -> S3 backup container.
# Every value under `environment` that repeats its own name is a placeholder
# and must be replaced with a real setting.
version: "3.1"

services:
  backup_mongo:
    # Tag produced by the `docker build` command.
    image: backup_mongo_data_to_s2:1.0
    restart: always
    environment:
      # NOTE(review): in this list form the double quotes may be passed to the
      # container as part of the value - confirm, or drop the quotes.
      # ALERT_URL is required by start.sh; the container exits at startup without it.
      - ALERT_URL="ALERT_URL"
      - AWS_DEFAULT_REGION="AWS_DEFAULT_REGION"
      - AWS_ACCESS_KEY_ID="AWS_ACCESS_KEY_ID"
      - AWS_SECRET_ACCESS_KEY="AWS_SECRET_ACCESS_KEY"
      - MONGO_HOST="MONGO_REPLICA_SET_NAME/HOST1:PORT1,HOST2:PORT2"
      - MONGO_USERNAME="MONGO_USERNAME"
      - MONGO_PASSWORD="MONGO_PASSWORD"
      - S3_BUCKET="S3_BUCKET"
      # S3_PATH is concatenated directly with the archive file name.
      - S3_PATH="S3_PATH"
      - CRON_SCHEDULE=0 1 * * *
      - TZ=Asia/Shanghai
      - BACKUP_FILENAME_DATE_FORMAT=%Y-%m-%d-%H-%M-%S
      - S3_HISTORY_NUM=20
    volumes:
      # start.sh writes /var/log/backup_script.log, so the log lands on the host.
      - /var/log:/var/log
    logging:
      driver: json-file
      options:
        max-size: "50m"
        max-file: "10"
...

docker-compose.yml 的变量替换为自己的

  • AWS_DEFAULT_REGION AWS 区域(Region)
  • AWS_ACCESS_KEY_ID AWS S3 访问KEYID
  • AWS_SECRET_ACCESS_KEY AWS S3 访问SECRET
  • MONGO_HOST MONGO 主机地址(副本集格式: 副本集名称/HOST1:PORT1,HOST2:PORT2)
  • MONGO_USERNAME MONGO 用户名
  • MONGO_PASSWORD MONGO 密码
  • S3_BUCKET S3 Bucket名称
  • S3_PATH S3 备份路径前缀(脚本会把它与备份文件名直接拼接, 需要自带结尾的 /)
  • CRON_SCHEDULE 任务执行CRON表达式
  • ALERT_URL 备份失败时接收告警的 Webhook 地址(start.sh 要求必填)
# Start the backup container in the background (detached).
docker-compose up -d 

最后修改于 2021-08-27