ZFS Replication Script
#!/usr/bin/env bash
#
# replicate-zfs-pull.sh
#
# Pulls incremental ZFS snapshots from a remote (source) server to the local (destination) server.
# Uses snapshots made by zfs-auto-snapshot. Locates the latest snapshot common to both sides
# to perform an incremental replication; if none is found, it does a full send.
#
# Usage: replicate-zfs-pull.sh <SOURCE_HOST> <SOURCE_DATASET> <DEST_DATASET>
#
# Example:
# ./replicate-zfs-pull.sh mysourcehost tank/mydata tank/backup/mydata
#
# Assumptions/Notes:
# - The local server is the destination. The remote server is the source.
# - We're using "zfs recv -F" locally, which can forcibly roll back the destination
# dataset if it has diverging snapshots. Remove or change -F as desired.
# - This script is minimal and doesn't handle advanced errors or timeouts gracefully.
# - Key-based SSH authentication should be set up so that `ssh <SOURCE_HOST>` doesn't require a password prompt.
#
set -euo pipefail
##############################################################################
# 1. Parse command-line arguments
#
# Exactly three positional arguments are required, in this order:
#   $1 - SOURCE_HOST    : host name handed to ssh
#   $2 - SOURCE_DATASET : dataset on the remote (source) side
#   $3 - DEST_DATASET   : dataset on the local (destination) side
# Any other argument count prints the usage line and exits with status 1.
##############################################################################
if (( $# != 3 )); then
  printf '%s\n' "Usage: $0 <SOURCE_HOST> <SOURCE_DATASET> <DEST_DATASET>"
  exit 1
fi

SOURCE_HOST=$1
SOURCE_DATASET=$2
DEST_DATASET=$3
##############################################################################
# 2. Gather snapshot lists
#
# Both sides are listed with:
#   zfs list -H -t snapshot -o name -S creation -d 1 <dataset>
#   -H          : Output without headers for script-friendliness
#   -t snapshot : Only list snapshots
#   -o name     : Only list the name (dataset@snapshot)
#   -S creation : Sort by creation time, newest first. The "most recent
#                 snapshot" step later in this script takes the first line,
#                 so this ordering is required.
#   -d 1        : Only descend one level - i.e. don't tree out child datasets
#
# Only the part after '@' is kept, so REMOTE_SNAPSHOTS and LOCAL_SNAPSHOTS
# each hold one bare snapshot name per line.
#
# - Remote (source) snapshots: via SSH to the remote host
# - Local (destination) snapshots: from the local ZFS
##############################################################################

# snapshot_names_of <dataset>
#
# Filter: reads "dataset@snapshot" lines on stdin and prints the snapshot part
# of every line whose dataset part is exactly <dataset>. The comparison is a
# literal string match, so characters such as '.' in the dataset name are not
# treated as regex wildcards and sibling/child datasets never match.
snapshot_names_of() {
  awk -F'@' -v dataset="$1" '$1 == dataset { print $2 }'
}

# stderr is discarded and failures are tolerated ("|| true") on purpose: a
# failed listing simply yields an empty list (e.g. the destination dataset
# does not exist yet before the first full send).
echo "Collecting snapshots from remote source: ${SOURCE_HOST}:${SOURCE_DATASET}..."
REMOTE_SNAPSHOTS=$(ssh "${SOURCE_HOST}" zfs list -H -t snapshot -o name -S creation -d 1 "${SOURCE_DATASET}" 2>/dev/null \
  | snapshot_names_of "${SOURCE_DATASET}" || true)

echo "Collecting snapshots from local destination: ${DEST_DATASET}..."
LOCAL_SNAPSHOTS=$(zfs list -H -t snapshot -o name -S creation -d 1 "${DEST_DATASET}" 2>/dev/null \
  | snapshot_names_of "${DEST_DATASET}" || true)
##############################################################################
# 3. Find the latest common snapshot
#
# The snapshot names have prefixes like "zfs-auto-snap_daily" and
# "zfs-auto-snap_hourly" that would dominate a plain sort, so the prefix is
# stripped to build a sort key (the timestamp part) while the full name is
# kept alongside it for the transfer.
##############################################################################

# latest_common_snapshot <remote_list> <local_list>
#
# Arguments: two newline-separated lists of bare snapshot names.
# Outputs:   the full name of the snapshot present in BOTH lists whose
#            prefix-stripped name sorts last; prints nothing when the lists
#            share no snapshot (including when either list is empty).
# Returns:   0 in both cases, so an empty result never trips `set -e`.
latest_common_snapshot() {
  local remote_list=$1 local_list=$2

  # Intersect on the FULL names: a remote "daily-X" must not be paired with a
  # local "hourly-X" just because the timestamps agree. comm and both sorts
  # run under the same (C) locale because comm requires its inputs to be
  # sorted in its own collation order.
  LC_ALL=C comm -12 \
    <(printf '%s\n' "$remote_list" | LC_ALL=C sort -u) \
    <(printf '%s\n' "$local_list" | LC_ALL=C sort -u) \
    | awk '$0 != "" {
        key = $0
        sub(/^zfs-auto-snap_[A-Za-z0-9_]*-/, "", key)
        print key "\t" $0
      }' \
    | LC_ALL=C sort \
    | tail -n 1 \
    | cut -f 2-
}

# Quote the list variables or you'll lose the newlines between names.
COMMON_SNAPSHOT=$(latest_common_snapshot "$REMOTE_SNAPSHOTS" "$LOCAL_SNAPSHOTS")
if [[ -n "$COMMON_SNAPSHOT" ]]; then
  echo "Found common snapshot: $COMMON_SNAPSHOT"
else
  echo "No common snapshot found—will perform a full send."
fi
##############################################################################
# 4. Identify the most recent snapshot on the remote source
#
# The first line of REMOTE_SNAPSHOTS is taken as the newest snapshot. That is
# only correct if the list was produced newest-first (e.g. by listing with
# `zfs list ... -S creation`) - verify the listing command sorts that way.
# An empty result means the remote listing returned nothing, so there is
# nothing to send and the script stops with status 1.
##############################################################################
LATEST_REMOTE_SNAPSHOT=$(head -n 1 <<<"$REMOTE_SNAPSHOTS")

[[ -n "$LATEST_REMOTE_SNAPSHOT" ]] || {
  echo "No snapshots found on the remote source. Check if zfs-auto-snapshot is enabled there."
  exit 1
}
##############################################################################
# 5. Perform replication
#
# Three cases, decided from COMMON_SNAPSHOT and LATEST_REMOTE_SNAPSHOT:
#   none        - the destination already holds the latest remote snapshot;
#                 there is nothing to send (an incremental send from a
#                 snapshot to itself is not a valid stream), so exit 0.
#   incremental - send every snapshot after the common one up to the latest
#                 (zfs send -I).
#   full        - no common snapshot exists; send the latest snapshot whole.
#
# "zfs recv -F" is used locally, which can forcibly roll back the destination
# dataset if it has diverged. With `set -e`/`pipefail` active, a failure on
# either side of the pipe aborts the script before the success message.
##############################################################################

# replication_mode <common_snapshot> <latest_remote_snapshot>
#
# Prints "full" when <common_snapshot> is empty, "none" when both names are
# identical, and "incremental" otherwise.
replication_mode() {
  if [[ -z "$1" ]]; then
    echo "full"
  elif [[ "$1" == "$2" ]]; then
    echo "none"
  else
    echo "incremental"
  fi
}

REPLICATION_MODE=$(replication_mode "$COMMON_SNAPSHOT" "$LATEST_REMOTE_SNAPSHOT")

if [[ "$REPLICATION_MODE" == "none" ]]; then
  echo "Destination ${DEST_DATASET} already has the latest remote snapshot @$LATEST_REMOTE_SNAPSHOT; nothing to replicate."
  exit 0
fi

echo "Starting pull-based replication from ${SOURCE_HOST}:${SOURCE_DATASET} to local ${DEST_DATASET}..."
if [[ "$REPLICATION_MODE" == "incremental" ]]; then
  echo "Performing incremental replication from @$COMMON_SNAPSHOT up to @$LATEST_REMOTE_SNAPSHOT."
  ssh "${SOURCE_HOST}" zfs send -I "${SOURCE_DATASET}@${COMMON_SNAPSHOT}" "${SOURCE_DATASET}@${LATEST_REMOTE_SNAPSHOT}" \
    | zfs recv -F "${DEST_DATASET}"
else
  echo "Performing full replication of @$LATEST_REMOTE_SNAPSHOT."
  ssh "${SOURCE_HOST}" zfs send "${SOURCE_DATASET}@${LATEST_REMOTE_SNAPSHOT}" \
    | zfs recv -F "${DEST_DATASET}"
fi
echo "Replication completed successfully!"
Feedback
Was this page helpful?
Glad to hear it! Please tell us how we can improve.
Sorry to hear that. Please tell us how we can improve.