-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ceph: add example BlueStore migration via the ceph-objectstore-tool dup operation
Signed-off-by: Anthony D'Atri <[email protected]>
1 parent
d855dbb
commit 2a181ef
Showing
3 changed files
with
393 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
contrib
==================

This directory houses scripts and other files that may be useful to Ceph
administrators. Everything here is provided *as-is*, and may or may
not be up-to-date or functional. Code may not be up to official standards.
Please do not assume any level of support. Your mileage may vary.

Each file's header must include a tracker number and an author signed-off-by
line.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,370 @@ | ||
#!/bin/bash | ||
# https://tracker.ceph.com/issues/47839 | ||
# Signed-off-by: Chris Dunlop <chris@onthe.net.au> | ||
|
||
|
||
###################################################################### | ||
#
# Print the command synopsis and terminate the script.
#
# This is only reached when the script is invoked with the wrong number
# of arguments, so the synopsis goes to stderr and the exit status is
# non-zero: a bad invocation must not look like success to the caller.
#
function usage
{
  cat 1>&2 <<END
Usage: $0 osd device
Description:
Migrate an OSD from Filestore to BlueStore
Where:
osd - OSD ID to migrate
device - raw device to migrate to, starting with /dev/disk/by-id/
E.g.:
ceph-migrate-bluestore 6 /dev/disk/by-id/ata-WDC_WD80EFZX-68UW8N0_VK0RKXTY
END
  exit 2
}
###################################################################### | ||
|
||
# Strict mode: stop on the first failing command, unset variable or
# failing pipeline stage.  extglob is needed for the +([0-9]) pattern
# used further down; inherit_errexit carries errexit into $(...).
shopt -s -o errexit nounset pipefail
shopt -s extglob failglob inherit_errexit lastpipe

#
# Report a fatal problem on stderr and abort the script.
#
# Arguments: the message to print (all arguments are joined)
#
# Every sanity check in this script funnels through here, so it has to
# exist before the first check below.
#
function error
{
  printf '%s: %s\n' "${0##*/}" "$*" 1>&2
  exit 1
}

# Exactly two arguments are required: the OSD id and the target device
[[ $# -eq 2 ]] || usage
osd=$1
bluestore_device=$2

[[ $osd =~ ^[0-9]+$ ]] || error 'osd must be numeric'

# The target must be a whole block device given by its /dev/disk/by-id/
# name; names ending in -partN are rejected.
[[
  -b $bluestore_device &&
  $bluestore_device =~ ^/dev/disk/by-id/ &&
  ! $bluestore_device =~ -part[0-9]+$
]] || error "device must be a raw block device starting with /dev/disk/by-id/"
|
||
######################################################################
# Setup...
#

# Volume group that holds the block.db LVs; nothing can proceed
# without it, so its existence is checked straight away.
vgdb='vg-861d7200-578c-45c2-a44c-2f0c56427bf1'
if ! vgs "${vgdb}" &> /dev/null
then
  error "VG '${vgdb}' for block.db not found"
fi

# Size of the block.db LV created in $vgdb
dblvsize=60G

# Prefix used for block LVs
block_prefix='osd-block'

# Some less(?) common commands we use - abort early if they're missing
cmds=(bc sgdisk)
|
||
###################################################################### | ||
# Functions... | ||
# | ||
#
# Echo a command line to stderr, then run it.
#
# Arguments: the command and its arguments
# Returns:   the exit status of the command
#
# The words are joined with single spaces for display only.  IFS is left
# untouched so that the command itself (which may be a shell function
# from this script) sees the caller's word-splitting rules rather than
# an override that was only meant for the echo.
#
function runcmd
{
  local cmdline

  printf -v cmdline '%s ' "$@"
  printf '%s\n' "${cmdline% }" 1>&2
  "$@"
}
|
||
#
# Succeed if the argument looks like a UUID: five hyphen-separated groups
# of 8, 4, 4, 4 and 12 hexadecimal digits (either case).
#
# Arguments: $1 - string to test; a missing argument is treated as an
#                 empty string so that the function returns false
#                 instead of tripping nounset and killing the script
# Returns:   0 if the string is a UUID, 1 otherwise
#
function is_uuid
{
  local uuid_re='^[[:xdigit:]]{8}(-[[:xdigit:]]{4}){3}-[[:xdigit:]]{12}$'

  [[ ${1-} =~ $uuid_re ]]
}
|
||
#
# Compare the used size of the OSD with the new device
# (and arbitrarily 20% larger 'cos we don't want to fill it up)
#
# Globals:  osd (read), bluestore_device (read)
# Outputs:  a "declare -p" dump of the two byte counts on stdout
# Aborts (via error) when the OSD is not listed, its units are not
# recognized, the device size cannot be found, or the device is too small.
#
function check-device-size
{
  local raw_use sz units pow osdbytes bdev bdevbytes

  # "ceph osd df" fields 7 and 8 - "RAW USE", size and units.
  # Captured in a plain assignment so a failing "ceph" is not hidden.
  raw_use=$(ceph osd df | awk -v"id=${osd}" '$1==id { print $7, $8 }')
  [[ $raw_use ]] || error "ceph osd df: no entry found for osd ${osd}"

  IFS=' ' read -r sz units <<< "${raw_use}"
  case $units in
    B) pow=0 ;;
    KiB) pow=1 ;;
    MiB) pow=2 ;;
    GiB) pow=3 ;;
    TiB) pow=4 ;;
    PiB) pow=5 ;;
    *) error "ceph osd df: units not recognized: ${units}" ;;
  esac
  # bc does the (possibly fractional) arithmetic, printf rounds the
  # result to a whole number of bytes
  osdbytes=$(printf '%.0f' "$(bc <<< "${sz} * 1024^${pow} * 1.2")")

  # Resolve the by-id name to the kernel device name used under /sys/block
  bdev=$(realpath "${bluestore_device}")
  bdev=${bdev##*/}
  [[ -e /sys/block/${bdev}/size ]] || error "Can't find size for ${bluestore_device}"
  # the sysfs size is multiplied by 512 to get bytes
  bdevbytes=$(($(<"/sys/block/${bdev}/size") * 512))

  declare -p osdbytes bdevbytes

  ((bdevbytes >= osdbytes)) || error "The block device isn't large enough"
}
|
||
#
# Check things look ok
#
# Is there a better way of checking, other than manually?
#
# Interactive: shows "ceph -s" until the operator is satisfied, kicks off
# a scrub of the OSD, tails the OSD log until <Enter> is pressed, then
# shows "ceph -s" again until the operator is satisfied.
#
# Globals: osd (read)
#

# Fallback horizontal rule, used only when no "hr" command or function
# is already available.  Without one the call below fails with "command
# not found" and errexit aborts the script just after the scrub started.
if ! type hr >& /dev/null
then
  function hr
  {
    printf '%s\n' '----------------------------------------------------------------------'
  }
fi

function check-ceph-ok
{
  local ans=r
  local pid

  while [[ $ans = r ]]
  do
    runcmd ceph -s
    read -r -p $'\nCheck status above and press r to recheck or <Enter> to continue with scrub' ans
  done

  #
  # Run a scrub "to be sure, to be sure"
  #
  # For smaller OSDs we can see which PGs we need to watch for...
  #
  runcmd ceph pg ls-by-primary "${osd}" | awk '$1~/^[0-9]+\./ { print $1 }'
  runcmd ceph osd scrub "${osd}"

  hr
  tail -n0 -f "/var/log/ceph/ceph-osd.${osd}.log" &
  pid=$!
  sleep 2
  # re-issue the prompt every 10 seconds until a line is entered
  while ! read -r -t 10 -p $'\n\n\ntailing osd log file: press <Enter> to continue\n\n\n' ans
  do
    :
  done
  # The tail may already be gone (e.g. the log file does not exist);
  # that must not abort the script under errexit.
  kill "${pid}" 2> /dev/null || true
  wait "${pid}" 2> /dev/null || true
  hr

  ans=r
  while [[ $ans = r ]]
  do
    runcmd ceph -s
    read -r -p $'\nCheck status above and press r to recheck or <Enter> to continue' ans
  done
}
|
||
#
# Disable the FileStore so it doesn't attempt to come back on reboot, but
# so we can revert back to it if necessary
#
# https://en.wikipedia.org/wiki/GUID_Partition_Table#Partition_type_GUIDs
# Partition GUID code: 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D (Ceph OSD)
# Partition GUID code: 0FC63DAF-8483-4772-8E79-3D69D8477DE4 (Linux filesystem data)
#
# Globals: osddir, osd_json, filestore_device (read); part_guid (written)
#
function disable-filestore
{
  local fstab_copy=/etc/fstab.${0##*/}
  local table_backup=${osd_json%.json}.part
  local linux_data_guid=0FC63DAF-8483-4772-8E79-3D69D8477DE4

  #
  # Comment out the fstab entry for the OSD directory, if there is one
  # (it may be in here for xfs with logdev etc.).  A copy of the
  # untouched fstab is kept, made only the first time round.
  #
  if grep -qE '^[^#[:space:]]+[[:space:]]+'"${osddir}"'[[:space:]]' /etc/fstab
  then
    if [[ ! -e $fstab_copy ]]
    then
      cp -a /etc/fstab "${fstab_copy}"
    fi
    sed -ri '/^[^#[:space:]]+[[:space:]]+'"${osddir//\//\\\/}"'[[:space:]]/ s/^/# /' /etc/fstab
  fi

  #
  # Back up the partition table (once), then retype partition 1 if it
  # currently carries the Ceph OSD type
  #
  if [[ ! -e $table_backup ]]
  then
    runcmd sgdisk --backup="${table_backup}" "${filestore_device}"
  fi
  part_guid=$(sgdisk -i1 "${filestore_device}" | sed -rn 's/^Partition GUID code: ([[:xdigit:]-]+) .*/\1/p')
  if [[ $part_guid = 4FBD7E29-9D25-41B8-AFD0-062C0CEFF05D ]]
  then
    runcmd sgdisk --typecode="1:${linux_data_guid}" "${filestore_device}"
    echo "${filestore_device} partition 1 changed to type ${linux_data_guid} (Linux filesystem data)"
  fi
}
|
||
###################################################################### | ||
# Processing... | ||
# | ||
|
||
#
# Make sure every utility listed in cmds can be found before going on
#
for cmd in "${cmds[@]}"
do
  if ! type "${cmd}" &> /dev/null
  then
    error "${cmd} utility required"
  fi
done
|
||
#
# Get/check OSD
#
unit=ceph-osd@${osd}
runcmd systemctl is-enabled "${unit}" ||
  error "systemd unit ${unit} not enabled"

osddir=/var/lib/ceph/osd/ceph-${osd}
[[ -d $osddir ]] || error "No directory: ${osddir}"

fsid=$(< "${osddir}/fsid")
is_uuid "${fsid}" || error "fsid uuid not found in ${osddir}/fsid"

osd_json=/etc/ceph/osd/${osd}-${fsid}.json
[[ -f $osd_json ]] || error "File doesn't exist: ${osd_json}"

# The block.db LV is named after the existing OSD's fsid
lvnewdb=${vgdb}/osd-db-${fsid}

# Pull the secret out of the "key = ..." line of the keyring
authkey=$(sed -rn 's/^[[:space:]]+key[[:space:]]*=[[:space:]]*//p' "${osddir}/keyring")
[[ $authkey ]] || error "Can't get authkey from ${osddir}/keyring"

#
# We want the device containing the FileStore version of the OSD
# so we can disable it once the BlueStore version is up and running,
# so the FileStore doesn't contend with the BlueStore on reboot etc.
#
# The directory is handed to awk as a variable rather than being spliced
# into the program text.
#
filestore_device=$(awk -v"dir=${osddir}" '$2==dir { print $1; }' /etc/mtab)
[[ $filestore_device ]] || error "Can't find device currently mounted on ${osddir}"
[[ $filestore_device =~ ^/dev/sd[a-z]+[0-9]*$ ]] || error "Don't recognize device currently mounted on ${osddir}: ${filestore_device}"
# strip the trailing partition number to get the whole-disk device
filestore_device=${filestore_device%%+([0-9])}

# Show what was worked out.  authkey is deliberately not part of this
# dump: it is a secret and should not end up on the terminal or in logs.
declare -p unit block_prefix bluestore_device osd osddir fsid osd_json lvnewdb filestore_device
|
||
runcmd check-device-size

#
# Create raw LV for block.db
#
runcmd lvcreate --yes -L "${dblvsize}" -n "${lvnewdb#*/}" "${lvnewdb%/*}"

#
# Prepare the new OSD
# The list of OSD ids is recorded before and after so we can work out
# which osd was created
#
osds_before=$(ceph osd ls)
runcmd ceph-volume lvm prepare --data "${bluestore_device}" --block.db "${lvnewdb}"
osds_after=$(ceph osd ls)

#
# Work out which OSD has been created
# Is there a better way of doing this?
#
# comm needs both inputs sorted in collation order, which a numerically
# ordered id list is not once ids reach two digits ("10" sorts before
# "2"), so both lists are sorted first under a fixed C locale.  The
# lists are kept in variables: no predictable file names in /tmp.
#
new=$(LC_ALL=C comm -13 \
  <(LC_ALL=C sort <<< "${osds_before}") \
  <(LC_ALL=C sort <<< "${osds_after}"))
[[ $new =~ ^[0-9]+$ ]] || error "New OSD id not found"
|
||
#
# Take the new OSD back out of the ceph database
# (its directory is left mounted)
#
runcmd ceph osd purge "${new}" --yes-i-really-mean-it

#
# Params for the newly created OSD: its directory, and the LV behind
# its "block" symlink written as vg/lv (leading /dev/ removed)
#
newdir=/var/lib/ceph/osd/ceph-${new}
lvnew=$(readlink "${newdir}/block")
lvnew=${lvnew#/dev/}

#
# lvfix is what we're going to rename the LV to so
# it ends in the (original) fsid
#
if ! is_uuid "${lvnew#*/${block_prefix}-}"
then
  error "LV not recognised: ${lvnew}"
fi
lvfix=${lvnew%%/*}/${block_prefix}-${fsid}

declare -p new newdir lvnew lvfix
|
||
#
# the "dup" step only works if the destination has the same id and fsid
# as the source: fix 'em up
#
new_fsid=$(< "${newdir}/fsid")

# LV tags: swap the new id / fsid / block device for the original ones.
# The same tag changes are applied to the block LV and the block.db LV.
args=()
args+=(--deltag "ceph.osd_id=${new}" --addtag "ceph.osd_id=${osd}")
args+=(--deltag "ceph.osd_fsid=${new_fsid}" --addtag "ceph.osd_fsid=${fsid}")
args+=(--deltag "ceph.block_device=${lvnew}" --addtag "ceph.block_device=${lvfix}")
runcmd lvchange "${args[@]}" "${lvnew}"
runcmd lvchange "${args[@]}" "${lvnewdb}"

# BlueStore labels on the devices themselves
runcmd ceph-bluestore-tool set-label-key \
  --dev "${newdir}/block" --key whoami --value "${osd}"
runcmd ceph-bluestore-tool set-label-key \
  --dev "${newdir}/block" --key osd_uuid --value "${fsid}"
runcmd ceph-bluestore-tool set-label-key \
  --dev "${newdir}/block.db" --key osd_uuid --value "${fsid}"

# ...and the fsid file in the OSD directory
printf '%s\n' "${fsid}" > "${newdir}/fsid"
|
||
#
# Rename the LV so it ends in the (original) fsid, and repoint the
# "block" symlink at the new name
#
runcmd lvrename "${lvnew}" "${lvfix}"
runcmd ln -sf "/dev/${lvfix}" "${newdir}/block"
lvnew=${lvfix}

#
# Remove the flags that mkfs has already been done - otherwise mkfs skips the actual mkfs!
#
runcmd ceph-bluestore-tool rm-label-key \
  --dev "${newdir}/block" --key mkfs_done
runcmd rm "${newdir}/mkfs_done"

#
# Empty out the new OSD filesystem
#
runcmd ceph-objectstore-tool \
  --type bluestore \
  --data-path "${newdir}" \
  --fsid "${fsid}" \
  --op mkfs \
  --no-mon-config
|
||
#
# Stop the osd - the copy can't proceed if it's busy
#
if runcmd systemctl is-active --quiet "ceph-osd@${osd}"
then
  runcmd systemctl stop "ceph-osd@${osd}"
fi

#
# The actual copy...
#
# Timed with the shell's own "time" keyword.  Passing "time" through
# runcmd would look for an external time binary, which is not among the
# checked prerequisites; if it were missing the script would abort here,
# after the OSD has already been stopped.
#
time runcmd ceph-objectstore-tool --type filestore --data-path "/var/lib/ceph/osd/ceph-${osd}" --target-data-path "${newdir}" --op dup

#
# Fix up some keys from the copy
#
# NOTE(review): the set-label-key call is not routed through runcmd,
# presumably so that the auth key is not echoed - confirm before changing.
#
printf '[osd.%d]\n\tkey = %s\n' "${osd}" "${authkey}" > "${newdir}/key"
ceph-bluestore-tool set-label-key --dev "${newdir}/block" --key osd_key --value "${authkey}"
ceph-bluestore-tool rm-label-key --dev "${newdir}/block" --key fsid
|
||
#
# Move the FileStore config file out of the way to avoid it being used on boot
#
runcmd mv "${osd_json}" "${osd_json}.orig"

#
# prepare the mount points: unmount both OSD directories and remove
# the one that belonged to the temporary OSD id
#
runcmd umount "${osddir}"
runcmd umount "${newdir}"
runcmd rmdir "${newdir}"

#
# Start the new BlueStore version of the OSD
#
runcmd ceph-volume lvm trigger "${osd}-${fsid}"

#
# Let things settle a little then check the new OSD is running;
# if it isn't, show the unit status and give up
#
sleep 5
systemctl is-active --quiet "${unit}" || {
  systemctl status "${unit}"
  exit 1
}

# Operator-driven health check
runcmd check-ceph-ok

# Only now is the old FileStore disabled
runcmd disable-filestore

exit 0