#!/bin/sh
#
# lvcheck
# Released under the GNU General Public License, either version 2 or
# (at your option) any later version.

# Overview:
#
# Run this from cron each night. If the machine is on AC power, it
# will run the checks; otherwise they will all be skipped. (If the
# script can't tell whether the machine is on AC power, a setting in
# the configuration file (/etc/lvcheck.conf) decides whether it will
# continue with the checks, or abort.)
#
# The script will then decide which logical volumes are active, and
# can therefore be checked via an LVM snapshot. Each of these LVs
# will be queried to find its last-check day, and if that was more
# than $INTERVAL days ago (where INTERVAL is set in the configuration
# file as well), then the script will take an LVM snapshot of that
# LV and run fsck on the snapshot. The snapshot will be set to use
# all the remaining space in its volume group. After fsck finishes,
# the snapshot is destroyed. (Snapshots are checked serially.)
#
# Any LV that passes fsck will have its last-check time updated (in
# the real superblock, not the snapshot's superblock); for any LV whose
# fsck fails, an email notification is sent to a configurable user
# ($EMAIL). This $EMAIL setting is optional, but its use is highly
# recommended, since if any LV fails, it will need to be checked
# manually, offline.

# Decide whether the machine is currently running on AC (mains) power.
#
# Globals:  AC_UNKNOWN (read) -- "CONTINUE" or "ABORT"; only consulted when
#           neither sysfs nor /proc/acpi reveals the state of an AC supply.
# Returns:  0 if on AC power (or state unknown and AC_UNKNOWN is CONTINUE),
#           1 if on battery (or state unknown and AC_UNKNOWN is ABORT).
#           Exits the script with status 1 for any other AC_UNKNOWN value.
on_ac_power() {
    local any_known=no
    local psu ac supply_type online status_file

    # try sysfs power class first
    if [ -d /sys/class/power_supply ] ; then
        for psu in /sys/class/power_supply/* ; do
            [ -r "${psu}/type" ] || continue
            supply_type="$(cat "${psu}/type")"

            # ignore batteries
            [ "${supply_type}" = "Battery" ] && continue

            # a supply without a readable "online" attribute tells us nothing
            [ -r "${psu}/online" ] || continue
            online="$(cat "${psu}/online")"

            [ "${online}" = 1 ] && return 0
            [ "${online}" = 0 ] && any_known=yes
        done

        # at least one supply reported "offline" and none reported "online"
        [ "${any_known}" = "yes" ] && return 1
    fi

    # else fall back to AC adapters in /proc
    if [ -d /proc/acpi/ac_adapter ] ; then
        for ac in /proc/acpi/ac_adapter/* ; do
            # the adapter's state lives in "state" or, failing that, "status"
            if [ -r "${ac}/state" ] ; then
                status_file="${ac}/state"
            elif [ -r "${ac}/status" ] ; then
                status_file="${ac}/status"
            else
                continue
            fi

            grep -q on-line "${status_file}" && return 0
            grep -q off-line "${status_file}" && any_known=yes
        done

        [ "${any_known}" = "yes" ] && return 1
    fi

    # nothing conclusive was found: let the configuration decide
    case "${AC_UNKNOWN}" in
        CONTINUE)
            return 0    # assume on AC power
            ;;
        ABORT)
            return 1    # assume on battery
            ;;
        *)
            echo "Invalid value for AC_UNKNOWN in the config file" >&2
            exit 1
            ;;
    esac
}

# attempt to force a check of $1 on the next reboot
#
# Arguments: $1 - device holding the filesystem
#            $2 - filesystem type of $1
# Returns:   the status of tune2fs for ext2/ext3, 0 otherwise (a message is
#            printed on stderr for types we can't handle).
try_force_check() {
    local dev="$1"
    local fstype="$2"

    case "$fstype" in
        ext2|ext3)
            # a huge mount count plus an ancient last-check date
            tune2fs -C 16000 -T "19000101" "$dev"
            ;;
        reiserfs)
            # ???
            echo "Don't know how to set the last-check time on reiserfs..." >&2
            ;;
        *)
            echo "Don't know how to set the last-check time on $fstype..." >&2
            ;;
    esac
}

# attempt to set the last-check time on $1 to now, and the mount count to 0.
#
# Arguments: $1 - device holding the filesystem
#            $2 - filesystem type of $1
# Returns:   the status of tune2fs for ext2/ext3, 0 otherwise (a message is
#            printed on stderr for types we can't handle).
try_delay_checks() {
    local dev="$1"
    local fstype="$2"

    case "$fstype" in
        ext2|ext3)
            tune2fs -C 0 -T now "$dev"
            ;;
        reiserfs)
            # do nothing?  apparently so...
            ;;
        *)
            echo "Don't know how to reset the last-check time on $fstype..." >&2
            ;;
    esac
}

# print the date that $1 was last checked, in a format that date(1) will
# accept, or "Unknown" if we don't know how to find that date.
# try_get_check_date DEV FSTYPE
#   Prints when DEV was last checked, or "Unknown" when that cannot be
#   determined (unsupported FSTYPE, or dumpe2fs reported no such date).
try_get_check_date() {
    local dev="$1"
    local fstype="$2"
    local checked

    case "$fstype" in
        ext2|ext3)
            checked="$(dumpe2fs -h "$dev" 2>/dev/null \
                | sed -n -e 's/Last checked:[[:space:]]*//p')"
            # an empty answer must not reach date(1): it would be read as
            # "now" and the volume would never become due for a check
            if [ -n "$checked" ] ; then
                printf '%s\n' "$checked"
            else
                echo "Unknown"
            fi
            ;;
        *)
            # TODO: add support for various FSes here
            echo "Unknown"
            ;;
    esac
}

# check the FS on $1 passively, printing output to $3.
#
# Arguments: $1 - device to check (normally a snapshot)
#            $2 - filesystem type of $1
#            $3 - log file handed to logsave
# Returns:   0 if the check passed, non-zero if it failed or if $2 is a
#            filesystem type we don't know how to check.
perform_check() {
    local dev="$1"
    local fstype="$2"
    local tmpfile="$3"

    case "$fstype" in
        ext2|ext3)
            # the only point in fixing anything is just to see if fsck can.
            nice logsave -as "${tmpfile}" fsck.${fstype} -p -C 0 "$dev" &&
                nice logsave -as "${tmpfile}" fsck.${fstype} -fy -C 0 "$dev"
            return $?
            ;;
        reiserfs)
            # "Yes" is fed on stdin, presumably to answer fsck.reiserfs's
            # confirmation prompt
            echo Yes | nice logsave -as "${tmpfile}" fsck.reiserfs --check "$dev"
            # apparently can't fail?  let's hope not...
            return 0
            ;;
        *)
            echo "Don't know how to check $fstype filesystems passively..." >&2
            # nothing was checked, so don't claim success
            return 1
            ;;
    esac
}

# do everything needed to check and reset dates and counters on /dev/$1/$2.
#
# Arguments: $1 - volume group name
#            $2 - logical volume name
#            $3 - filesystem type on that logical volume
# Globals:   EMAIL (read) -- recipient(s) of the failure report, may be empty
# Returns:   0 when the volume was processed (whether fsck passed or not) or
#            skipped because its filesystem type is unsupported; non-zero if
#            the snapshot could not be created or removed, or if recording
#            or reporting the result failed.
check_fs() {
    local vg="$1"
    local lv="$2"
    local fstype="$3"
    local origin="/dev/${vg}/${lv}"
    local tmpfile
    local status=0

    # don't waste a snapshot on something perform_check can't handle
    # (keep this list in step with perform_check)
    case "$fstype" in
        ext2|ext3|reiserfs)
            ;;
        *)
            echo "Don't know how to check $fstype filesystems passively..." >&2
            return 0
            ;;
    esac

    tmpfile="$(mktemp -t e2fsck.log.XXXXXXXXXX)" || return 1

    # only one check happens at a time; using all the free space in the VG
    # at least won't prevent other checks from happening...
    if ! lvcreate -s -l "100%FREE" -n "${lv}-snap" "${vg}/${lv}" ; then
        rm -f -- "$tmpfile"
        return 1
    fi

    # From here on failures are only recorded in $status, so that the
    # snapshot and the log file are always cleaned up before returning.
    if perform_check "${origin}-snap" "${fstype}" "${tmpfile}" ; then
        echo 'Background scrubbing succeeded!'
        try_delay_checks "${origin}" "$fstype" || status=$?
    else
        echo 'Background scrubbing failed! Reboot to fsck soon!'
        try_force_check "${origin}" "$fstype" || status=$?

        if test -n "$EMAIL" ; then
            # $EMAIL is left unquoted, as before, so that a list of
            # space-separated recipients keeps working
            mail -s "Fsck of ${origin} failed!" $EMAIL < "$tmpfile" || status=$?
        fi
    fi

    lvremove -f "${vg}/${lv}-snap" || status=$?
    rm -f -- "$tmpfile"

    return "$status"
}

set -e

# pull in configuration -- don't bother with a parser, just use the shell's
# own "." (source) command
. /etc/lvcheck.conf

# print the filesystem type found on $1; prints nothing and/or returns
# non-zero when the type can't be determined.
get_fstype() {
    local dev="$1"

    if [ -x /lib/udev/vol_id ] ; then
        /lib/udev/vol_id -t "$dev"
    else
        # vol_id is not shipped by newer udev releases; fall back to blkid
        blkid -o value -s TYPE "$dev"
    fi
}

# check whether the machine is on AC power: if not, skip fsck
on_ac_power || exit 0

# the date arithmetic below is meaningless without an interval
: "${INTERVAL:?INTERVAL must be set in /etc/lvcheck.conf}"

# parse up lvscan output
#
# NOTE: the loop is the last stage of a pipeline, so it runs in a subshell;
# a fatal error inside it ends the loop and becomes the script's exit status.
lvscan 2>&1 | grep ACTIVE | awk '{print $2;}' | \
while read -r DEV ; do
    # remove the single quotes around the device name
    DEV="$(printf '%s\n' "$DEV" | tr -d \')"

    # get the FS type; a volume without a recognisable filesystem is
    # skipped instead of aborting the whole run
    FSTYPE="$(get_fstype "$DEV")" || FSTYPE=""
    if [ -z "$FSTYPE" ] ; then
        echo "Can't determine the filesystem type of $DEV; skipping it" >&2
        continue
    fi

    # get the last-check time, throw away the time portion, and
    # add $INTERVAL days
    check_date="$(try_get_check_date "$DEV" "$FSTYPE")"

    # if the date is unknown, run fsck every day.  sigh.
    if [ "$check_date" != "Unknown" ] ; then
        check_day="$(date --date="$check_date $INTERVAL days" +'%Y%m%d')"

        # get today's date, and skip the check if it's not within the interval
        today="$(date +'%Y%m%d')"
        [ "$check_day" -gt "$today" ] && continue
    fi

    # get the free space of the volume group, as a plain number of bytes,
    # and ensure that some free space exists, at least
    SPACE="$(lvs --noheadings --nosuffix --units b -o vg_free "$DEV" \
        | tr -d '[:space:]')"
    case "$SPACE" in
        ''|*[!0-9]*)
            # not a plain number: let lvcreate be the judge
            ;;
        *)
            if [ "$SPACE" -eq 0 ] ; then
                echo "No free space for a snapshot of $DEV; skipping it" >&2
                continue
            fi
            ;;
    esac

    # get the volume group and logical volume names; lvs pads its output
    # with blanks, which must not end up in the device paths built from it
    VG="$(lvs --noheadings -o vg_name "$DEV" | tr -d '[:space:]')"
    LV="$(lvs --noheadings -o lv_name "$DEV" | tr -d '[:space:]')"
    if [ -z "$VG" ] || [ -z "$LV" ] ; then
        echo "Can't determine the VG/LV names of $DEV; skipping it" >&2
        continue
    fi

    # check it
    check_fs "$VG" "$LV" "$FSTYPE"
done