#!/bin/sh
#
# Copyright (c) 2015 Christian Seiler
#
# License: MIT,
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
#
# Rationale
# ---------
#
# This is needed because when a bind-mount is first created, all
# specified options will be ignored, i.e. the mountpoint options will
# be a duplicate of the options of the source mountpoint.
#
# Only if you remount the mountpoint will the options be applied.
#
# Therefore, if you add a bind mount to /etc/fstab, the options
# specified alongside bind will not be applied on boot. In sysvinit
# systems, one could add multiple mount points with the same target to
# cause 'mount -a' to remount it at the second line, but systemd
# enforces uniqueness of mount points, i.e. you cannot specify the same
# directory twice.
#
# This generator is a way to work around this problem. It parses
# /etc/fstab (in parallel to systemd's own fstab-generator) and writes
# out small snippets of systemd configuration that cause the bind
# mounts specified in /etc/fstab to be remounted immediately after
# their initial mounting.
#
# The idea for the generated units is based on what is described in
# [1], but the implementation is much more generic.
# [1] http://www.sagui.org/~gustavo/blog/fedora/read-only-bind-mount-with-systemd.html
#
# Note that this is implemented as a shell script for now, but would be
# more efficient as a C binary.
#
# Basic logic
# -----------
#
# 1. generate two service units,
#      bindmount-local@.service and bindmount-remote@.service
#    These services just remount the filesystem, causing the mount
#    points to acquire the options in /etc/fstab. The service is
#    implemented as a oneshot service, so it immediately goes back
#    to inactive after it completes its actions, allowing it to be
#    pulled back in if the mount unit is to be started again at a
#    later point in time.
#
# 2. read in /etc/fstab, look for all bind mounts (that are not
#    recursive binds)
#
# 3. generate a drop-in configuration for each of these bind mounts to
#    make sure they pull in (via Requires=)
#      bindmount-$type@$path.service
#
# The dependencies have the following consequences:
#
#  - drop-in: Requires=bindmount-$type@$path.service
#      -> if the mount is to be started, it will pull in our generated
#         service
#  - service: After=%i.mount
#      -> the service will be ordered after the mount itself, i.e. we
#         remount the mount point only after it was initially mounted
#  - service: Before=local-fs.target (or remote-fs.target)
#      -> make sure that the remounting is done before any services are
#         started that might need the filesystems (services are ordered
#         after either local-fs.target or remote-fs.target)
#
# Preconditions
# -------------
#
# Note that this generator is executed by systemd at a VERY early stage
# of the boot process.
#
# First of all: logging is only possible to /dev/kmsg (we redirect
# stdout and stderr there first thing). We also try to log as little as
# possible since this generator will be executed every time systemctl
# daemon-reload is called.
#
# / is still read-only, /tmp is not yet mounted when this is called for
# the first time. IMPORTANT: Heredoc support in most shells requires
# temporary files, so build multi-line output with plain echo/printf
# calls instead of heredocs.
#
# This is run in parallel to all other generators, so we can't rely on
# them being done yet.
#
# In some containers maybe we don't have /dev/kmsg. This means that we
# won't be able to log anything there, but before our logic creates a
# regular file in the /dev filesystem, just forward messages to
# /dev/null if the file doesn't exist. Not nice, but no other option
# for us.
#
# The device is additionally probed in a subshell: a failing
# redirection on 'exec' may terminate a POSIX shell, so we only pick
# /dev/kmsg if it can really be opened for writing.
if [ -c "/dev/kmsg" ] && (exec >"/dev/kmsg") 2>/dev/null ; then
	logdevice="/dev/kmsg"
else
	logdevice="/dev/null"
fi

# redirect stdout + stderr to the log device
exec >"$logdevice" 2>&1

# this should work on all distros, /usr-merged or not, but if you have
# a /usr-merged distro and it doesn't have a compat symlink for /bin
# (or /bin/mount), just change this accordingly
MOUNT_EXE=/bin/mount

# we need two of these units that are basically identical, but with
# different dependencies, so make a function out of it
#
# Usage:   generate_unit target
#   target   unit the remount service is ordered before
#            (written verbatim into Before=)
# Globals: MOUNT_EXE (read)
# Output:  the complete template service unit on stdout
generate_unit()
{
	# Note that since we always order after the mount unit, we
	# don't need to take care of the dependencies of the mount unit
	# itself. The only tricky thing is the Before= dependency on
	# the filesystem target, which is why we need two units.
	#
	# One printf call, one output line per argument (no heredoc,
	# see the preconditions above).
	printf '%s\n' \
		"[Unit]" \
		"Description=Remounting bind mount with proper fstab options" \
		"DefaultDependencies=no" \
		"After=%i.mount" \
		"Before=$1" \
		"" \
		"[Service]" \
		"Type=oneshot" \
		"ExecStart=${MOUNT_EXE} -o remount,bind /%I"
}

# determine if a fstype is a network filesystem (typically fstype
# should be none, so if this really is on a network fs, the user should
# specify _netdev, but just in case...)
is_network_filesystem()
{
	# Usage: is_network_filesystem fstype
	# Returns 0 if fstype is considered a network filesystem, 1
	# otherwise.
	#
	# systemd's logic: strip fuse, check for a hard-coded list of
	# file systems
	case "$1" in
		fuse.*)
			is_network_filesystem "${1#fuse.}"
			return $?
			;;
		cifs|smbfs|sshfs|ncpfs|ncp|nfs|nfs4|gfs|gfs2|glusterfs)
			return 0
			;;
	esac
	return 1
}

# kill multiple slashes in path and drop a trailing slash (systemd
# removes redundant inner and trailing slashes before it derives the
# mount unit name, so "/mnt//foo/" has to become "/mnt/foo" for our
# drop-in directory to match the unit). A lone "/" is kept as is.
#
# Usage:  path_kill_slashes path
# Output: the normalized path on stdout
path_kill_slashes()
{
	# printf instead of echo: echo may interpret backslashes in the
	# path, depending on the shell
	printf '%s\n' "$1" | sed -e 's%//*%/%g' -e 's%\(.\)/$%\1%'
}

# systemd-escape implementation
#
# Usage:  sdescape string
# Output: the escaped string on stdout
# (all arguments are handed to systemd-escape if it is available, the
# fallbacks only look at the first one)
sdescape()
{
	# requires systemd-216
	# (Debian Jessie's -215 also supports this)
	# 'command -v' is a shell builtin, whereas 'which' is an external
	# tool that might not be available this early.
	if command -v systemd-escape >/dev/null 2>&1 ; then
		systemd-escape "$@"
		return $?
	fi

	# poor man's systemd escaping with perl
	if command -v perl >/dev/null 2>&1 ; then
		printf '%s\n' "$1" | perl -pe 'chomp; s%((?:^\.)|[^A-Za-z0-9:_./])%sprintf("\\x\%02x", ord($1))%ge; s%/%-%g; s%$%\n%;' 2>/dev/null
		return $?
	fi

	# very poor man's systemd escaping (no perl)
	# basic idea from:
	# http://it.toolbox.com/wiki/index.php/Uri_escape_bash_shell_function
	# tested in bash, dash and mksh (2>/dev/null w/ printf is to
	# suppress mksh warnings) Note that while this escaping routine
	# was tested in mksh, the rest of the script wasn't.
	#
	# The string is consumed one character at a time: _se_chr is the
	# current character, _se_str what is still left to process and
	# _se_tmp the result so far. (No 'local' in POSIX sh, hence the
	# _se_ prefix.)
	_se_str="$1"
	_se_tmp=""
	_se_first=1
	while [ -n "$_se_str" ] ; do
		# split off the first character without forking
		_se_rest="${_se_str#?}"
		_se_chr="${_se_str%"$_se_rest"}"
		case "$_se_chr" in
			/)
				_se_tmp="${_se_tmp}-"
				;;
			[A-Za-z0-9:_])
				_se_tmp="${_se_tmp}${_se_chr}"
				;;
			.)
				# no leading dots allowed
				if [ $_se_first -eq 1 ] ; then
					_se_tmp="${_se_tmp}$(printf '\\x%02x' "'$_se_chr" 2>/dev/null)"
				else
					_se_tmp="${_se_tmp}."
				fi
				;;
			*)
				# everything else becomes \xNN ("'c" makes
				# printf use the character's numeric value)
				_se_tmp="${_se_tmp}$(printf '\\x%02x' "'$_se_chr" 2>/dev/null)"
				;;
		esac
		_se_str="$_se_rest"
		_se_first=0
	done
	# the result contains backslashes, so don't use echo here
	printf '%s\n' "$_se_tmp"
}

# determine if a specific mount option is set, but it's not the only
# mount option in the list
has_mntopt_but_not_only()
{
	# Usage: has_mntopt_but_not_only option all_options
	# Returns 0 if option is the first, the last or an inner element
	# of the comma-separated all_options, 1 otherwise.
	case "$2" in
		"$1,"*|*",$1"|*",$1,"*)
			return 0
			;;
	esac
	return 1
}

# determine if a specific mount option is set, might be the only one or
# one of many
has_mntopt()
{
	# Usage: has_mntopt option all_options
	if [ "$1" = "$2" ] ; then
		return 0
	fi
	has_mntopt_but_not_only "$1" "$2"
}

# get own name (strip the directory part of $0 without forking)
PROGNAME="${0##*/}"
if [ -z "$PROGNAME" ] ; then
	PROGNAME="bindmount-generator"
fi

# logging functions for /dev/kmsg (generate the messages in a way that
# looks like syslog); all arguments are joined into one message, the
# <N> prefix is the priority. err additionally terminates the script
# with exit status 1.
err()
{
	printf '%s\n' "<3>${PROGNAME}[$$]: $*" >&2
	exit 1
}

warn()
{
	printf '%s\n' "<4>${PROGNAME}[$$]: $*" >&2
}

notice()
{
	printf '%s\n' "<5>${PROGNAME}[$$]: $*" >&2
}

info()
{
	printf '%s\n' "<6>${PROGNAME}[$$]: $*" >&2
}

debug()
{
	printf '%s\n' "<7>${PROGNAME}[$$]: $*" >&2
}

# make sure it's a file and readable
[ -f /etc/fstab ] || { info "/etc/fstab not a file" ; exit 0 ; }
[ -r /etc/fstab ] || { info "/etc/fstab not readable" ; exit 0 ; }

# unit dir: late generators
unit_dir="$3"

[ -d "$unit_dir" ] || err "generator dir $unit_dir not a directory"

# ALWAYS generate both units
# (makes it easier for the administrator if they want to override
# something, because they are always there, regardless of whether they
# are needed or not)
generate_unit local-fs.target > "$unit_dir/bindmount-local@.service"
generate_unit remote-fs.target > "$unit_dir/bindmount-remote@.service"

# read
# /etc/fstab, but remove all comments and empty lines first
#
# -r keeps read from eating backslashes.
# NOTE(review): fstab octal escapes such as \040 are not decoded here,
# so mount points that need them are presumably still not matched to
# the right mount unit - confirm before relying on it.
sed 's%#.*%%' < /etc/fstab 2>/dev/null \
	| grep -v '^$' 2>/dev/null \
	| while read -r src dst fstype options dump pass _otherfields
do
	# Ignore lines we don't understand, i.e. either one of the first
	# four fields is empty, OR we parsed too many fields. The fifth
	# and sixth field (dump and pass) are optional in fstab and
	# default to zero, so they may be missing.
	if [ -z "$src" ] || [ -z "$dst" ] || [ -z "$fstype" ] || \
	   [ -z "$options" ] || [ -n "$_otherfields" ] ; then
		continue
	fi

	# normalize slashes in the mount point the way systemd does
	dst="$(path_kill_slashes "$dst")"

	# we don't want to go anywhere NEAR the rootfs
	if [ "$dst" = "/" ] ; then
		continue
	fi

	# Note that doing additional sanity checks here, such as
	# whether the mount point exists or not, DOESN'T make sense,
	# since we are run really early at boot before anything itself
	# is actually mounted, therefore checking for this won't work
	# in general (it might in very specific cases). Also, note that
	# systemd creates mount points by default if they don't exist
	# yet, so that's not an error anyway.

	# We don't support rbind (recursive bind mounts), because there
	# it's not clear where to apply the options to, so only check
	# for the bind option. Also, if it's only 'bind', then the user
	# doesn't want to change options, so we don't want to generate
	# anything either (if /etc/fstab is changed and systemd is to
	# notice these changes, systemctl daemon-reload has to be run
	# and this generator is rerun as well).
	if ! has_mntopt_but_not_only "bind" "$options" ; then
		continue
	fi

	# remove slash at beginning, escape the value for usage
	# in unit names
	unit_name="$(sdescape "${dst#/}")"
	if [ -z "$unit_name" ] ; then
		# without a name we would end up writing to
		# ${unit_dir}/.mount.d
		warn "could not escape mount point ${dst}"
		continue
	fi

	dropin_dir="${unit_dir}/${unit_name}.mount.d"
	if ! mkdir -p "$dropin_dir" 2>/dev/null ; then
		warn "could not create ${dropin_dir}"
		continue
	fi

	# Determine if this is on a remote filesystem, so we
	# can decide our Before= ordering properly. We use the
	# same logic as systemd's internal source code for
	# this: a mount is considered to be remote iff
	#   - the _netdev option is set
	#   OR
	#   - the filesystem type is a remote filesystem
	#     (see is_network_filesystem above for details)
	if has_mntopt "_netdev" "$options" || \
	   is_network_filesystem "$fstype" ; then
		UNIT="bindmount-remote"
	else
		UNIT="bindmount-local"
	fi

	# Using a drop-in is easiest, since we don't want to
	# check whether our unit has been overridden (needed
	# if you want to use a symlink in .requires/)
	printf '%s\n' \
		"[Unit]" \
		"Requires=${UNIT}@${unit_name}.service" \
		> "${dropin_dir}/bindmount.conf"
done