From 430904918c1164bf08f54fa6f16ac1f7cabc84d2 Mon Sep 17 00:00:00 2001 From: Eric Thibodeau Date: Thu, 21 Aug 2008 00:04:38 -0400 Subject: DHCP callback script corrections and config file generation corrections - cluster-setup: *$PBS_SERVER_HOME/server_priv/nodes is now generated by the ebuild */etc/init.d/pbs_sched start added - nfsroot_setup.sh: * /etc/conf.d/net was not generated correctly, vars were interpreted during the config file generation and not put into the config file - beowulf-head-0.1.ebuild: * There were still some nonfsv4 incorrect USE flags - node-manager: * logic rework and some test corrections * less implicit mangling of $@ on function calls - torque-add: * we need to restart pbs_server on modifying $PBS_SERVER_HOME/server_priv/nodes ** this is horrible but the only reliable way to add nodes as per http://www.clusterresources.com/wiki/doku.php?id=torque:3.1_adding_nodes --- .../livecd/2008.0/overlay/usr/sbin/cluster-setup | 4 +++- catalyst/nfsroot/2008.0/scripts/nfsroot_setup.sh | 5 ++-- overlay/sys-cluster/beowulf-head/Manifest | 6 ++--- .../beowulf-head/beowulf-head-0.1.ebuild | 7 +++--- .../sys-cluster/beowulf-head/files/node-manager | 27 +++++++++++----------- overlay/sys-cluster/beowulf-head/files/torque-add | 5 ++++ 6 files changed, 32 insertions(+), 22 deletions(-) diff --git a/catalyst/livecd/2008.0/overlay/usr/sbin/cluster-setup b/catalyst/livecd/2008.0/overlay/usr/sbin/cluster-setup index 73c1d66..fe163ee 100755 --- a/catalyst/livecd/2008.0/overlay/usr/sbin/cluster-setup +++ b/catalyst/livecd/2008.0/overlay/usr/sbin/cluster-setup @@ -163,8 +163,9 @@ if [[ $? 
== 0 ]]; then USE=server emerge --config =sys-cluster/torque-2.3.3 # if mom is not started on the master node, listing it in nodes doesn't hurt it and # makes sure the file exists - echo $(hostname) >> $PBS_SERVER_HOME/server_priv/nodes + #echo $(hostname) >> $PBS_SERVER_HOME/server_priv/nodes /etc/init.d/pbs_server start + /etc/init.d/pbs_sched start USE=-server ROOT=$NFSROOT emerge --config =sys-cluster/torque-2.3.3 fi @@ -183,3 +184,4 @@ if [[ $1 == "--test" ]]; then echo '- eth0 is the public nic' echo '- all defaults in .config files get used' fi + diff --git a/catalyst/nfsroot/2008.0/scripts/nfsroot_setup.sh b/catalyst/nfsroot/2008.0/scripts/nfsroot_setup.sh index f2b7bed..dd35d58 100644 --- a/catalyst/nfsroot/2008.0/scripts/nfsroot_setup.sh +++ b/catalyst/nfsroot/2008.0/scripts/nfsroot_setup.sh @@ -46,16 +46,17 @@ openrc_diskless_setup() { # /etc/conf.d/bootmisc change_opt 'wipe_tmp="NO"' /etc/conf.d/bootmisc + # Using EOF is nice but the vars get interpreted # /etc/conf.d/net cat > /etc/conf.d/net <<-EOF preup() { local c=0 i for i in /sys/devices/system/cpu/cpu[0-9]*; do - c=$((${c} + 1)) + c=\$((\${c} + 1)) done - dhcpcd_eth0="--persistent --userclass=\"${c}\"" + dhcpcd_eth0="--persistent --userclass=\"\${c}\"" } EOF # The above is an alternative which doesn't use wc, thus no superficial process spawning. 
diff --git a/overlay/sys-cluster/beowulf-head/Manifest b/overlay/sys-cluster/beowulf-head/Manifest index 5fadd62..b995adb 100644 --- a/overlay/sys-cluster/beowulf-head/Manifest +++ b/overlay/sys-cluster/beowulf-head/Manifest @@ -2,8 +2,8 @@ AUX c3-add 686 RMD160 ede4fc8bb832046607e79f831942a3d0708cbcab SHA1 cb496f813706 AUX c3-del 104 RMD160 18b49c1a929b0fc08912692f41ec8df1313c940d SHA1 b7d66c89d64fcb7a32a8d62cb147dcaf2994c3e7 SHA256 f4e3e9431c40b50f1596816eaa4d1f760ab840ae555d5fa2a76a7672d0432dfa AUX cluster.conf 2621 RMD160 b26b26c4482be2d921fa8b1ddb99f42ea3120bf9 SHA1 3d1a2926c977472e2ace5a3b831681e4def390ad SHA256 56fe9f2e51f6a27467721b850fdf1e68ad3d3df484bfa096e8841cae5a044d54 AUX dnsmasq-2.4-conf 1552 RMD160 08dae8ff1cd345acedb4cbbb819f8ccf35211b8b SHA1 e770ba62ca66b61cd84409bbb22433b1dfcf9ff3 SHA256 f2b63d6fc8558c84e79b5f172c4057354bdfe000aae19f8aa1857655259a6b7f -AUX node-manager 2022 RMD160 c5585bd2b7f3ba8aa83ab30f783376ef08a35f93 SHA1 88f3865464020af9e9059faa82cb211a19ce7aa9 SHA256 fb37ebda323953ac2b63e1218cee04bca985c221f0868e166df435f1f6acd945 -AUX torque-add 479 RMD160 35a63ae8069a2f8966356d9379723684c7ff9299 SHA1 4de2f9c60a569e822377e737d3b5c1ccf53cc2ed SHA256 695c44a2cb81d1b81b35138fbe8ba0ea3aedf9e3e68d96f0e7f87f745f272794 +AUX node-manager 2101 RMD160 77c7f5281a0139b044f6efe934fbeb0d0682a11c SHA1 a6e03aa59573e790004bcbbf42e9bd5239722c56 SHA256 8e0dcce75e9eeef1029b81c2a935d68c472485b7ff41ac6e90c802195d8dd300 +AUX torque-add 629 RMD160 79040160b992cfefde964fc6511022e029c1b58a SHA1 324d4b0cb5071aaf3a6c62d01bb613cdf8ecf901 SHA256 5d3d4c3ab35923632b81eaf48d64d6cba1060d0a1887fbc85e8adf10b7fc52c9 AUX torque-del 132 RMD160 bc1e0a9ccc6ce20630003f7372d62bdb705b9187 SHA1 c735f03fb6715ab601cb84f4fbc92be78ce43575 SHA256 c455764846f192930625721958532d3cee64aa75d9d83a37b67544e4c7bdee64 -EBUILD beowulf-head-0.1.ebuild 7412 RMD160 6e2dfebb5dbf981ee6a41337c5e9a89bd686e567 SHA1 78b2c53f4e73b48a449d31a1e304289c8ac540b9 SHA256 
1960ca36f06269557a8d39b8373bda595a7b2d91eab0a08a02aea46552b6736b +EBUILD beowulf-head-0.1.ebuild 7466 RMD160 ccce8655a1ac50b26fd7b6399fe8c869f72c7b41 SHA1 56f905994cad453a7c5c73657d306e7057379b84 SHA256 14d3301f82159232ef471ce3de20cab5af51dec363ce830a9f632d9f210d75ab MISC ChangeLog 101 RMD160 d735719ccc09237e68e4e984eb5451000b06c382 SHA1 79098d1e8ae451f0430d363da6b717966ade6e78 SHA256 8dd010e2772026c4a6b86839842931b99c85c44a5e9f380864cc275b55587ac2 diff --git a/overlay/sys-cluster/beowulf-head/beowulf-head-0.1.ebuild b/overlay/sys-cluster/beowulf-head/beowulf-head-0.1.ebuild index a27c7b7..e09e0c6 100644 --- a/overlay/sys-cluster/beowulf-head/beowulf-head-0.1.ebuild +++ b/overlay/sys-cluster/beowulf-head/beowulf-head-0.1.ebuild @@ -161,7 +161,7 @@ nfs_root_conf() # Yes, this could be looped on RO and RW...but that wouldn't be more redable for I in $ROEXPORTS do - if use nonfs4 ; then + if use nonfsv4 ; then echo "${I} $NETADDR/$SUBNET(ro,$NFSEPORTOPTS)" >> ${ROOT}/etc/exports else mkdir -p ${NFSROOT}/${I} @@ -178,7 +178,7 @@ nfs_root_conf() mkdir -p ${NFSROOT}/${I} mount -o bind ${I} ${NFSROOT}/${I} fi - use autofs && echo "${I/\/} -ro,$NFSMOUNTOPTS $CLUSTER_ETH_Ir:${I}" >> ${NFSROOT}/etc/autofs/auto.nfs + use autofs && echo "${I/\/} -rw,$NFSMOUNTOPTS $CLUSTER_ETH_Ir:${I}" >> ${NFSROOT}/etc/autofs/auto.nfs done } @@ -251,7 +251,8 @@ pkg_config() pxe_conf nfs_root_conf - for I in dnsmasq netmount nfs + use pbs && TORQUED="pbs_server pbs_sched" + for I in dnsmasq netmount nfs $TORQUED do rc-update add $I default /etc/init.d/$I start diff --git a/overlay/sys-cluster/beowulf-head/files/node-manager b/overlay/sys-cluster/beowulf-head/files/node-manager index e843f24..e77b764 100755 --- a/overlay/sys-cluster/beowulf-head/files/node-manager +++ b/overlay/sys-cluster/beowulf-head/files/node-manager @@ -22,9 +22,9 @@ beeps(){ run_modules(){ for I in ${MODULESPATH}/*-${COMMAND} do - $I $@ + $I $@ $DNSMASQ_USER_CLASS0 done - [ $BEEPS ] && beeps + [[ $BEEPS == "true" ]] && 
beeps } # Yes, it's crude but we'll elaborate later on something more functionnal @@ -60,21 +60,22 @@ HNAME=$4 ##### Variables END ### -if [[ $COMMAND == "old" ]] -then +if [[ $COMMAND == "del" ]]; then + run_modules $@ +else # we background this since it performs a ping with a 1 second timeout # on a 256 node system, those seconds add up... - if [[ $# -lt 4 ]]; then - # We ignore calls made with old and only 4 args as we're missing the - # processor count. This can be caused by dnsmasq reloading - # or dnsmasq not setting DNSMASQ_USER_CLASS0 as it seems to happen often - # on subsequent dhcp requests made by the client. + if [[ $# -lt 5 ]]; then + # We ignore calls made with only 4 args as we're missing the processor count. + # This can be caused by dnsmasq reloading or dnsmasq not setting DNSMASQ_USER_CLASS0 + # as the incoming request is either only a "renewal/rebind" request which doesnt trigger + # DHCPDISCOVER exit 0 - else - old_node $@ $DNSMASQ_USER_CLASS0 & + elif [[ $COMMAND == "old" ]]; then + old_node $@ & + else # COMMAND == add + run_modules $@ fi -else - run_modules $@ $DNSMASQ_USER_CLASS0 fi exit 0 diff --git a/overlay/sys-cluster/beowulf-head/files/torque-add b/overlay/sys-cluster/beowulf-head/files/torque-add index 0e7e6ef..89917a1 100755 --- a/overlay/sys-cluster/beowulf-head/files/torque-add +++ b/overlay/sys-cluster/beowulf-head/files/torque-add @@ -7,6 +7,8 @@ CLUSTER_CONF=%%CONFPATH/%%CONFIG_FILE touch $NODES # $4 == hostname $5 == number of procs +# we use the one from CLUSTER_CONF if +# $5 is empty grep -v $4 $NODES > $NODES.$$ if [[ -z "$5" && -z $NODENP ]]; then @@ -16,7 +18,10 @@ elif [[ ! -z $5 ]]; then else echo $4 np=$NODENP >> $NODES.$$ fi + cat $NODES.$$ | sort -g | uniq | egrep -v "(^$|^\W$)" > $NODES. mv $NODES. $NODES rm -f $NODES.$$ +# there is no other way to tell Torque the nodes file changed... +/etc/init.d/pbs_server restart -- cgit v1.2.3-65-gdbad