#!/usr/bin/python2.4
# Copyright 2008 Google Inc. All Rights Reserved.

"""Unpack a tarball and run 'make check' on a number of different machines.

This is used to test an opensource project (encapsulated in a tar file)
on a lot of machines of different kinds.  The config file looks like
this (roughly):

    # comment
    VAR=value
    <os_type> <machine> [<machine> ...]
    ...

Recognized variables are DEFAULT_USER and DEFAULT_PASSWORD_FILE.  Each
<machine> is '<machine>', '<user>@<machine>', or a '%'-separated chain
'<gateway>%<machine>' that is reached by ssh-ing through the gateway.

Each <os_type> line specifies a unique os/cpu/etc combination -- each
machine listed for the <os_type> should be identical for whatever
definition of 'identical' the opensource project wants to use.  This
script will take one machine on the line, untar the tarfile on that
machine, and then run './configure && make check'.  If it cannot use one
machine (because it's not ssh-able, doesn't have a tool-chain, etc), it
will continue to the next one.  For each <os_type> it can find a working
machine for, it runs the test there, storing the output on the local
disk in /var/tmp/<project>.<os_type>.

Usage: <this script> <config_file> <tarfile> [args to pass to configure]
"""

import getpass
import os
import re
import stat
import sys
import time

_LOG_LEVEL = 1

# How many seconds we wait until we decide an ssh has failed
_SSH_TIMEOUT = 30

# How many seconds we allow for the remote configure + 'make check' run
_TEST_TIMEOUT = 3600

# The path we append on solaris machines (and maybe others)
_SOLARIS_PATH = '/opt/SUNWspro/bin:/usr/sfw/bin/:/usr/ccs/bin'

# Site-local wrapper, invoked as '<alarm> <seconds> <command...>'.
# NOTE(review): presumably it kills the command after <seconds>; confirm.
_ALARM_CMD = '/usr/local/scripts/alarm'

# Permission bits are spelled with stat constants rather than octal
# literals so the file parses under both old and new Python versions.
_PRIVATE_MODE = stat.S_IRWXU                               # rwx------
_LOGFILE_MODE = (stat.S_IRUSR | stat.S_IWUSR |
                 stat.S_IRGRP | stat.S_IROTH)              # rw-r--r--


def _LOG(level, msg):
    """Writes msg (plus a newline) to stderr if level <= _LOG_LEVEL."""
    if level <= _LOG_LEVEL:
        sys.stderr.write('%s\n' % (msg,))


def _ProjectDir(tarfile):
    """Guesses the directory a tarfile expands to: basename minus .tar.gz."""
    project_dir = os.path.basename(tarfile)
    if project_dir.endswith('.tar.gz'):
        project_dir = project_dir[:-len('.tar.gz')]
    return project_dir


def _Logfile(tarfile, os_type):
    """Returns the location of where to put the logfile, on local disk.

    The result is /var/tmp/<project>.<os_type>, where <project> is the
    tarfile's basename without its .tar.gz suffix.  A false os_type
    (None or '') yields a name that just ends in '.'.
    """
    return (os.path.join('/var/tmp', _ProjectDir(tarfile)) + '.' +
            (os_type or ''))


def _EscapeString(s):
    """Puts s in single quotes, escaping all single-quotes within s."""
    # Inside single quotes the shell has no escape character, so each
    # embedded ' becomes: close-quote, a double-quoted ', re-open quote.
    return "'" + s.replace("'", "'\"'\"'") + "'"


def ReadConfigFile(config):
    """Reads and parses the given config file, returning the parsed file.

    Args:
      config: A File object (or any iterable of lines) holding the
        configuration.

    Returns:
      Two maps: a map of var->value (from VAR=value lines in the config
      file), and a map of OS -> machine_list.  Each machine in the
      machine_list is of the form user@hostname.  Machines listed
      without a user get DEFAULT_USER prepended if it was set on an
      *earlier* line; otherwise they are skipped with a warning.  An OS
      whose machines were all skipped does not appear in the map.
    """
    # TODO(csilvers): use ConfigParser module?
    varmap = {}    # the VAR=value lines
    retval = {}    # os -> machine_list map
    for line in config:
        line = re.sub(r'#.*$', '', line)    # get rid of comments
        line = line.strip()
        m = re.match(r'^([\w_]+)=(.*)$', line)
        if m:
            varmap[m.group(1)] = m.group(2)
            continue

        words = line.split()
        if not words:    # ignore blank lines
            continue

        machine_type = words[0]
        # create machine-list for this os
        for machine in words[1:]:
            if '@' in machine:    # already has a username
                user_plus_machine = machine
            elif 'DEFAULT_USER' in varmap:
                user_plus_machine = varmap['DEFAULT_USER'] + '@' + machine
            else:
                _LOG(-1, ('Skipping %s: no user specified,'
                          ' and no default user' % machine))
                continue
            retval.setdefault(machine_type, []).append(user_plus_machine)
    # TODO(csilvers): randomize the list of machines?
    return (varmap, retval)


def DefaultPasswordFile(varmap):
    """Returns the filename holding a command that prints the password.

    If the file named by DEFAULT_PASSWORD_FILE is not readable, the user
    is prompted for the password and the file is created (owner-only
    permissions) containing a shell command that prints it.

    Args:
      varmap: The varmap returned from ReadConfigFile (needed for:
        DEFAULT_USER, DEFAULT_PASSWORD_FILE).

    Returns:
      DEFAULT_PASSWORD_FILE (the filename holding a command that prints
      the password).  If DEFAULT_USER or DEFAULT_PASSWORD_FILE is not
      set, return None.

    Raises:
      OSError/IOError: if the file (or its directory) cannot be created.
    """
    if 'DEFAULT_USER' not in varmap or 'DEFAULT_PASSWORD_FILE' not in varmap:
        return None

    password_file = varmap['DEFAULT_PASSWORD_FILE']
    if os.access(password_file, os.R_OK):
        return password_file

    # A bare filename has an empty dirname; makedirs('') would raise.
    password_dir = os.path.dirname(password_file)
    if password_dir and not os.path.exists(password_dir):
        os.makedirs(password_dir)
        os.chmod(password_dir, _PRIVATE_MODE)

    password = getpass.getpass('Please enter the password for user %s: '
                               % varmap['DEFAULT_USER'])

    # Create the file with restrictive permissions from the start, so
    # the password is never on disk in a file other users could read.
    fd = os.open(password_file,
                 os.O_WRONLY | os.O_CREAT | os.O_TRUNC, _PRIVATE_MODE)
    f = os.fdopen(fd, 'w')
    try:
        # The password is shell-quoted so metacharacters in it are
        # printed literally; printf is used because 'echo -n' is not
        # portable across shells.
        f.write("printf '%%s' %s\n" % _EscapeString(password))
    finally:
        f.close()
    # The mode passed to os.open is filtered by the umask and ignored
    # for a pre-existing file, so set it explicitly as well.
    os.chmod(password_file, _PRIVATE_MODE)
    return password_file


def _StartSsh(user_plus_machine, cmd, var_map, timeout,
              ssh_in=None, ssh_out=None):
    """Sshes to the given machine and runs the given command, in a sub-process.

    Uses SSH_ASKPASS so you don't need to enter the password by hand.

    Args:
      user_plus_machine: <machine>, <user>@<machine>, or a '%'-separated
        chain <gateway>%<machine>: where to run.  The gateway machine
        means we need to do "ssh gateway 'ssh machine 'cmd''" to run the
        command.
      cmd: The command to run on the remote machine.
      var_map: The var_map as returned by ReadConfigFile (need password file).
      timeout: Timeout to use with the ssh command (must always be specified).
      ssh_in: If specified, a filename to use as stdin to the ssh command.
      ssh_out: If specified, a filename to use as stdout+stderr for the ssh
        cmd.  Any existing file of that name is replaced.

    Returns:
      (pid_of_subprocess, user_plus_machine).
    """
    _LOG(2, 'Running "%s" on "%s"' % (cmd, user_plus_machine))

    # Resolve the askpass file in the parent: the child has no tty after
    # setsid(), so it could not prompt if the file still had to be made.
    askpass = DefaultPasswordFile(var_map)

    # SSH_ASKPASS only works if DISPLAY is set and there's no tty.
    # Getting rid of the tty is tricky.  We use fork plus setsid().
    pid = os.fork()
    if pid != 0:    # PARENT PROCESS
        # The caller is responsible for waiting on the child.
        return (pid, user_plus_machine)

    # CHILD PROCESS.  It must never return into the caller's code: it
    # either becomes ssh via exec, or exits with a failure status.
    try:
        try:
            os.setsid()    # this, among other things, gets rid of the tty
            if ssh_in:
                child_stdin = os.open(ssh_in, os.O_RDONLY)
                os.dup2(child_stdin, 0)
            if ssh_out:
                try:
                    os.unlink(ssh_out)
                except (IOError, OSError):
                    pass    # best-effort: the file usually doesn't exist
                child_stdout = os.open(ssh_out, os.O_WRONLY | os.O_CREAT,
                                       _LOGFILE_MODE)
                os.dup2(child_stdout, 1)
                os.dup2(child_stdout, 2)

            # Chain the ssh's if machine is <gateway>%<machine>.  Each
            # ssh we add, we need to escape the command one more time.
            ssh_cmd = []
            for (i, one_machine) in enumerate(user_plus_machine.split('%')):
                ssh_cmd.extend(['ssh', '-x', one_machine])
                if i != 0:    # don't need to escape the first usage
                    cmd = _EscapeString(cmd)
            ssh_cmd.append(cmd)
            _LOG(3, 'Final ssh command: %s' % ssh_cmd)

            env_cmd = ['/usr/bin/env']
            if askpass is not None:
                env_cmd.append('SSH_ASKPASS=%s' % askpass)
            env_cmd.append('DISPLAY=:0')
            os.execv('/usr/bin/env',
                     env_cmd + [_ALARM_CMD, str(timeout)] + ssh_cmd)
        except Exception:
            _LOG(0, 'Failed to start ssh to %s: %s'
                 % (user_plus_machine, sys.exc_info()[1]))
    finally:
        # Only reached when exec did not happen.  _exit skips the
        # parent's cleanup handlers, which the child must not run.
        os._exit(127)


def _MachineOnly(user_plus_machine):
    """Maps <machine>, <user>@<machine>, or <gateway>%<machine> to a hostname.

    The leading '<user>@' (if any) is dropped, and for a '%'-chain only
    the first hop is returned.
    """
    # We make use of the fact that find returns -1 on failure.
    return user_plus_machine[user_plus_machine.find('@') + 1:].split('%', 2)[0]


def FindLiveMachines(machine_list, var_map):
    """Given a list of machines, return which machines are ssh-able.

    A machine counts as alive if we can ssh to it and it appears to have
    'make' plus the C runtime start files (i.e. a usable tool-chain).

    Args:
      machine_list: A list of machines, each of the form either
        '<machine>' or '<user>@<machine>'
      var_map: The var_map as returned by ReadConfigFile (needed for password)

    Returns:
      Three lists: (alive, not-sshable, not-even-pingable), each in the
      same order as machine_list.  Since we no longer ping, the third
      list is currently always empty.
    """
    # TODO(csilvers): consider using three sets here, rather than three lists.
    #                 (right now it's lists to keep the same order as the
    #                 input; but how important is that?)
    if not machine_list:
        return ([], [], [])

    # They've turned off ICMP on these machines, so we can't ping them
    # anymore.  Just assume all machines are pingable.
    pingable_set = set(machine_list)

    # Now figure out which pingable machines we can ssh to, in parallel.
    _LOG(1, 'Finding live machines by ssh-ing')
    # We add the path for solaris always; it doesn't hurt for other machines.
    # We assume if the machine has make and crt1.o or crtn.o, it has a
    # full tool-chain.  (Some machines have make/gcc but lack the C
    # libraries.)  I've tested that linux, os x, solaris x86, and
    # freebsd all have one of /usr/lib/crt1.o and /usr/lib/crtn.o.
    # (slackware has it in the lib64 directory.)
    # cygwin uses crt0.o and mingw uses /mingw/lib, so add those too.
    # TODO(csilvers): just do an ls for crt?.o instead?
    cmd = ('PATH=${PATH:+$PATH:}' + _SOLARIS_PATH + ' && export PATH'
           ' && make /dev/null >/dev/null 2>&1'
           ' && test -f /usr/lib/crt1.o -o -f /usr/lib/crtn.o'
           ' -o -f /usr/lib/crt0.o -o -f /mingw/lib/crt1.o'
           ' -o -f /usr/lib64/crt0.o -o -f /usr/lib64/crt1.o'
           ' -o -f /usr/lib64/crtn.o')
    probe = 'sh -c %s' % _EscapeString(cmd)
    sshing_list = [_StartSsh(m, probe, var_map, _SSH_TIMEOUT)
                   for m in pingable_set]

    sshable_set = set()
    for (pid, machine) in sshing_list:
        (_, rc) = os.waitpid(pid, 0)
        if rc == 0:
            sshable_set.add(machine)
        else:
            _LOG(3, 'Cannot ssh to %s: rc %s' % (machine, rc))

    # Make three unique lists: ssh-able, pingable, neither.
    _LOG(1, 'Done finding live machines')
    retval = ([], [], [])
    for m in machine_list:
        if m in sshable_set:
            retval[0].append(m)
        elif m in pingable_set:
            retval[1].append(m)
        else:
            retval[2].append(m)
    return retval


def TestOnOneMachine(machine, tarfile, os_type, var_map,
                     args_to_pass_to_configure):
    """Untars tarfile onto machine and runs configure + 'make check' there.

    Writes the logfile to the local disk, under
    /var/tmp/<project>.<os_type>.  All work is done in an ssh subprocess.

    Args:
      machine: The machine (possibly user@machine or
        gateway_machine%machine) to run the test on.
      tarfile: The tarfile to unpack on the remote machine and run 'make' on.
      os_type: The OS+cpu+other info label used to describe this machine.
      var_map: The varmap as returned by ReadConfigFile.
      args_to_pass_to_configure: args to put on the configure commandline
        on the remote machine, for instance 'CXXFLAGS=-g' or 'CPP="gcc -E"'.

    Returns:
      pid of the ssh subprocess we started.
    """
    # The tarball is unpacked under /var/tmp on the remote machine too.
    remote_tardir = os.path.join('/var/tmp', _ProjectDir(tarfile))
    logfile = _Logfile(tarfile, os_type)

    if 'olaris' in os_type:    # we need some extra work on solaris
        configure = ('PATH=${PATH:+$PATH:}' + _SOLARIS_PATH + ' && export PATH'
                     ' && ./configure')
    else:
        configure = './configure'

    escaped_args = [_EscapeString(a) for a in args_to_pass_to_configure]
    configure_args_string = ' '.join(escaped_args)

    cmd = ('cd `dirname %(tardir)s`; '
           'rm -rf %(tardir)s/{*,.[^.]*}; '
           'gzip -cd | tar xmf -; '    # not all tar's support the -z flag(!)
           'uname -a; cat /etc/issue; gcc --version; '    # for documentation
           'cd %(tardir)s && '
           '%(configure)s %(configure_args)s && '
           'make check '
           % {'tardir': remote_tardir,
              'configure': configure,
              'configure_args': configure_args_string})
    # The tarball is fed to the remote 'gzip -cd' through ssh's stdin.
    (pid, _) = _StartSsh(machine, cmd, var_map, _TEST_TIMEOUT,
                         ssh_in=tarfile, ssh_out=logfile)
    return pid


def _Status(tarfile, not_done, running, passed, failed):
    """Returns a multi-line string describing the current status.

    Args:
      tarfile: The tarfile under test (used to compute logfile names).
      not_done: List of os_types for which no live machine was found.
      running: Map of pid -> (os_type, machine) for tests in progress.
      passed: List of os_types whose test succeeded.
      failed: List of os_types whose test failed.

    Returns:
      One line per os_type, with the status column aligned; the empty
      string if there is nothing to report.
    """
    retlines = []    # a list of pairs: os_type and associated message

    for os_type in sorted(not_done):
        retlines.append((os_type, '(not running: no live machine found)'))

    for os_type in sorted(passed):
        retlines.append((os_type,
                         'DONE (PASS): %s' % _Logfile(tarfile, os_type)))

    for os_type in sorted(failed):
        retlines.append((os_type,
                         'DONE (*FAIL*): %s' % _Logfile(tarfile, os_type)))

    for (os_type, machine) in sorted(running.values()):
        retlines.append((os_type, 'running on %s' % machine))

    if not retlines:    # max() below would raise on an empty list
        return ''

    # Figure out the length of the longest os-type, so we can line up columns.
    max_len = max([len(r[0]) for r in retlines])
    # Each line is indented slightly, to stand out, in addition to
    # lining things up.
    return ''.join([' %-*s %s\n' % (max_len + 1, os_type + ':', status)
                    for (os_type, status) in retlines])


def TestOnMachines(machine_map, var_map, tarfile, args_to_pass_to_configure):
    """Unpacks tarfile and runs 'make check' on every machine-type in the map.

    Given a list of machines, figure out which ones are alive and
    working, and test on one alive-and-working machine of each OS type.
    Progress and the final summary are logged to stderr; nothing is
    returned.

    Args:
      machine_map: a map of os_type to machine-list, from ReadConfigFile.
      var_map: a map of variable->value as returned from ReadConfigFile.
      tarfile: the distribution file we unpack to run 'make' + 'make check'.
      args_to_pass_to_configure: args to put on the configure commandline
        on the remote machine, for instance 'CXXFLAGS=-g' or 'CPP="gcc -E"'.
    """
    DefaultPasswordFile(var_map)    # force creation of the password file

    machines = []
    for one_machinelist in machine_map.values():
        machines.extend(one_machinelist)
    (alive, pingable, dead) = FindLiveMachines(machines, var_map)
    _LOG(2, 'Alive machines: %s' % alive)
    _LOG(3, 'Pingable machines: %s' % pingable)
    _LOG(3, 'Dead machines: %s' % dead)
    alive_set = set(alive)    # constant-time membership tests below

    if args_to_pass_to_configure:
        _LOG(0, ('Passing the following flags to ./configure: %s'
                 % ' '.join(args_to_pass_to_configure)))

    running = {}     # pid -> (os_type, machine)
    not_done = []
    passed = []
    failed = []
    # Sorted just so we log in a nice, pretty order.
    for (os_type, machine_list) in sorted(machine_map.items()):
        for m in machine_list:
            if m in alive_set:
                _LOG(1, '%s: running on machine %s' % (os_type, m))
                pid = TestOnOneMachine(m, tarfile, os_type, var_map,
                                       args_to_pass_to_configure)
                running[pid] = (os_type, m)
                break
        else:    # means no machine in machine_list is alive
            _LOG(1, '%s: NOT RUNNING: no live machine found' % (os_type))
            not_done.append(os_type)

    _LOG(0, 'Logs will end up in %s'
         % _Logfile(tarfile,
                    '{' + ','.join([x[0] for x in running.values()]) + '}'))

    try:
        while running:
            _LOG(0, '[%s] STATUS:\n%s'
                 % (time.ctime(),
                    _Status(tarfile, not_done, running, passed, failed)))
            (pid, status) = os.wait()
            if pid not in running:
                # Some child we did not start as a test; an assert here
                # would vanish under -O and turn into a KeyError below.
                _LOG(2, 'Ignoring exit of unknown child (pid %s)' % pid)
                continue
            if status == 0:
                passed.append(running[pid][0])    # store the os-types
                _LOG(2, 'Finished: %s (pid %s): PASSED' % (running[pid], pid))
            else:
                failed.append(running[pid][0])
                _LOG(2, 'Finished: %s (pid %s): FAILED' % (running[pid], pid))
            del running[pid]
    finally:    # print even if the user decides to control-C while running
        _LOG(0, '----')
        _LOG(0, '[%s] DONE:\n%s'
             % (time.ctime(),
                _Status(tarfile, not_done, running, passed, failed)))
        if running:
            _LOG(0, 'Incomplete logs are in\n %s'
                 % _Logfile(tarfile,
                            '{' + ','.join([x[0] for x in running.values()])
                            + '}'))
        if passed or failed:
            done = sorted(passed + failed)    # print in a pretty order
            _LOG(0, 'Finished logs are in\n %s'
                 % _Logfile(tarfile, '{' + ','.join(done) + '}'))


def Main():
    """Parses the command line and runs the tests; exits on a usage error."""
    # Check the argument count up front rather than catching IndexError
    # around the whole run, which would misreport internal bugs as
    # usage errors.
    if len(sys.argv) < 3:
        sys.exit('USAGE: %s <config_file> <tarfile>'
                 ' [args to pass to configure]\n' % sys.argv[0])

    config = open(sys.argv[1])
    try:
        (var_map, machine_map) = ReadConfigFile(config)
    finally:
        config.close()
    # TODO(csilvers): accept args allowing to run on only a subset of os-types
    TestOnMachines(machine_map, var_map, sys.argv[2], sys.argv[3:])


if __name__ == '__main__':
    Main()