Revision: 23867 http://trac.macosforge.org/projects/launchd/changeset/23867 Author: dsorresso@apple.com Date: 2009-03-24 15:27:19 -0700 (Tue, 24 Mar 2009) Log Message: ----------- Added code for debugging system bootstrapper crashes. Modified Paths: -------------- trunk/launchd/src/launchctl.c trunk/launchd/src/launchd_core_logic.c Modified: trunk/launchd/src/launchctl.c =================================================================== --- trunk/launchd/src/launchctl.c 2009-03-24 20:59:07 UTC (rev 23866) +++ trunk/launchd/src/launchctl.c 2009-03-24 22:27:19 UTC (rev 23867) @@ -170,6 +170,8 @@ static void do_bootroot_magic(void); static void do_single_user_mode(bool); static bool do_single_user_mode2(void); +static void do_crash_debug_mode(void); +static bool do_crash_debug_mode2(void); static void read_launchd_conf(void); static void read_environment_dot_plist(void); static bool job_disabled_logic(launch_data_t obj); @@ -177,6 +179,8 @@ static void do_file_init(void) __attribute__((constructor)); static void setup_system_context(void); static void tell_launchd_about_boot_args(void); +static void handle_system_bootstrapper_crashes_separately(void); +static void fatal_signal_handler(int sig, siginfo_t *si, void *uap); typedef enum { BOOTCACHE_START = 1, @@ -262,6 +266,7 @@ static bool bootstrapping_system; static bool bootstrapping_peruser; static bool g_shutdown_debugging = false; +static bool g_booting_verbose = false; static bool g_job_overrides_db_has_changed = false; static CFMutableDictionaryRef g_job_overrides_db = NULL; @@ -1789,6 +1794,10 @@ if (!assumes(login_tty(fd) != -1)) { _exit(EXIT_FAILURE); } + + mach_timespec_t wt = { 5, 0 }; + IOKitWaitQuiet(kIOMasterPortDefault, &wt); /* This will hopefully return after all the kexts have shut up. */ + setenv("TERM", "vt100", 1); if (runcom_fsck) { fprintf(stdout, "Singleuser boot -- fsck not done\n"); @@ -1805,6 +1814,64 @@ _exit(EXIT_FAILURE); } +void +do_crash_debug_mode(void) +{ + while (!do_crash_debug_mode2()) { + sleep(1); + } +} + +bool +do_crash_debug_mode2(void) +{ + int wstatus; + int fd; + pid_t p; + + switch ((p = fork())) { + case -1: + syslog(LOG_ERR, "can't fork crash debug shell, trying again: %m"); + return false; + case 0: + break; + default: + assumes(waitpid(p, &wstatus, 0) != -1); + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) { + return true; + } else { + fprintf(stdout, "crash debug mode: exit status: %d\n", WEXITSTATUS(wstatus)); + } + } else { + fprintf(stdout, "crash debug mode shell: %s\n", strsignal(WTERMSIG(wstatus))); + } + return false; + } + + revoke(_PATH_CONSOLE); + if (!assumes((fd = open(_PATH_CONSOLE, O_RDWR)) != -1)) { + _exit(EXIT_FAILURE); + } + if (!assumes(login_tty(fd) != -1)) { + _exit(EXIT_FAILURE); + } + + mach_timespec_t wt = { 5, 0 }; + IOKitWaitQuiet(kIOMasterPortDefault, &wt); /* This will hopefully return after all the kexts have shut up. */ + + setenv("TERM", "vt100", 1); + fprintf(stdout, "Entering boot-time debugging mode...\n"); + fprintf(stdout, "The system bootstrapper process has crashed. To debug:\n"); + fprintf(stdout, "\tgdb attach %i\n", getppid()); + fprintf(stdout, "You can try booting the system with:\n"); + fprintf(stdout, "\tlaunchctl load -S System -D All\n\n"); + + execl(_PATH_BSHELL, "-sh", NULL); + syslog(LOG_ERR, "can't exec %s for crash debug: %m", _PATH_BSHELL); + _exit(EXIT_FAILURE); +} + static void exit_at_sigterm(int sig) { @@ -1813,6 +1880,33 @@ } } +void +fatal_signal_handler(int sig __attribute__((unused)), siginfo_t *si __attribute__((unused)), void *uap __attribute__((unused))) +{ + do_crash_debug_mode(); +} + +void +handle_system_bootstrapper_crashes_separately(void) +{ + if( !g_booting_verbose ) { + return; + } + + struct sigaction fsa; + + fsa.sa_sigaction = fatal_signal_handler; + fsa.sa_flags = SA_SIGINFO; + sigemptyset(&fsa.sa_mask); + + assumes(sigaction(SIGILL, &fsa, NULL) != -1); + assumes(sigaction(SIGFPE, &fsa, NULL) != -1); + assumes(sigaction(SIGBUS, &fsa, NULL) != -1); + assumes(sigaction(SIGSEGV, &fsa, NULL) != -1); + assumes(sigaction(SIGTRAP, &fsa, NULL) != -1); + assumes(sigaction(SIGABRT, &fsa, NULL) != -1); +} + static void system_specific_bootstrap(bool sflag) { @@ -1879,6 +1973,7 @@ tell_launchd_about_boot_args(); read_launchd_conf(); + handle_system_bootstrapper_crashes_separately(); if (path_check("/var/account/acct")) { assumes(acct("/var/account/acct") != -1); @@ -3012,7 +3107,6 @@ IOObjectRelease(entry); } while( 0 ); - Boolean is_verbose = false; if( value ) { /* Normally I'd just use CFStringFind(), but the compiler whines about it returning a * struct with -Wall. @@ -3020,10 +3114,10 @@ CFRange range = { 0, CFStringGetLength(value) }; CFRange found_range = { 0, 0 }; - is_verbose = CFStringFindWithOptions(value, CFSTR("-v"), range, 0, &found_range); + g_booting_verbose = CFStringFindWithOptions(value, CFSTR("-v"), range, 0, &found_range); CFRelease(value); - assumes(vproc_swap_integer(NULL, VPROC_GSK_SHUTDOWN_DEBUGGING, (int64_t *)&is_verbose, NULL) == KERN_SUCCESS); + assumes(vproc_swap_integer(NULL, VPROC_GSK_SHUTDOWN_DEBUGGING, (int64_t *)&g_booting_verbose, NULL) == KERN_SUCCESS); } } Modified: trunk/launchd/src/launchd_core_logic.c =================================================================== --- trunk/launchd/src/launchd_core_logic.c 2009-03-24 20:59:07 UTC (rev 23866) +++ trunk/launchd/src/launchd_core_logic.c 2009-03-24 22:27:19 UTC (rev 23867) @@ -2515,6 +2515,8 @@ struct rusage ru; int status; + bool is_system_bootstrapper = j->is_bootstrapper && pid1_magic && !j->mgr->parentmgr; + job_log(j, LOG_DEBUG, "Reaping"); if (j->shmem) { @@ -2561,7 +2563,7 @@ #endif } } - + /* * 5020256 * @@ -2655,6 +2657,10 @@ job_log(j, LOG_WARNING, "Exited abnormally: %s", strsignal(s)); break; } + + if( is_system_bootstrapper && j->crashed ) { + job_log(j, LOG_ERR | LOG_CONSOLE, "The %s bootstrapper has crashed: %s", j->mgr->name, strsignal(s)); + } } } @@ -2804,9 +2810,6 @@ switch( (p = fork()) ) { case 0 : job_assumes(j, runtime_close(execpair[0]) != -1); - /* Handle exceptions directly. */ - task_set_exception_ports(mach_task_self(), EXC_MASK_CRASH, runtime_get_kernel_port(), EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, f); - /* Wait for the parent to attach a kevent. */ read(_fd(execpair[1]), &p, sizeof(p)); what_to_do(j); @@ -6863,6 +6866,8 @@ goto out_bad; } + job_log(j, LOG_DEBUG | LOG_CONSOLE, "Location of job cache database: %s", launch_data_get_string(output_obj)); + launch_data_free(output_obj); break; case 0: