[launchd-changes] [23626] trunk/launchd/src/launchd_core_logic.c

source_changes at macosforge.org source_changes at macosforge.org
Fri May 9 15:38:52 PDT 2008


Revision: 23626
          http://trac.macosforge.org/projects/launchd/changeset/23626
Author:   zarzycki at apple.com
Date:     2008-05-09 15:38:49 -0700 (Fri, 09 May 2008)

Log Message:
-----------
<rdar://problem/5834727> 10A37: launchd SIGKILL'ing after 2s

Modified Paths:
--------------
    trunk/launchd/src/launchd_core_logic.c

Modified: trunk/launchd/src/launchd_core_logic.c
===================================================================
--- trunk/launchd/src/launchd_core_logic.c	2008-05-09 20:11:15 UTC (rev 23625)
+++ trunk/launchd/src/launchd_core_logic.c	2008-05-09 22:38:49 UTC (rev 23626)
@@ -99,8 +99,17 @@
 #include "job_reply.h"
 #include "job_forward.h"
 
+/*
+ * LAUNCHD_SAMPLE_TIMEOUT
+ *   If the job hasn't exited in the given number of seconds after sending
+ *   it a SIGTERM, start sampling it.
+ * LAUNCHD_DEFAULT_EXIT_TIMEOUT
+ *   If the job hasn't exited in the given number of seconds after sending
+ *   it a SIGTERM, SIGKILL it. Can be overridden in the job plist.
+ */
 #define LAUNCHD_MIN_JOB_RUN_TIME 10
-#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 2
+#define LAUNCHD_SAMPLE_TIMEOUT 2
+#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 20
 #define LAUNCHD_SIGKILL_TIMER 5
 
 #define SHUTDOWN_LOG_DIR "/var/log/shutdown"
@@ -413,12 +422,13 @@
 	     internal_exc_handler:1,		/* MachExceptionHandler == true */
 	     stall_before_exec:1,		/* a hack to support an option of spawn_via_launchd() */
 	     only_once:1,			/* man launchd.plist --> LaunchOnlyOnce. Note: 5465184 Rename this to "HopefullyNeverExits" */
-	     currently_ignored:1,		/* Make job_ignore() /  job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */
+	     currently_ignored:1,		/* Make job_ignore() / job_watch() work. If these calls were balanced, then this wouldn't be necessary. */
 	     forced_peers_to_demand_mode:1,	/* A job that forced all other jobs to be temporarily launch-on-demand */
 	     setnice:1,				/* man launchd.plist --> Nice */
 	     hopefully_exits_last:1,		/* man launchd.plist --> HopefullyExitsLast */
 	     removal_pending:1,			/* a job was asked to be unloaded/removed while running, we'll remove it after it exits */
 	     sent_sigkill:1,			/* job_kill() was called */
+	     sampled:1,				/* job_force_sampletool() was called (or is disabled) */
 	     debug_before_kill:1,		/* enter the kernel debugger before killing a job */
 	     weird_bootstrap:1,			/* a hack that launchd+launchctl use during jobmgr_t creation */
 	     start_on_mount:1,			/* man launchd.plist --> StartOnMount */
@@ -627,12 +637,27 @@
 	if (newval < 0) {
 		job_kill(j);
 	} else {
+		/*
+		 * If sampling is enabled and SAMPLE_TIMEOUT is earlier than the job exit_timeout,
+		 * then set a timer for SAMPLE_TIMEOUT seconds after sending SIGTERM
+		 */
+		unsigned int exit_timeout = j->exit_timeout;
+		bool do_sample = do_apple_internal_logging;
+		unsigned int timeout = exit_timeout;
+
+		if (do_sample && (!exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < exit_timeout))) {
+			timeout = LAUNCHD_SAMPLE_TIMEOUT;
+		}
+
 		job_assumes(j, runtime_kill(j->p, SIGTERM) != -1);
 
-		if (j->exit_timeout) {
+		if (timeout) {
+			j->sampled = !do_sample;
 			job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER,
-						EV_ADD|EV_ONESHOT, NOTE_SECONDS, j->exit_timeout, j) != -1);
-		} else {
+						EV_ADD|EV_ONESHOT, NOTE_SECONDS, timeout, j) != -1);
+		}
+
+		if (!exit_timeout) {
 			job_log(j, LOG_DEBUG, "This job has an infinite exit timeout");
 		}
 
@@ -2424,6 +2449,7 @@
 	}
 	j->last_exit_status = status;
 	j->sent_sigkill = false;
+	j->sampled = false;
 	j->sent_kill_via_shmem = false;
 	j->lastlookup = NULL;
 	j->lastlookup_gennum = 0;
@@ -2671,20 +2697,39 @@
 		j->start_pending = true;
 		job_dispatch(j, false);
 	} else if (&j->exit_timeout == ident) {
+		/*
+		 * This block might be executed up to 3 times for a given (slow) job
+		 *  - once for the SAMPLE_TIMEOUT timer, at which point sampling is triggered
+		 *  - once for the exit_timeout timer, at which point:
+		 *          - sampling is performed if not triggered previously
+		 *          - SIGKILL is being sent to the job
+		 *  - once for the SIGKILL_TIMER timer, at which point we log an issue
+		 *    with the long SIGKILL
+		 */
 		if (j->sent_sigkill) {
 			uint64_t td = runtime_get_nanoseconds_since(j->sent_sigterm_time);
 
 			td /= NSEC_PER_SEC;
 			td -= j->exit_timeout;
 
-			job_log(j, LOG_ERR, "Did not die after sending SIGKILL %llu seconds ago...", td);
+			job_log(j, LOG_WARNING, "Did not die after sending SIGKILL %llu seconds ago...", td);
+		} else if (!j->sampled && (!j->exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < j->exit_timeout))) {
+			/* This should work even if the job changes its exit_timeout midstream */
+			job_log(j, LOG_NOTICE, "Sampling timeout elapsed (%u seconds). Sampling...", LAUNCHD_SAMPLE_TIMEOUT);
+			if (j->exit_timeout) {
+				unsigned int ttk = (j->exit_timeout - LAUNCHD_SAMPLE_TIMEOUT);
+				job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER,
+							EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1);
+				job_log(j, LOG_NOTICE, "Scheduled new exit timeout for %u seconds later", ttk);
+			}
+			job_force_sampletool(j);
 		} else {
-			job_force_sampletool(j);
+			job_force_sampletool(j); /* no-op if already done in previous pass */
 			if (unlikely(j->debug_before_kill)) {
-				job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger.");
+				job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger");
 				job_assumes(j, host_reboot(mach_host_self(), HOST_REBOOT_DEBUGGER) == KERN_SUCCESS);
 			}
-			job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing.", j->exit_timeout);
+			job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing", j->exit_timeout);
 			job_kill(j);
 		}
 	} else {
@@ -3394,7 +3439,7 @@
 	if (j->stdin_fd) {
 		job_assumes(j, dup2(j->stdin_fd, STDIN_FILENO) != -1);
 	} else {
-		job_setup_fd(j, STDIN_FILENO,  j->stdinpath,  O_RDONLY|O_CREAT);
+		job_setup_fd(j, STDIN_FILENO, j->stdinpath, O_RDONLY|O_CREAT);
 	}
 	job_setup_fd(j, STDOUT_FILENO, j->stdoutpath, O_WRONLY|O_CREAT|O_APPEND);
 	job_setup_fd(j, STDERR_FILENO, j->stderrpath, O_WRONLY|O_CREAT|O_APPEND);
@@ -5206,10 +5251,15 @@
 	int wstatus;
 	pid_t sp;
 
-	if (!do_apple_internal_logging) {
+	if (j->sampled) {
 		return;
 	}
-	
+	j->sampled = true;
+
+	if (!job_assumes(j, do_apple_internal_logging)) {
+		return;
+	}
+
 	if (!job_assumes(j, mkdir(SHUTDOWN_LOG_DIR, S_IRWXU) != -1 || errno == EEXIST)) {
 		return;
 	}

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://lists.macosforge.org/pipermail/launchd-changes/attachments/20080509/847214a6/attachment.html


More information about the launchd-changes mailing list