<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary panel (#msg): revision metadata, log message, changed paths. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
/* Link states share one specificity, so the last matching rule wins.
   :visited has to come before :active, otherwise a visited link never
   shows its active colour. */
#msg dl a:link { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre, #msg p { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff panel (#patch): one bordered box per changed file. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
/* Each diff line group is a span, ins or del rendered as a full-width block. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers and file headers; both scoped to #patch like every other diff rule. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<title>[23626] trunk/launchd/src/launchd_core_logic.c</title>
</head>
<body>
<div id="msg">
<dl>
<dt>Revision</dt> <dd><a href="http://trac.macosforge.org/projects/launchd/changeset/23626">23626</a></dd>
<dt>Author</dt> <dd>zarzycki@apple.com</dd>
<dt>Date</dt> <dd>2008-05-09 15:38:49 -0700 (Fri, 09 May 2008)</dd>
</dl>
<h3>Log Message</h3>
<pre>&lt;rdar://problem/5834727&gt; 10A37: launchd SIGKILL'ing after 2s</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#trunklaunchdsrclaunchd_core_logicc">trunk/launchd/src/launchd_core_logic.c</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunklaunchdsrclaunchd_core_logicc"></a>
<div class="modfile"><h4>Modified: trunk/launchd/src/launchd_core_logic.c (23625 => 23626)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/launchd/src/launchd_core_logic.c        2008-05-09 20:11:15 UTC (rev 23625)
+++ trunk/launchd/src/launchd_core_logic.c        2008-05-09 22:38:49 UTC (rev 23626)
</span><span class="lines">@@ -99,8 +99,17 @@
</span><span class="cx"> #include "job_reply.h"
</span><span class="cx"> #include "job_forward.h"
</span><span class="cx">
</span><ins>+/*
+ * LAUNCHD_SAMPLE_TIMEOUT
+ * If the job hasn't exited in the given number of seconds after sending
+ * it a SIGTERM, start sampling it.
+ * LAUNCHD_DEFAULT_EXIT_TIMEOUT
+ * If the job hasn't exited in the given number of seconds after sending
+ * it a SIGTERM, SIGKILL it. Can be overriden in the job plist.
+ */
</ins><span class="cx"> #define LAUNCHD_MIN_JOB_RUN_TIME 10
</span><del>-#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 2
</del><ins>+#define LAUNCHD_SAMPLE_TIMEOUT 2
+#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 20
</ins><span class="cx"> #define LAUNCHD_SIGKILL_TIMER 5
</span><span class="cx">
</span><span class="cx"> #define SHUTDOWN_LOG_DIR "/var/log/shutdown"
</span><span class="lines">@@ -413,12 +422,13 @@
</span><span class="cx">          internal_exc_handler:1,                /* MachExceptionHandler == true */
</span><span class="cx">          stall_before_exec:1,                /* a hack to support an option of spawn_via_launchd() */
</span><span class="cx">          only_once:1,                        /* man launchd.plist --> LaunchOnlyOnce. Note: 5465184 Rename this to "HopefullyNeverExits" */
</span><del>-         currently_ignored:1,                /* Make job_ignore() / job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */
</del><ins>+         currently_ignored:1,                /* Make job_ignore() / job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */
</ins><span class="cx">          forced_peers_to_demand_mode:1,        /* A job that forced all other jobs to be temporarily launch-on-demand */
</span><span class="cx">          setnice:1,                                /* man launchd.plist --> Nice */
</span><span class="cx">          hopefully_exits_last:1,                /* man launchd.plist --> HopefullyExitsLast */
</span><span class="cx">          removal_pending:1,                        /* a job was asked to be unloaded/removed while running, we'll remove it after it exits */
</span><span class="cx">          sent_sigkill:1,                        /* job_kill() was called */
</span><ins>+         sampled:1,                                /* job_force_sampletool() was called (or is disabled) */
</ins><span class="cx">          debug_before_kill:1,                /* enter the kernel debugger before killing a job */
</span><span class="cx">          weird_bootstrap:1,                        /* a hack that launchd+launchctl use during jobmgr_t creation */
</span><span class="cx">          start_on_mount:1,                        /* man launchd.plist --> StartOnMount */
</span><span class="lines">@@ -627,12 +637,27 @@
</span><span class="cx">         if (newval &lt; 0) {
</span><span class="cx">                 job_kill(j);
</span><span class="cx">         } else {
</span><ins>+                /*
+                 * If sampling is enabled and SAMPLE_TIMEOUT is earlier than the job exit_timeout,
+                 * then set a timer for SAMPLE_TIMEOUT seconds after killing
+                 */
+                unsigned int exit_timeout = j->exit_timeout;
+                bool do_sample = do_apple_internal_logging;
+                unsigned int timeout = exit_timeout;
+
+                if (do_sample &amp;&amp; (!exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT &lt; exit_timeout))) {
+                        timeout = LAUNCHD_SAMPLE_TIMEOUT;
+                }
+
</ins><span class="cx">                 job_assumes(j, runtime_kill(j->p, SIGTERM) != -1);
</span><span class="cx">
</span><del>-                if (j->exit_timeout) {
</del><ins>+                if (timeout) {
+                        j->sampled = !do_sample;
</ins><span class="cx">                         job_assumes(j, kevent_mod((uintptr_t)&amp;j->exit_timeout, EVFILT_TIMER,
</span><del>-                                                EV_ADD|EV_ONESHOT, NOTE_SECONDS, j->exit_timeout, j) != -1);
-                } else {
</del><ins>+                                                EV_ADD|EV_ONESHOT, NOTE_SECONDS, timeout, j) != -1);
+                }
+
+                if (!exit_timeout) {
</ins><span class="cx">                         job_log(j, LOG_DEBUG, "This job has an infinite exit timeout");
</span><span class="cx">                 }
</span><span class="cx">
</span><span class="lines">@@ -2424,6 +2449,7 @@
</span><span class="cx">         }
</span><span class="cx">         j->last_exit_status = status;
</span><span class="cx">         j->sent_sigkill = false;
</span><ins>+        j->sampled = false;
</ins><span class="cx">         j->sent_kill_via_shmem = false;
</span><span class="cx">         j->lastlookup = NULL;
</span><span class="cx">         j->lastlookup_gennum = 0;
</span><span class="lines">@@ -2671,20 +2697,39 @@
</span><span class="cx">                 j->start_pending = true;
</span><span class="cx">                 job_dispatch(j, false);
</span><span class="cx">         } else if (&amp;j->exit_timeout == ident) {
</span><ins>+                /*
+                 * This block might be executed up to 3 times for a given (slow) job
+                 * - once for the SAMPLE_TIMEOUT timer, at which point sampling is triggered
+                 * - once for the exit_timeout timer, at which point:
+                 * - sampling is performed if not triggered previously
+                 * - SIGKILL is being sent to the job
+                 * - once for the SIGKILL_TIMER timer, at which point we log an issue
+                 * with the long SIGKILL
+                 */
</ins><span class="cx">                 if (j->sent_sigkill) {
</span><span class="cx">                         uint64_t td = runtime_get_nanoseconds_since(j->sent_sigterm_time);
</span><span class="cx">
</span><span class="cx">                         td /= NSEC_PER_SEC;
</span><span class="cx">                         td -= j->exit_timeout;
</span><span class="cx">
</span><del>-                        job_log(j, LOG_ERR, "Did not die after sending SIGKILL %llu seconds ago...", td);
</del><ins>+                        job_log(j, LOG_WARNING, "Did not die after sending SIGKILL %llu seconds ago...", td);
+                } else if (!j->sampled &amp;&amp; (!j->exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT &lt; j->exit_timeout))) {
+                        /* This should work even if the job changes its exit_timeout midstream */
+                        job_log(j, LOG_NOTICE, "Sampling timeout elapsed (%u seconds). Sampling...", LAUNCHD_SAMPLE_TIMEOUT);
+                        if (j->exit_timeout) {
+                                unsigned int ttk = (j->exit_timeout - LAUNCHD_SAMPLE_TIMEOUT);
+                                job_assumes(j, kevent_mod((uintptr_t)&amp;j->exit_timeout, EVFILT_TIMER,
+                                                        EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1);
+                                job_log(j, LOG_NOTICE, "Scheduled new exit timeout for %u seconds later", ttk);
+                        }
+                        job_force_sampletool(j);
</ins><span class="cx">                 } else {
</span><del>-                        job_force_sampletool(j);
</del><ins>+                        job_force_sampletool(j); /* no-op if already done in previous pass */
</ins><span class="cx">                         if (unlikely(j->debug_before_kill)) {
</span><del>-                                job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger.");
</del><ins>+                                job_log(j, LOG_NOTICE, "Exit timeout elapsed. Entering the kernel debugger");
</ins><span class="cx">                                 job_assumes(j, host_reboot(mach_host_self(), HOST_REBOOT_DEBUGGER) == KERN_SUCCESS);
</span><span class="cx">                         }
</span><del>-                        job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing.", j->exit_timeout);
</del><ins>+                        job_log(j, LOG_WARNING, "Exit timeout elapsed (%u seconds). Killing", j->exit_timeout);
</ins><span class="cx">                         job_kill(j);
</span><span class="cx">                 }
</span><span class="cx">         } else {
</span><span class="lines">@@ -3394,7 +3439,7 @@
</span><span class="cx">         if (j->stdin_fd) {
</span><span class="cx">                 job_assumes(j, dup2(j->stdin_fd, STDIN_FILENO) != -1);
</span><span class="cx">         } else {
</span><del>-                job_setup_fd(j, STDIN_FILENO, j->stdinpath, O_RDONLY|O_CREAT);
</del><ins>+                job_setup_fd(j, STDIN_FILENO, j->stdinpath, O_RDONLY|O_CREAT);
</ins><span class="cx">         }
</span><span class="cx">         job_setup_fd(j, STDOUT_FILENO, j->stdoutpath, O_WRONLY|O_CREAT|O_APPEND);
</span><span class="cx">         job_setup_fd(j, STDERR_FILENO, j->stderrpath, O_WRONLY|O_CREAT|O_APPEND);
</span><span class="lines">@@ -5206,10 +5251,15 @@
</span><span class="cx">         int wstatus;
</span><span class="cx">         pid_t sp;
</span><span class="cx">
</span><del>-        if (!do_apple_internal_logging) {
</del><ins>+        if (j->sampled) {
</ins><span class="cx">                 return;
</span><span class="cx">         }
</span><del>-        
</del><ins>+        j->sampled = true;
+
+        if (!job_assumes(j, do_apple_internal_logging)) {
+                return;
+        }
+
</ins><span class="cx">         if (!job_assumes(j, mkdir(SHUTDOWN_LOG_DIR, S_IRWXU) != -1 || errno == EEXIST)) {
</span><span class="cx">                 return;
</span><span class="cx">         }
</span></span></pre>
</div>
</div>
</body>
</html>