<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary panel (#msg): metadata list, log message, changed paths. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
/* Link states are listed in link, visited, active order. The three rules have
   equal specificity, so the last match wins; :active has to follow :visited
   or a visited link never shows its pressed colour. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre, #msg p { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff panel (#patch): one bordered box per changed file, with a heading bar
   above a scrollable, preformatted diff body. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Every diff line group is a block so its background spans the full width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
/* Hunk headers and the file header lines; both selectors are scoped to #patch
   so the rule cannot restyle an unrelated .info element elsewhere. */
#patch .lines, #patch .info {color:#888;background:#fff;}
--></style>
<title>[23626] trunk/launchd/src/launchd_core_logic.c</title>
</head>
<body>

<div id="msg">
  <!-- Commit metadata: revision link, author and commit date -->
  <dl>
    <dt>Revision</dt>
    <dd><a href="http://trac.macosforge.org/projects/launchd/changeset/23626">23626</a></dd>
    <dt>Author</dt>
    <dd>zarzycki@apple.com</dd>
    <dt>Date</dt>
    <dd>2008-05-09 15:38:49 -0700 (Fri, 09 May 2008)</dd>
  </dl>

  <h3>Log Message</h3>
  <!-- Kept on a single line: whitespace inside pre is rendered verbatim -->
  <pre>&lt;rdar://problem/5834727&gt; 10A37: launchd SIGKILL'ing after 2s</pre>

  <h3>Modified Paths</h3>
  <!-- Each entry links to the anchor with the same id in the diff section -->
  <ul>
    <li><a href="#trunklaunchdsrclaunchd_core_logicc">trunk/launchd/src/launchd_core_logic.c</a></li>
  </ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunklaunchdsrclaunchd_core_logicc"></a>
<div class="modfile"><h4>Modified: trunk/launchd/src/launchd_core_logic.c (23625 => 23626)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/launchd/src/launchd_core_logic.c        2008-05-09 20:11:15 UTC (rev 23625)
+++ trunk/launchd/src/launchd_core_logic.c        2008-05-09 22:38:49 UTC (rev 23626)
</span><span class="lines">@@ -99,8 +99,17 @@
</span><span class="cx"> #include &quot;job_reply.h&quot;
</span><span class="cx"> #include &quot;job_forward.h&quot;
</span><span class="cx"> 
</span><ins>+/*
+ * LAUNCHD_SAMPLE_TIMEOUT
+ *   If the job hasn't exited in the given number of seconds after sending
+ *   it a SIGTERM, start sampling it.
+ * LAUNCHD_DEFAULT_EXIT_TIMEOUT
+ *   If the job hasn't exited in the given number of seconds after sending
+ *   it a SIGTERM, SIGKILL it. Can be overriden in the job plist.
+ */
</ins><span class="cx"> #define LAUNCHD_MIN_JOB_RUN_TIME 10
</span><del>-#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 2
</del><ins>+#define LAUNCHD_SAMPLE_TIMEOUT 2
+#define LAUNCHD_DEFAULT_EXIT_TIMEOUT 20
</ins><span class="cx"> #define LAUNCHD_SIGKILL_TIMER 5
</span><span class="cx"> 
</span><span class="cx"> #define SHUTDOWN_LOG_DIR &quot;/var/log/shutdown&quot;
</span><span class="lines">@@ -413,12 +422,13 @@
</span><span class="cx">              internal_exc_handler:1,                /* MachExceptionHandler == true */
</span><span class="cx">              stall_before_exec:1,                /* a hack to support an option of spawn_via_launchd() */
</span><span class="cx">              only_once:1,                        /* man launchd.plist --&gt; LaunchOnlyOnce. Note: 5465184 Rename this to &quot;HopefullyNeverExits&quot; */
</span><del>-             currently_ignored:1,                /* Make job_ignore() /  job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */
</del><ins>+             currently_ignored:1,                /* Make job_ignore() / job_watch() work. If these calls were balanced, then this wouldn't be necessarily. */
</ins><span class="cx">              forced_peers_to_demand_mode:1,        /* A job that forced all other jobs to be temporarily launch-on-demand */
</span><span class="cx">              setnice:1,                                /* man launchd.plist --&gt; Nice */
</span><span class="cx">              hopefully_exits_last:1,                /* man launchd.plist --&gt; HopefullyExitsLast */
</span><span class="cx">              removal_pending:1,                        /* a job was asked to be unloaded/removed while running, we'll remove it after it exits */
</span><span class="cx">              sent_sigkill:1,                        /* job_kill() was called */
</span><ins>+             sampled:1,                                /* job_force_sampletool() was called (or is disabled) */
</ins><span class="cx">              debug_before_kill:1,                /* enter the kernel debugger before killing a job */
</span><span class="cx">              weird_bootstrap:1,                        /* a hack that launchd+launchctl use during jobmgr_t creation */
</span><span class="cx">              start_on_mount:1,                        /* man launchd.plist --&gt; StartOnMount */
</span><span class="lines">@@ -627,12 +637,27 @@
</span><span class="cx">         if (newval &lt; 0) {
</span><span class="cx">                 job_kill(j);
</span><span class="cx">         } else {
</span><ins>+                /*
+                 * If sampling is enabled and SAMPLE_TIMEOUT is earlier than the job exit_timeout,
+                 * then set a timer for SAMPLE_TIMEOUT seconds after killing
+                 */
+                unsigned int exit_timeout = j-&gt;exit_timeout;
+                bool do_sample = do_apple_internal_logging;
+                unsigned int timeout = exit_timeout;
+
+                if (do_sample &amp;&amp; (!exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT &lt; exit_timeout))) {
+                        timeout = LAUNCHD_SAMPLE_TIMEOUT;
+                }
+
</ins><span class="cx">                 job_assumes(j, runtime_kill(j-&gt;p, SIGTERM) != -1);
</span><span class="cx"> 
</span><del>-                if (j-&gt;exit_timeout) {
</del><ins>+                if (timeout) {
+                        j-&gt;sampled = !do_sample;
</ins><span class="cx">                         job_assumes(j, kevent_mod((uintptr_t)&amp;j-&gt;exit_timeout, EVFILT_TIMER,
</span><del>-                                                EV_ADD|EV_ONESHOT, NOTE_SECONDS, j-&gt;exit_timeout, j) != -1);
-                } else {
</del><ins>+                                                EV_ADD|EV_ONESHOT, NOTE_SECONDS, timeout, j) != -1);
+                }
+
+                if (!exit_timeout) {
</ins><span class="cx">                         job_log(j, LOG_DEBUG, &quot;This job has an infinite exit timeout&quot;);
</span><span class="cx">                 }
</span><span class="cx"> 
</span><span class="lines">@@ -2424,6 +2449,7 @@
</span><span class="cx">         }
</span><span class="cx">         j-&gt;last_exit_status = status;
</span><span class="cx">         j-&gt;sent_sigkill = false;
</span><ins>+        j-&gt;sampled = false;
</ins><span class="cx">         j-&gt;sent_kill_via_shmem = false;
</span><span class="cx">         j-&gt;lastlookup = NULL;
</span><span class="cx">         j-&gt;lastlookup_gennum = 0;
</span><span class="lines">@@ -2671,20 +2697,39 @@
</span><span class="cx">                 j-&gt;start_pending = true;
</span><span class="cx">                 job_dispatch(j, false);
</span><span class="cx">         } else if (&amp;j-&gt;exit_timeout == ident) {
</span><ins>+                /*
+                 * This block might be executed up to 3 times for a given (slow) job
+                 *  - once for the SAMPLE_TIMEOUT timer, at which point sampling is triggered
+                 *  - once for the exit_timeout timer, at which point:
+                 *          - sampling is performed if not triggered previously
+                 *          - SIGKILL is being sent to the job
+                 *  - once for the SIGKILL_TIMER timer, at which point we log an issue
+                 *    with the long SIGKILL
+                 */
</ins><span class="cx">                 if (j-&gt;sent_sigkill) {
</span><span class="cx">                         uint64_t td = runtime_get_nanoseconds_since(j-&gt;sent_sigterm_time);
</span><span class="cx"> 
</span><span class="cx">                         td /= NSEC_PER_SEC;
</span><span class="cx">                         td -= j-&gt;exit_timeout;
</span><span class="cx"> 
</span><del>-                        job_log(j, LOG_ERR, &quot;Did not die after sending SIGKILL %llu seconds ago...&quot;, td);
</del><ins>+                        job_log(j, LOG_WARNING, &quot;Did not die after sending SIGKILL %llu seconds ago...&quot;, td);
+                } else if (!j-&gt;sampled &amp;&amp; (!j-&gt;exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT &lt; j-&gt;exit_timeout))) {
+                        /* This should work even if the job changes its exit_timeout midstream */
+                        job_log(j, LOG_NOTICE, &quot;Sampling timeout elapsed (%u seconds). Sampling...&quot;, LAUNCHD_SAMPLE_TIMEOUT);
+                        if (j-&gt;exit_timeout) {
+                                unsigned int ttk = (j-&gt;exit_timeout - LAUNCHD_SAMPLE_TIMEOUT);
+                                job_assumes(j, kevent_mod((uintptr_t)&amp;j-&gt;exit_timeout, EVFILT_TIMER,
+                                                        EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1);
+                                job_log(j, LOG_NOTICE, &quot;Scheduled new exit timeout for %u seconds later&quot;, ttk);
+                        }
+                        job_force_sampletool(j);
</ins><span class="cx">                 } else {
</span><del>-                        job_force_sampletool(j);
</del><ins>+                        job_force_sampletool(j); /* no-op if already done in previous pass */
</ins><span class="cx">                         if (unlikely(j-&gt;debug_before_kill)) {
</span><del>-                                job_log(j, LOG_NOTICE, &quot;Exit timeout elapsed. Entering the kernel debugger.&quot;);
</del><ins>+                                job_log(j, LOG_NOTICE, &quot;Exit timeout elapsed. Entering the kernel debugger&quot;);
</ins><span class="cx">                                 job_assumes(j, host_reboot(mach_host_self(), HOST_REBOOT_DEBUGGER) == KERN_SUCCESS);
</span><span class="cx">                         }
</span><del>-                        job_log(j, LOG_WARNING, &quot;Exit timeout elapsed (%u seconds). Killing.&quot;, j-&gt;exit_timeout);
</del><ins>+                        job_log(j, LOG_WARNING, &quot;Exit timeout elapsed (%u seconds). Killing&quot;, j-&gt;exit_timeout);
</ins><span class="cx">                         job_kill(j);
</span><span class="cx">                 }
</span><span class="cx">         } else {
</span><span class="lines">@@ -3394,7 +3439,7 @@
</span><span class="cx">         if (j-&gt;stdin_fd) {
</span><span class="cx">                 job_assumes(j, dup2(j-&gt;stdin_fd, STDIN_FILENO) != -1);
</span><span class="cx">         } else {
</span><del>-                job_setup_fd(j, STDIN_FILENO,  j-&gt;stdinpath,  O_RDONLY|O_CREAT);
</del><ins>+                job_setup_fd(j, STDIN_FILENO, j-&gt;stdinpath, O_RDONLY|O_CREAT);
</ins><span class="cx">         }
</span><span class="cx">         job_setup_fd(j, STDOUT_FILENO, j-&gt;stdoutpath, O_WRONLY|O_CREAT|O_APPEND);
</span><span class="cx">         job_setup_fd(j, STDERR_FILENO, j-&gt;stderrpath, O_WRONLY|O_CREAT|O_APPEND);
</span><span class="lines">@@ -5206,10 +5251,15 @@
</span><span class="cx">         int wstatus;
</span><span class="cx">         pid_t sp;
</span><span class="cx"> 
</span><del>-        if (!do_apple_internal_logging) {
</del><ins>+        if (j-&gt;sampled) {
</ins><span class="cx">                 return;
</span><span class="cx">         }
</span><del>-        
</del><ins>+        j-&gt;sampled = true;
+
+        if (!job_assumes(j, do_apple_internal_logging)) {
+                return;
+        }
+
</ins><span class="cx">         if (!job_assumes(j, mkdir(SHUTDOWN_LOG_DIR, S_IRWXU) != -1 || errno == EEXIST)) {
</span><span class="cx">                 return;
</span><span class="cx">         }
</span></span></pre>
</div>
</div>

</body>
</html>