[launchd-changes] [23756] trunk/launchd/src

source_changes at macosforge.org source_changes at macosforge.org
Tue Nov 25 15:48:06 PST 2008


Revision: 23756
          http://trac.macosforge.org/projects/launchd/changeset/23756
Author:   dsorresso at apple.com
Date:     2008-11-25 15:48:06 -0800 (Tue, 25 Nov 2008)
Log Message:
-----------
<rdar://problem/6153922> Support allowing Launchd to launch by label (take 2)
<rdar://problem/6372699> Mach exception chain does not cleanly unwind
<rdar://problem/6398887> Race condition in sampling path, other changes to make sampling more robust

Modified Paths:
--------------
    trunk/launchd/src/launchd.c
    trunk/launchd/src/launchd_core_logic.c

Modified: trunk/launchd/src/launchd.c
===================================================================
--- trunk/launchd/src/launchd.c	2008-11-22 22:31:37 UTC (rev 23755)
+++ trunk/launchd/src/launchd.c	2008-11-25 23:48:06 UTC (rev 23756)
@@ -188,6 +188,11 @@
 
 	if( pid1_magic ) {
 		runtime_syslog(LOG_NOTICE | LOG_CONSOLE, "*** launchd[1] has started up. ***");
+		
+		struct stat sb;
+		if( stat("/var/db/.launchd_flat_per_user_namespace", &sb) == 0 ) {
+			runtime_syslog(LOG_NOTICE | LOG_CONSOLE, "Flat per-user Mach namespaces enabled.");
+		}
 	}
 
 	monitor_networking_state();

Modified: trunk/launchd/src/launchd_core_logic.c
===================================================================
--- trunk/launchd/src/launchd_core_logic.c	2008-11-22 22:31:37 UTC (rev 23755)
+++ trunk/launchd/src/launchd_core_logic.c	2008-11-25 23:48:06 UTC (rev 23756)
@@ -2699,12 +2699,9 @@
 		return;
 	}
 
-	job_log(j, LOG_DEBUG | LOG_CONSOLE, "Going to sample job.");
-
 	char pidstr[32];
 	snprintf(pidstr, sizeof(pidstr), "%u", j->p);
 	snprintf(j->mgr->sample_log_file, sizeof(j->mgr->sample_log_file), SHUTDOWN_LOG_DIR "/%s-%u.sample.txt", j->label, j->p);
-	job_log(j, LOG_DEBUG | LOG_CONSOLE, "Going to write sample to %s.", j->mgr->sample_log_file);
 	
 	if (job_assumes(j, unlink(jm->sample_log_file) != -1 || errno == ENOENT)) {
 		pid_t sp = 0;
@@ -2738,33 +2735,74 @@
 		
 		posix_spawnattr_destroy(&psattr);
 	#else
-		switch( (sp = vfork()) ) {
+		int execpair[2] = { 0, 0 };
+		job_assumes(j, socketpair(AF_UNIX, SOCK_STREAM, 0, execpair) != -1);
+		
+		switch( (sp = fork()) ) {
 			case 0	:
-				/* Handle sample's exceptions directly, since ReportCrash may not be able to. */
+				job_assumes(j, runtime_close(execpair[0]) != -1);
+				/* Handle sample's exceptions directly, since ReportCrash will not be able to. */
 				task_set_exception_ports(mach_task_self(), EXC_MASK_CRASH, runtime_get_kernel_port(), EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES, f);
+				
+				/* Wait for the parent to attach a kevent. */
+				read(_fd(execpair[1]), &sp, sizeof(sp));
 				execve(sample_args[0], sample_args, environ);
 				job_log(j, LOG_NOTICE | LOG_CONSOLE, "Could not exec(2): %d", errno);
 				_exit(EXIT_FAILURE);
 			case -1	:
-				job_log(j, LOG_NOTICE | LOG_CONSOLE, "vfork(2) failed: %d", errno);
+				job_assumes(j, runtime_close(execpair[0]) != -1);
+				job_assumes(j, runtime_close(execpair[1]) != -1);
+				execpair[0] = -1;
+				execpair[1] = -1;
+				job_log(j, LOG_NOTICE | LOG_CONSOLE, "fork(2) failed: %d", errno);
 				break;
 			default	:
+				job_assumes(j, runtime_close(execpair[1]) != -1);
+				execpair[1] = -1;
 				break;
 		}
 		
+		int r = -1;
 		if( sp != -1 ) {
-			j->sample_pid = sp;
+			/* Let us know when sample is done. ONESHOT is implicit if we're just interested in NOTE_EXIT. */
+			if( job_assumes(j, (r = kevent_mod(sp, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, j)) != -1) ) {
+				if( job_assumes(j, write(execpair[0], &sp, sizeof(sp)) == sizeof(sp)) ) {
+					j->sample_pid = sp;
+				} else {
+					job_assumes(j, kevent_mod(sp, EVFILT_PROC, EV_DELETE, 0, 0, NULL) != -1);
+					job_assumes(j, runtime_kill(sp, SIGKILL) != -1);
+					r = -1;
+				}
+			} else {
+				job_assumes(j, runtime_kill(sp, SIGKILL) != -1);
+			}
 			
-			/* Let us know when sample is done. ONESHOT is implicit if we're just interested in NOTE_EXIT. */
-			job_assumes(j, kevent_mod(sp, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, j) != -1);
-		} else {
+			int status = 0;
+			if( r == -1 ) {
+				job_assumes(j, waitpid(sp, &status, WNOHANG) != -1);
+			}
+		}
+		
+		if( execpair[0] != -1 ) {
+			job_assumes(j, runtime_close(execpair[0]) != -1);
+		}
+		
+		if( execpair[1] != -1 ) {
+			job_assumes(j, runtime_close(execpair[0]) != -1);
+		}
+		
+		if( r == -1 ) {
 			job_log(j, LOG_ERR | LOG_CONSOLE, "Sampling for job failed!");
 			STAILQ_REMOVE(&jm->pending_samples, j, job_s, pending_samples_sle);
+			j->sampled = true;
 			jobmgr_dequeue_next_sample(jm);
+		} else {
+			job_log(j, LOG_DEBUG | LOG_CONSOLE, "Sampling job (sample PID: %i, file: %s).", sp, j->mgr->sample_log_file);
 		}
 	#endif
 	} else {
 		STAILQ_REMOVE(&jm->pending_samples, j, job_s, pending_samples_sle);
+		j->sampled = true;
 	}
 	
 	j->pending_sample = false;
@@ -2960,12 +2998,13 @@
 		job_kill(j);
 	}
 
+	job_log(j, LOG_DEBUG | LOG_CONSOLE, "sample[%i] finished with job.", j->sample_pid);
 	j->sample_pid = 0;
 	j->sampled = true;
 	STAILQ_REMOVE(&j->mgr->pending_samples, j, job_s, pending_samples_sle);
 	
 	if( j->reap_after_sample ) {
-		job_log(j, LOG_NOTICE | LOG_CONSOLE, "Sampling complete. Reaping.");
+		job_log(j, LOG_DEBUG | LOG_CONSOLE, "Reaping job now that sample is done.");
 		struct kevent kev;
 		EV_SET(&kev, 1, 0, 0, NOTE_EXIT, 0, 0);
 		
@@ -2973,8 +3012,6 @@
 		job_callback_proc(j, &kev);
 	}
 	
-	job_log(j, LOG_DEBUG | LOG_CONSOLE, "Finished sampling.");
-	
 	jobmgr_dequeue_next_sample(j->mgr);
 }
 
@@ -2986,14 +3023,13 @@
 	
 	if( j->sample_pid == (pid_t)kev->ident ) {
 		job_assumes(j, (fflags & NOTE_EXIT) != 0);
-		job_log(j, LOG_NOTICE | LOG_CONSOLE, "Sampling for job done. Reaping sample...");
 		
 		job_reap_sample(j);
 		
 		return;
 	} else if( j->sample_pid && !j->reap_after_sample ) {
 		/* The job exited before our sample completed. */
-		job_log(j, LOG_NOTICE | LOG_CONSOLE, "Job exited. Will reap after sample is complete.");
+		job_log(j, LOG_NOTICE | LOG_CONSOLE, "Job has exited. Will reap after sample[%i] is complete.", j->sample_pid);
 		j->reap_after_sample = true;
 		return;
 	}
@@ -3069,6 +3105,10 @@
 		 *  - once for the SIGKILL_TIMER timer, at which point we log an issue
 		 *    with the long SIGKILL
 		 */
+		
+		bool was_is_or_will_be_sampled = ( j->sampled || j->sample_pid || j->pending_sample );
+		bool should_enqueue = ( !was_is_or_will_be_sampled && do_apple_internal_logging );
+		
 		if (j->sent_sigkill) {
 			uint64_t td = runtime_get_nanoseconds_since(j->sent_signal_time);
 
@@ -3076,28 +3116,34 @@
 			td -= j->exit_timeout;
 
 			job_log(j, LOG_WARNING | LOG_CONSOLE, "Did not die after sending SIGKILL %llu seconds ago...", td);
-		} else if (!(j->sampled || j->sample_pid || j->pending_sample) && (!j->exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < j->exit_timeout))) {
-			if( do_apple_internal_logging ) {
-				/* This should work even if the job changes its exit_timeout midstream */
-				job_log(j, LOG_NOTICE | LOG_CONSOLE, "Sampling timeout elapsed (%u seconds). Scheduling a sample...", LAUNCHD_SAMPLE_TIMEOUT);
-				if (j->exit_timeout) {
-					unsigned int ttk = (j->exit_timeout - LAUNCHD_SAMPLE_TIMEOUT);
-					job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER,
-											  EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1);
-					job_log(j, LOG_NOTICE | LOG_CONSOLE, "Scheduled new exit timeout for %u seconds later", ttk);
-				}
-				
-				STAILQ_INSERT_TAIL(&j->mgr->pending_samples, j, pending_samples_sle);
-				j->pending_sample = true;
-				jobmgr_dequeue_next_sample(j->mgr);
+		} else if( should_enqueue && (!j->exit_timeout || (LAUNCHD_SAMPLE_TIMEOUT < j->exit_timeout)) ) {
+			/* This should work even if the job changes its exit_timeout midstream */
+			job_log(j, LOG_NOTICE | LOG_CONSOLE, "Sampling timeout elapsed (%u seconds). Scheduling a sample...", LAUNCHD_SAMPLE_TIMEOUT);
+			if (j->exit_timeout) {
+				unsigned int ttk = (j->exit_timeout - LAUNCHD_SAMPLE_TIMEOUT);
+				job_assumes(j, kevent_mod((uintptr_t)&j->exit_timeout, EVFILT_TIMER,
+										  EV_ADD|EV_ONESHOT, NOTE_SECONDS, ttk, j) != -1);
+				job_log(j, LOG_NOTICE | LOG_CONSOLE, "Scheduled new exit timeout for %u seconds later", ttk);
 			}
+			
+			STAILQ_INSERT_TAIL(&j->mgr->pending_samples, j, pending_samples_sle);
+			j->pending_sample = true;
+			jobmgr_dequeue_next_sample(j->mgr);
 		} else {
-			if( !(j->sampled || j->sample_pid || j->pending_sample) && do_apple_internal_logging ) {
-				job_log(j, LOG_WARNING | LOG_CONSOLE, "Exit timeout elapsed (%u seconds). Will kill after sampling.", j->exit_timeout);
-				STAILQ_INSERT_TAIL(&j->mgr->pending_samples, j, pending_samples_sle);
-				j->pending_sample = true;
-				j->kill_after_sample = true;
-				
+			if( do_apple_internal_logging && !j->sampled ) {
+				if( j->sample_pid || j->pending_sample ) {
+					char pidstr[24] = { 0 };
+					snprintf(pidstr, sizeof(pidstr), "[%i] ", j->sample_pid);
+					
+					job_log(j, LOG_DEBUG | LOG_CONSOLE, "Exit timeout elapsed (%u seconds). Will kill after sample%shas completed.", j->exit_timeout, j->sample_pid ? pidstr : " ");
+					j->kill_after_sample = true;
+				} else {
+					job_log(j, LOG_DEBUG | LOG_CONSOLE, "Exit timeout elapsed (%u seconds). Will sample and then kill.", j->exit_timeout);
+					
+					STAILQ_INSERT_TAIL(&j->mgr->pending_samples, j, pending_samples_sle);
+					j->pending_sample = true;
+				}			
+
 				jobmgr_dequeue_next_sample(j->mgr);
 			} else {
 				if (unlikely(j->debug_before_kill)) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.macosforge.org/pipermail/launchd-changes/attachments/20081125/7ab914b3/attachment.html>


More information about the launchd-changes mailing list