<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[14741] twext/trunk/twext/enterprise</title>
</head>
<body>

<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
#msg dl a:link    { color:#fc3; }
#msg dl a:active  { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg ul { text-indent: -1em; padding-left: 1em; }#logmsg ol { text-indent: -1.5em; padding-left: 1.5em; }
#logmsg > ul, #logmsg > ol { margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://trac.calendarserver.org//changeset/14741">14741</a></dd>
<dt>Author</dt> <dd>cdaboo@apple.com</dd>
<dt>Date</dt> <dd>2015-05-04 11:26:40 -0700 (Mon, 04 May 2015)</dd>
</dl>

<h3>Log Message</h3>
<pre>Better error handling for failures during the main job loop.</pre>

<h3>Modified Paths</h3>
<ul>
<li><a href="#twexttrunktwextenterprisejobqueuepy">twext/trunk/twext/enterprise/jobqueue.py</a></li>
<li><a href="#twexttrunktwextenterprisetesttest_jobqueuepy">twext/trunk/twext/enterprise/test/test_jobqueue.py</a></li>
</ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="twexttrunktwextenterprisejobqueuepy"></a>
<div class="modfile"><h4>Modified: twext/trunk/twext/enterprise/jobqueue.py (14740 => 14741)</h4>
<pre class="diff"><span>
<span class="info">--- twext/trunk/twext/enterprise/jobqueue.py        2015-05-04 16:56:50 UTC (rev 14740)
+++ twext/trunk/twext/enterprise/jobqueue.py        2015-05-04 18:26:40 UTC (rev 14741)
</span><span class="lines">@@ -2102,6 +2102,7 @@
</span><span class="cx">             nowTime = datetime.utcfromtimestamp(self.reactor.seconds())
</span><span class="cx"> 
</span><span class="cx">             txn = self.transactionFactory(label=&quot;jobqueue.workCheck&quot;)
</span><ins>+            nextJob = None
</ins><span class="cx">             try:
</span><span class="cx">                 nextJob = yield JobItem.nextjob(txn, nowTime, minPriority)
</span><span class="cx">                 if nextJob is None:
</span><span class="lines">@@ -2137,13 +2138,49 @@
</span><span class="cx">                 loopCounter += 1
</span><span class="cx"> 
</span><span class="cx">             except Exception as e:
</span><del>-                log.error(&quot;Failed to pick a new job, {exc}&quot;, exc=e)
</del><ins>+                log.error(
+                    &quot;Failed to pick a new job: {jobID}, {exc}&quot;,
+                    jobID=nextJob.jobID if nextJob else &quot;?&quot;,
+                    exc=e,
+                )
</ins><span class="cx">                 yield txn.abort()
</span><span class="cx">                 txn = None
</span><del>-                nextJob = None
</del><ins>+
+                # If we can identify the problem job, try and set it to failed so that it
+                # won't block other jobs behind it (it will be picked again when the failure
+                # interval is exceeded - but that has a back off so a permanently stuck item
+                # should fade away. We probably want to have some additional logic to simply
+                # remove something that is permanently failing.
+                if nextJob is not None:
+                    txn = self.transactionFactory(label=&quot;jobqueue.workCheck.failed&quot;)
+                    try:
+                        failedJob = yield JobItem.load(txn, nextJob.jobID)
+                        yield failedJob.failedToRun()
+                    except Exception as e:
+                        # Could not mark as failed - break out of the next job loop
+                        log.error(
+                            &quot;Failed to mark failed new job:{}, {exc}&quot;,
+                            jobID=nextJob.jobID,
+                            exc=e,
+                        )
+                        yield txn.abort()
+                        txn = None
+                        nextJob = None
+                        break
+                    else:
+                        # Marked the problem one as failed, so keep going and get the next job
+                        log.error(&quot;Marked failed new job: {jobID}&quot;, jobID=nextJob.jobID)
+                        yield txn.commit()
+                        txn = None
+                        nextJob = None
+                else:
+                    # Cannot mark anything as failed - break out of next job loop
+                    log.error(&quot;Cannot mark failed new job&quot;)
+                    break
</ins><span class="cx">             finally:
</span><span class="cx">                 if txn:
</span><span class="cx">                     yield txn.commit()
</span><ins>+                    txn = None
</ins><span class="cx"> 
</span><span class="cx">             if nextJob is not None:
</span><span class="cx">                 try:
</span></span></pre></div>
<a id="twexttrunktwextenterprisetesttest_jobqueuepy"></a>
<div class="modfile"><h4>Modified: twext/trunk/twext/enterprise/test/test_jobqueue.py (14740 => 14741)</h4>
<pre class="diff"><span>
<span class="info">--- twext/trunk/twext/enterprise/test/test_jobqueue.py        2015-05-04 16:56:50 UTC (rev 14740)
+++ twext/trunk/twext/enterprise/test/test_jobqueue.py        2015-05-04 18:26:40 UTC (rev 14741)
</span><span class="lines">@@ -1059,7 +1059,7 @@
</span><span class="cx">     @inlineCallbacks
</span><span class="cx">     def test_temporaryFailure(self):
</span><span class="cx">         &quot;&quot;&quot;
</span><del>-        When a work item temporARILY fails it should appear as unassigned in the JOB
</del><ins>+        When a work item temporarily fails it should appear as unassigned in the JOB
</ins><span class="cx">         table and have the failure count bumped, and a notBefore set to the temporary delay.
</span><span class="cx">         &quot;&quot;&quot;
</span><span class="cx">         dbpool, _ignore_qpool, clock, _ignore_performerChosen = self._setupPools()
</span><span class="lines">@@ -1090,7 +1090,160 @@
</span><span class="cx">         self.assertTrue(jobs[0].notBefore &gt; datetime.datetime.utcnow() + datetime.timedelta(seconds=90))
</span><span class="cx"> 
</span><span class="cx"> 
</span><ins>+    @inlineCallbacks
+    def test_loopFailure_noRecovery(self):
+        &quot;&quot;&quot;
+        When L{_workCheck} fails in its loop we need the problem job marked as failed.
+        &quot;&quot;&quot;
+        dbpool, _ignore_qpool, clock, _ignore_performerChosen = self._setupPools()
+        fakeNow = datetime.datetime(2012, 12, 12, 12, 12, 12)
</ins><span class="cx"> 
</span><ins>+        oldNextJob = JobItem.nextjob
+        @inlineCallbacks
+        def _nextJob(cls, txn, now, minPriority):
+            job = yield oldNextJob(txn, now, minPriority)
+            work = yield job.workItem()
+            if work.a == -2:
+                raise ValueError(&quot;oops&quot;)
+
+        self.patch(JobItem, &quot;nextjob&quot;, classmethod(_nextJob))
+
+        # Let's create a couple of work items directly, not via the enqueue
+        # method, so that they exist but nobody will try to immediately execute
+        # them.
+
+        @transactionally(dbpool.pool.connection)
+        @inlineCallbacks
+        def setup(txn):
+            # Failing
+            yield DummyWorkItem.makeJob(
+                txn, a=-2, b=1, notBefore=fakeNow - datetime.timedelta(20 * 60)
+            )
+            # OK
+            yield DummyWorkItem.makeJob(
+                txn, a=1, b=0, notBefore=fakeNow - datetime.timedelta(20 * 60, 5)
+            )
+        yield setup
+        clock.advance(20 - 12)
+
+        @transactionally(dbpool.pool.connection)
+        def check(txn):
+            return JobItem.all(txn)
+
+        jobs = yield check
+        self.assertEqual(len(jobs), 2)
+        self.assertEqual(jobs[0].assigned, None)
+        self.assertEqual(jobs[0].failed, 0)
+        self.assertEqual(jobs[0].notBefore, fakeNow - datetime.timedelta(20 * 60))
+        self.assertEqual(jobs[1].assigned, None)
+        self.assertEqual(jobs[1].failed, 0)
+        self.assertEqual(jobs[1].notBefore, fakeNow - datetime.timedelta(20 * 60, 5))
+
+
+    @inlineCallbacks
+    def test_loopFailure_recovery(self):
+        &quot;&quot;&quot;
+        When L{_workCheck} fails in its loop we need the problem job marked as failed.
+        &quot;&quot;&quot;
+        dbpool, _ignore_qpool, clock, _ignore_performerChosen = self._setupPools()
+        fakeNow = datetime.datetime(2012, 12, 12, 12, 12, 12)
+
+        oldAssign = JobItem.assign
+        @inlineCallbacks
+        def _assign(self, when, overdue):
+            work = yield self.workItem()
+            if work.a == -2:
+                raise ValueError(&quot;oops&quot;)
+            yield oldAssign(self, when, overdue)
+
+        self.patch(JobItem, &quot;assign&quot;, _assign)
+
+        # Let's create a couple of work items directly, not via the enqueue
+        # method, so that they exist but nobody will try to immediately execute
+        # them.
+
+        @transactionally(dbpool.pool.connection)
+        @inlineCallbacks
+        def setup(txn):
+            # Failing
+            yield DummyWorkItem.makeJob(
+                txn, a=-2, b=1, notBefore=fakeNow - datetime.timedelta(20 * 60)
+            )
+            # OK
+            yield DummyWorkItem.makeJob(
+                txn, a=1, b=0, notBefore=fakeNow - datetime.timedelta(20 * 60, 5)
+            )
+        yield setup
+        clock.advance(20 - 12)
+
+        @transactionally(dbpool.pool.connection)
+        def check(txn):
+            return JobItem.all(txn)
+
+        jobs = yield check
+        self.assertEqual(len(jobs), 1)
+        self.assertEqual(jobs[0].assigned, None)
+        self.assertEqual(jobs[0].failed, 1)
+        self.assertGreater(jobs[0].notBefore, datetime.datetime.utcnow() + datetime.timedelta(seconds=30))
+
+
+    @inlineCallbacks
+    def test_loopFailure_failedRecovery(self):
+        &quot;&quot;&quot;
+        When L{_workCheck} fails in its loop we need the problem job marked as failed.
+        &quot;&quot;&quot;
+        dbpool, _ignore_qpool, clock, _ignore_performerChosen = self._setupPools()
+        fakeNow = datetime.datetime(2012, 12, 12, 12, 12, 12)
+
+        oldAssign = JobItem.assign
+        @inlineCallbacks
+        def _assign(self, when, overdue):
+            work = yield self.workItem()
+            if work.a == -2:
+                raise ValueError(&quot;oops&quot;)
+            yield oldAssign(self, when, overdue)
+
+        self.patch(JobItem, &quot;assign&quot;, _assign)
+
+        @inlineCallbacks
+        def _failedToRun(self, locked=False, delay=None):
+            raise ValueError(&quot;oops&quot;)
+
+        self.patch(JobItem, &quot;failedToRun&quot;, _failedToRun)
+
+        # Let's create a couple of work items directly, not via the enqueue
+        # method, so that they exist but nobody will try to immediately execute
+        # them.
+
+        @transactionally(dbpool.pool.connection)
+        @inlineCallbacks
+        def setup(txn):
+            # Failing
+            yield DummyWorkItem.makeJob(
+                txn, a=-2, b=1, notBefore=fakeNow - datetime.timedelta(20 * 60)
+            )
+            # OK
+            yield DummyWorkItem.makeJob(
+                txn, a=1, b=0, notBefore=fakeNow - datetime.timedelta(20 * 60, 5)
+            )
+        yield setup
+        clock.advance(20 - 12)
+
+        @transactionally(dbpool.pool.connection)
+        def check(txn):
+            return JobItem.all(txn)
+
+        jobs = yield check
+        self.assertEqual(len(jobs), 2)
+        self.assertEqual(jobs[0].assigned, None)
+        self.assertEqual(jobs[0].failed, 0)
+        self.assertEqual(jobs[0].notBefore, fakeNow - datetime.timedelta(20 * 60))
+        self.assertEqual(jobs[1].assigned, None)
+        self.assertEqual(jobs[1].failed, 0)
+        self.assertEqual(jobs[1].notBefore, fakeNow - datetime.timedelta(20 * 60, 5))
+
+
+
</ins><span class="cx"> class HalfConnection(object):
</span><span class="cx">     def __init__(self, protocol):
</span><span class="cx">         self.protocol = protocol
</span></span></pre>
</div>
</div>

</body>
</html>