No Description

003-dead_lock.patch 5.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. diff -urN db-4.7.25.NC/lock/lock_deadlock.c db-4.7.25.NC.new/lock/lock_deadlock.c
  2. --- db-4.7.25.NC/lock/lock_deadlock.c 2008-03-10 14:31:33.000000000 +0100
  3. +++ db-4.7.25.NC.new/lock/lock_deadlock.c 2009-11-08 12:53:25.000000000 +0100
  4. @@ -121,7 +121,7 @@
  5. DB_LOCKTAB *lt;
  6. db_timespec now;
  7. locker_info *idmap;
  8. - u_int32_t *bitmap, *copymap, **deadp, **free_me, *tmpmap;
  9. + u_int32_t *bitmap, *copymap, **deadp, **deadlist, *tmpmap;
  10. u_int32_t i, cid, keeper, killid, limit, nalloc, nlockers;
  11. u_int32_t lock_max, txn_max;
  12. int ret, status;
  13. @@ -133,7 +133,8 @@
  14. if (IS_REP_CLIENT(env))
  15. atype = DB_LOCK_MINWRITE;
  16. - free_me = NULL;
  17. + copymap = tmpmap = NULL;
  18. + deadlist = NULL;
  19. lt = env->lk_handle;
  20. if (rejectp != NULL)
  21. @@ -179,11 +180,11 @@
  22. memcpy(copymap, bitmap, nlockers * sizeof(u_int32_t) * nalloc);
  23. if ((ret = __os_calloc(env, sizeof(u_int32_t), nalloc, &tmpmap)) != 0)
  24. - goto err1;
  25. + goto err;
  26. /* Find a deadlock. */
  27. if ((ret =
  28. - __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadp)) != 0)
  29. + __dd_find(env, bitmap, idmap, nlockers, nalloc, &deadlist)) != 0)
  30. return (ret);
  31. /*
  32. @@ -204,8 +205,7 @@
  33. txn_max = TXN_MAXIMUM;
  34. killid = BAD_KILLID;
  35. - free_me = deadp;
  36. - for (; *deadp != NULL; deadp++) {
  37. + for (deadp = deadlist; *deadp != NULL; deadp++) {
  38. if (rejectp != NULL)
  39. ++*rejectp;
  40. killid = (u_int32_t)(*deadp - bitmap) / nalloc;
  41. @@ -342,11 +342,12 @@
  42. __db_msg(env,
  43. "Aborting locker %lx", (u_long)idmap[killid].id);
  44. }
  45. - __os_free(env, tmpmap);
  46. -err1: __os_free(env, copymap);
  47. -
  48. -err: if (free_me != NULL)
  49. - __os_free(env, free_me);
  50. +err: if(copymap != NULL)
  51. + __os_free(env, copymap);
  52. + if (deadlist != NULL)
  53. + __os_free(env, deadlist);
  54. + if(tmpmap != NULL)
  55. + __os_free(env, tmpmap);
  56. __os_free(env, bitmap);
  57. __os_free(env, idmap);
  58. @@ -360,6 +361,17 @@
  59. #define DD_INVALID_ID ((u_int32_t) -1)
  60. +/*
  61. + * __dd_build --
  62. + * Build the lock dependency bit maps.
  63. + * Notes on synchronization:
  64. + * LOCK_SYSTEM_LOCK is used to hold objects locked when we have
  65. + * a single partition.
  66. + * LOCK_LOCKERS is held while we are walking the lockers list and
  67. + * to single thread the use of lockerp->dd_id.
  68. + * LOCK_DD protects the DD list of objects.
  69. + */
  70. +
  71. static int
  72. __dd_build(env, atype, bmp, nlockers, allocp, idmap, rejectp)
  73. ENV *env;
  74. @@ -393,6 +405,7 @@
  75. * In particular we do not build the conflict array and our caller
  76. * needs to expect this.
  77. */
  78. + LOCK_SYSTEM_LOCK(lt, region);
  79. if (atype == DB_LOCK_EXPIRE) {
  80. skip: LOCK_DD(env, region);
  81. op = SH_TAILQ_FIRST(&region->dd_objs, __db_lockobj);
  82. @@ -430,17 +443,18 @@
  83. OBJECT_UNLOCK(lt, region, indx);
  84. }
  85. UNLOCK_DD(env, region);
  86. + LOCK_SYSTEM_UNLOCK(lt, region);
  87. goto done;
  88. }
  89. /*
  90. - * We'll check how many lockers there are, add a few more in for
  91. - * good measure and then allocate all the structures. Then we'll
  92. - * verify that we have enough room when we go back in and get the
  93. - * mutex the second time.
  94. + * Allocate after locking the region
  95. + * to make sure the structures are large enough.
  96. */
  97. -retry: count = region->stat.st_nlockers;
  98. + LOCK_LOCKERS(env, region);
  99. + count = region->stat.st_nlockers;
  100. if (count == 0) {
  101. + UNLOCK_LOCKERS(env, region);
  102. *nlockers = 0;
  103. return (0);
  104. }
  105. @@ -448,50 +462,37 @@
  106. if (FLD_ISSET(env->dbenv->verbose, DB_VERB_DEADLOCK))
  107. __db_msg(env, "%lu lockers", (u_long)count);
  108. - count += 20;
  109. nentries = (u_int32_t)DB_ALIGN(count, 32) / 32;
  110. - /*
  111. - * Allocate enough space for a count by count bitmap matrix.
  112. - *
  113. - * XXX
  114. - * We can probably save the malloc's between iterations just
  115. - * reallocing if necessary because count grew by too much.
  116. - */
  117. + /* Allocate enough space for a count by count bitmap matrix. */
  118. if ((ret = __os_calloc(env, (size_t)count,
  119. - sizeof(u_int32_t) * nentries, &bitmap)) != 0)
  120. + sizeof(u_int32_t) * nentries, &bitmap)) != 0) {
  121. + UNLOCK_LOCKERS(env, region);
  122. return (ret);
  123. + }
  124. if ((ret = __os_calloc(env,
  125. sizeof(u_int32_t), nentries, &tmpmap)) != 0) {
  126. + UNLOCK_LOCKERS(env, region);
  127. __os_free(env, bitmap);
  128. return (ret);
  129. }
  130. if ((ret = __os_calloc(env,
  131. (size_t)count, sizeof(locker_info), &id_array)) != 0) {
  132. + UNLOCK_LOCKERS(env, region);
  133. __os_free(env, bitmap);
  134. __os_free(env, tmpmap);
  135. return (ret);
  136. }
  137. /*
  138. - * Now go back in and actually fill in the matrix.
  139. - */
  140. - if (region->stat.st_nlockers > count) {
  141. - __os_free(env, bitmap);
  142. - __os_free(env, tmpmap);
  143. - __os_free(env, id_array);
  144. - goto retry;
  145. - }
  146. -
  147. - /*
  148. * First we go through and assign each locker a deadlock detector id.
  149. */
  150. id = 0;
  151. - LOCK_LOCKERS(env, region);
  152. SH_TAILQ_FOREACH(lip, &region->lockers, ulinks, __db_locker) {
  153. if (lip->master_locker == INVALID_ROFF) {
  154. + DB_ASSERT(env, id < count);
  155. lip->dd_id = id++;
  156. id_array[lip->dd_id].id = lip->id;
  157. switch (atype) {
  158. @@ -510,7 +511,6 @@
  159. lip->dd_id = DD_INVALID_ID;
  160. }
  161. - UNLOCK_LOCKERS(env, region);
  162. /*
  163. * We only need consider objects that have waiters, so we use
  164. @@ -669,7 +669,6 @@
  165. * status after building the bit maps so that we will not detect
  166. * a blocked transaction without noting that it is already aborting.
  167. */
  168. - LOCK_LOCKERS(env, region);
  169. for (id = 0; id < count; id++) {
  170. if (!id_array[id].valid)
  171. continue;
  172. @@ -738,6 +737,7 @@
  173. id_array[id].in_abort = 1;
  174. }
  175. UNLOCK_LOCKERS(env, region);
  176. + LOCK_SYSTEM_UNLOCK(lt, region);
  177. /*
  178. * Now we can release everything except the bitmap matrix that we
  179. @@ -839,6 +839,7 @@
  180. ret = 0;
  181. /* We must lock so this locker cannot go away while we abort it. */
  182. + LOCK_SYSTEM_LOCK(lt, region);
  183. LOCK_LOCKERS(env, region);
  184. /*
  185. @@ -895,6 +896,7 @@
  186. done: OBJECT_UNLOCK(lt, region, info->last_ndx);
  187. err:
  188. out: UNLOCK_LOCKERS(env, region);
  189. + LOCK_SYSTEM_UNLOCK(lt, region);
  190. return (ret);
  191. }