LEFT | RIGHT |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 #include "runtime.h" | 5 #include "runtime.h" |
6 #include "arch_GOARCH.h" | 6 #include "arch_GOARCH.h" |
7 #include "zaexperiment.h" | 7 #include "zaexperiment.h" |
8 #include "malloc.h" | 8 #include "malloc.h" |
9 #include "stack.h" | 9 #include "stack.h" |
10 #include "race.h" | 10 #include "race.h" |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
72 G* runtime·lastg; | 72 G* runtime·lastg; |
73 M* runtime·allm; | 73 M* runtime·allm; |
74 M* runtime·extram; | 74 M* runtime·extram; |
75 int8* runtime·goos; | 75 int8* runtime·goos; |
76 int32 runtime·ncpu; | 76 int32 runtime·ncpu; |
77 static int32 newprocs; | 77 static int32 newprocs; |
78 | 78 |
79 void runtime·mstart(void); | 79 void runtime·mstart(void); |
80 static void runqput(P*, G*); | 80 static void runqput(P*, G*); |
81 static G* runqget(P*); | 81 static G* runqget(P*); |
82 static void runqgrow(P*); | 82 static bool runqputslow(P*, G*, uint32, uint32); |
83 static G* runqsteal(P*, P*); | 83 static G* runqsteal(P*, P*); |
84 static void mput(M*); | 84 static void mput(M*); |
85 static M* mget(void); | 85 static M* mget(void); |
86 static void mcommoninit(M*); | 86 static void mcommoninit(M*); |
87 static void schedule(void); | 87 static void schedule(void); |
88 static void procresize(int32); | 88 static void procresize(int32); |
89 static void acquirep(P*); | 89 static void acquirep(P*); |
90 static P* releasep(void); | 90 static P* releasep(void); |
91 static void newm(void(*)(void), P*); | 91 static void newm(void(*)(void), P*); |
92 static void stopm(void); | 92 static void stopm(void); |
93 static void startm(P*, bool); | 93 static void startm(P*, bool); |
94 static void handoffp(P*); | 94 static void handoffp(P*); |
95 static void wakep(void); | 95 static void wakep(void); |
96 static void stoplockedm(void); | 96 static void stoplockedm(void); |
97 static void startlockedm(G*); | 97 static void startlockedm(G*); |
98 static void sysmon(void); | 98 static void sysmon(void); |
99 static uint32 retake(int64); | 99 static uint32 retake(int64); |
100 static void incidlelocked(int32); | 100 static void incidlelocked(int32); |
101 static void checkdead(void); | 101 static void checkdead(void); |
102 static void exitsyscall0(G*); | 102 static void exitsyscall0(G*); |
103 static void park0(G*); | 103 static void park0(G*); |
104 static void goexit0(G*); | 104 static void goexit0(G*); |
105 static void gfput(P*, G*); | 105 static void gfput(P*, G*); |
106 static G* gfget(P*); | 106 static G* gfget(P*); |
107 static void gfpurge(P*); | 107 static void gfpurge(P*); |
108 static void globrunqput(G*); | 108 static void globrunqput(G*); |
| 109 static void globrunqputbatch(G*, G*, int32); |
109 static G* globrunqget(P*, int32); | 110 static G* globrunqget(P*, int32); |
110 static P* pidleget(void); | 111 static P* pidleget(void); |
111 static void pidleput(P*); | 112 static void pidleput(P*); |
112 static void injectglist(G*); | 113 static void injectglist(G*); |
113 static bool preemptall(void); | 114 static bool preemptall(void); |
114 static bool preemptone(P*); | 115 static bool preemptone(P*); |
115 static bool exitsyscallfast(void); | 116 static bool exitsyscallfast(void); |
116 static bool haveexperiment(int8*); | 117 static bool haveexperiment(int8*); |
117 | 118 |
118 // The bootstrap sequence is: | 119 // The bootstrap sequence is: |
119 // | 120 // |
120 // call osinit | 121 // call osinit |
121 // call schedinit | 122 // call schedinit |
122 // make & queue new G | 123 // make & queue new G |
123 // call runtime·mstart | 124 // call runtime·mstart |
124 // | 125 // |
125 // The new G calls runtime·main. | 126 // The new G calls runtime·main. |
126 void | 127 void |
127 runtime·schedinit(void) | 128 runtime·schedinit(void) |
128 { | 129 { |
129 int32 n, procs; | 130 int32 n, procs; |
130 byte *p; | 131 byte *p; |
131 Eface i; | 132 Eface i; |
132 | 133 |
133 runtime·sched.maxmcount = 10000; | 134 runtime·sched.maxmcount = 10000; |
134 runtime·precisestack = haveexperiment("precisestack"); | 135 runtime·precisestack = haveexperiment("precisestack"); |
135 | 136 |
136 runtime·mprofinit(); | |
137 runtime·mallocinit(); | 137 runtime·mallocinit(); |
138 mcommoninit(m); | 138 mcommoninit(m); |
139 ········ | 139 ········ |
140 // Initialize the itable value for newErrorCString, | 140 // Initialize the itable value for newErrorCString, |
141 // so that the next time it gets called, possibly | 141 // so that the next time it gets called, possibly |
142 // in a fault during a garbage collection, it will not | 142 // in a fault during a garbage collection, it will not |
143 // need to allocated memory. | 143 // need to allocated memory. |
144 runtime·newErrorCString(0, &i); | 144 runtime·newErrorCString(0, &i); |
145 | 145 |
146 runtime·goargs(); | 146 runtime·goargs(); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
202 d.fn = &initDone; | 202 d.fn = &initDone; |
203 d.siz = 0; | 203 d.siz = 0; |
204 d.link = g->defer; | 204 d.link = g->defer; |
205 d.argp = (void*)-1; | 205 d.argp = (void*)-1; |
206 d.special = true; | 206 d.special = true; |
207 d.free = false; | 207 d.free = false; |
208 g->defer = &d; | 208 g->defer = &d; |
209 | 209 |
210 if(m != &runtime·m0) | 210 if(m != &runtime·m0) |
211 runtime·throw("runtime·main not on m0"); | 211 runtime·throw("runtime·main not on m0"); |
212 » // TODO(dfc) disable the scavenger | 212 » runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); |
213 » // runtime·newproc1(&scavenger, nil, 0, 0, runtime·main); | |
214 main·init(); | 213 main·init(); |
215 | 214 |
216 if(g->defer != &d || d.fn != &initDone) | 215 if(g->defer != &d || d.fn != &initDone) |
217 runtime·throw("runtime: bad defer entry after init"); | 216 runtime·throw("runtime: bad defer entry after init"); |
218 g->defer = d.link; | 217 g->defer = d.link; |
219 runtime·unlockOSThread(); | 218 runtime·unlockOSThread(); |
220 | 219 |
221 main·main(); | 220 main·main(); |
222 if(raceenabled) | 221 if(raceenabled) |
223 runtime·racefini(); | 222 runtime·racefini(); |
224 | 223 |
225 // Make racy client program work: if panicking on | 224 // Make racy client program work: if panicking on |
226 // another goroutine at the same time as main returns, | 225 // another goroutine at the same time as main returns, |
227 // let the other goroutine finish printing the panic trace. | 226 // let the other goroutine finish printing the panic trace. |
228 // Once it does, it will exit. See issue 3934. | 227 // Once it does, it will exit. See issue 3934. |
229 if(runtime·panicking) | 228 if(runtime·panicking) |
230 runtime·park(nil, nil, "panicwait"); | 229 runtime·park(nil, nil, "panicwait"); |
231 | 230 |
232 runtime·exit(0); | 231 runtime·exit(0); |
233 for(;;) | 232 for(;;) |
234 *(int32*)runtime·main = 0; | 233 *(int32*)runtime·main = 0; |
235 } | 234 } |
236 | 235 |
237 void | 236 void |
238 runtime·goroutineheader(G *gp) | 237 runtime·goroutineheader(G *gp) |
239 { | 238 { |
240 int8 *status; | 239 int8 *status; |
| 240 int64 waitfor; |
241 | 241 |
242 switch(gp->status) { | 242 switch(gp->status) { |
243 case Gidle: | 243 case Gidle: |
244 status = "idle"; | 244 status = "idle"; |
245 break; | 245 break; |
246 case Grunnable: | 246 case Grunnable: |
247 status = "runnable"; | 247 status = "runnable"; |
248 break; | 248 break; |
249 case Grunning: | 249 case Grunning: |
250 status = "running"; | 250 status = "running"; |
251 break; | 251 break; |
252 case Gsyscall: | 252 case Gsyscall: |
253 status = "syscall"; | 253 status = "syscall"; |
254 break; | 254 break; |
255 case Gwaiting: | 255 case Gwaiting: |
256 if(gp->waitreason) | 256 if(gp->waitreason) |
257 status = gp->waitreason; | 257 status = gp->waitreason; |
258 else | 258 else |
259 status = "waiting"; | 259 status = "waiting"; |
260 break; | 260 break; |
261 default: | 261 default: |
262 status = "???"; | 262 status = "???"; |
263 break; | 263 break; |
264 } | 264 } |
265 » runtime·printf("goroutine %D [%s]:\n", gp->goid, status); | 265 |
| 266 » // approx time the G is blocked, in minutes |
| 267 » waitfor = 0; |
| 268 » if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince !
= 0) |
| 269 » » waitfor = (runtime·nanotime() - gp->waitsince) / (60LL*1000*1000
*1000); |
| 270 |
| 271 » if(waitfor < 1) |
| 272 » » runtime·printf("goroutine %D [%s]:\n", gp->goid, status); |
| 273 » else |
| 274 » » runtime·printf("goroutine %D [%s, %D minutes]:\n", gp->goid, sta
tus, waitfor); |
266 } | 275 } |
267 | 276 |
268 void | 277 void |
269 runtime·tracebackothers(G *me) | 278 runtime·tracebackothers(G *me) |
270 { | 279 { |
271 G *gp; | 280 G *gp; |
272 int32 traceback; | 281 int32 traceback; |
273 | 282 |
274 traceback = runtime·gotraceback(nil); | 283 traceback = runtime·gotraceback(nil); |
275 ········ | 284 ········ |
276 // Show the current goroutine first, if we haven't already. | 285 // Show the current goroutine first, if we haven't already. |
277 if((gp = m->curg) != nil && gp != me) { | 286 if((gp = m->curg) != nil && gp != me) { |
278 runtime·printf("\n"); | 287 runtime·printf("\n"); |
279 runtime·goroutineheader(gp); | 288 runtime·goroutineheader(gp); |
280 » » runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.lr, gp); | 289 » » runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); |
281 } | 290 } |
282 | 291 |
283 for(gp = runtime·allg; gp != nil; gp = gp->alllink) { | 292 for(gp = runtime·allg; gp != nil; gp = gp->alllink) { |
284 if(gp == me || gp == m->curg || gp->status == Gdead) | 293 if(gp == me || gp == m->curg || gp->status == Gdead) |
285 continue; | 294 continue; |
286 if(gp->issystem && traceback < 2) | 295 if(gp->issystem && traceback < 2) |
287 continue; | 296 continue; |
288 runtime·printf("\n"); | 297 runtime·printf("\n"); |
289 runtime·goroutineheader(gp); | 298 runtime·goroutineheader(gp); |
290 if(gp->status == Grunning) { | 299 if(gp->status == Grunning) { |
291 runtime·printf("\tgoroutine running on other thread; sta
ck unavailable\n"); | 300 runtime·printf("\tgoroutine running on other thread; sta
ck unavailable\n"); |
292 runtime·printcreatedby(gp); | 301 runtime·printcreatedby(gp); |
293 } else | 302 } else |
294 » » » runtime·traceback(gp->sched.pc, gp->sched.sp, gp->sched.
lr, gp); | 303 » » » runtime·traceback(~(uintptr)0, ~(uintptr)0, 0, gp); |
295 } | 304 } |
296 } | 305 } |
297 | 306 |
298 static void | 307 static void |
299 checkmcount(void) | 308 checkmcount(void) |
300 { | 309 { |
301 // sched lock is held | 310 // sched lock is held |
302 if(runtime·sched.mcount > runtime·sched.maxmcount) { | 311 if(runtime·sched.mcount > runtime·sched.maxmcount) { |
303 runtime·printf("runtime: program exceeds %d-thread limit\n", run
time·sched.maxmcount); | 312 runtime·printf("runtime: program exceeds %d-thread limit\n", run
time·sched.maxmcount); |
304 runtime·throw("thread exhaustion"); | 313 runtime·throw("thread exhaustion"); |
(...skipping 334 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
639 Eface e; | 648 Eface e; |
640 runtime·gc_m_ptr(&e); | 649 runtime·gc_m_ptr(&e); |
641 mtype = ((PtrType*)e.type)->elem; | 650 mtype = ((PtrType*)e.type)->elem; |
642 } | 651 } |
643 | 652 |
644 mp = runtime·cnew(mtype); | 653 mp = runtime·cnew(mtype); |
645 mcommoninit(mp); | 654 mcommoninit(mp); |
646 | 655 |
647 // In case of cgo or Solaris, pthread_create will make us a stack. | 656 // In case of cgo or Solaris, pthread_create will make us a stack. |
648 // Windows will layout sched stack on OS stack. | 657 // Windows will layout sched stack on OS stack. |
649 » if(runtime·iscgo || Windows || Sunos) | 658 » if(runtime·iscgo || Windows || Solaris) |
650 mp->g0 = runtime·malg(-1); | 659 mp->g0 = runtime·malg(-1); |
651 else | 660 else |
652 mp->g0 = runtime·malg(8192); | 661 mp->g0 = runtime·malg(8192); |
653 | 662 |
654 if(p == m->p) | 663 if(p == m->p) |
655 releasep(); | 664 releasep(); |
656 m->locks--; | 665 m->locks--; |
657 if(m->locks == 0 && g->preempt) // restore the preemption request in ca
se we've cleared it in newstack | 666 if(m->locks == 0 && g->preempt) // restore the preemption request in ca
se we've cleared it in newstack |
658 g->stackguard0 = StackPreempt; | 667 g->stackguard0 = StackPreempt; |
659 | 668 |
(...skipping 282 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
942 m->nextp = nil; | 951 m->nextp = nil; |
943 } | 952 } |
944 | 953 |
945 static void | 954 static void |
946 mspinning(void) | 955 mspinning(void) |
947 { | 956 { |
948 m->spinning = true; | 957 m->spinning = true; |
949 } | 958 } |
950 | 959 |
951 // Schedules some M to run the p (creates an M if necessary). | 960 // Schedules some M to run the p (creates an M if necessary). |
952 // If p==nil, tries to get an idle P, if no idle P's returns false. | 961 // If p==nil, tries to get an idle P, if no idle P's does nothing. |
953 static void | 962 static void |
954 startm(P *p, bool spinning) | 963 startm(P *p, bool spinning) |
955 { | 964 { |
956 M *mp; | 965 M *mp; |
957 void (*fn)(void); | 966 void (*fn)(void); |
958 | 967 |
959 runtime·lock(&runtime·sched); | 968 runtime·lock(&runtime·sched); |
960 if(p == nil) { | 969 if(p == nil) { |
961 p = pidleget(); | 970 p = pidleget(); |
962 if(p == nil) { | 971 if(p == nil) { |
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1106 static void | 1115 static void |
1107 execute(G *gp) | 1116 execute(G *gp) |
1108 { | 1117 { |
1109 int32 hz; | 1118 int32 hz; |
1110 | 1119 |
1111 if(gp->status != Grunnable) { | 1120 if(gp->status != Grunnable) { |
1112 runtime·printf("execute: bad g status %d\n", gp->status); | 1121 runtime·printf("execute: bad g status %d\n", gp->status); |
1113 runtime·throw("execute: bad g status"); | 1122 runtime·throw("execute: bad g status"); |
1114 } | 1123 } |
1115 gp->status = Grunning; | 1124 gp->status = Grunning; |
| 1125 gp->waitsince = 0; |
1116 gp->preempt = false; | 1126 gp->preempt = false; |
1117 gp->stackguard0 = gp->stackguard; | 1127 gp->stackguard0 = gp->stackguard; |
1118 m->p->schedtick++; | 1128 m->p->schedtick++; |
1119 m->curg = gp; | 1129 m->curg = gp; |
1120 gp->m = m; | 1130 gp->m = m; |
1121 | 1131 |
1122 // Check whether the profiler needs to be turned on or off. | 1132 // Check whether the profiler needs to be turned on or off. |
1123 hz = runtime·sched.profilehz; | 1133 hz = runtime·sched.profilehz; |
1124 if(m->profilehz != hz) | 1134 if(m->profilehz != hz) |
1125 runtime·resetcpuprofiler(hz); | 1135 runtime·resetcpuprofiler(hz); |
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1529 // from the low-level system calls used by the runtime. | 1539 // from the low-level system calls used by the runtime. |
1530 #pragma textflag NOSPLIT | 1540 #pragma textflag NOSPLIT |
1531 void | 1541 void |
1532 runtime·exitsyscall(void) | 1542 runtime·exitsyscall(void) |
1533 { | 1543 { |
1534 m->locks++; // see comment in entersyscall | 1544 m->locks++; // see comment in entersyscall |
1535 | 1545 |
1536 if(g->isbackground) // do not consider blocked scavenger for deadlock d
etection | 1546 if(g->isbackground) // do not consider blocked scavenger for deadlock d
etection |
1537 incidlelocked(-1); | 1547 incidlelocked(-1); |
1538 | 1548 |
| 1549 g->waitsince = 0; |
1539 if(exitsyscallfast()) { | 1550 if(exitsyscallfast()) { |
1540 // There's a cpu for us, so we can run. | 1551 // There's a cpu for us, so we can run. |
1541 m->p->syscalltick++; | 1552 m->p->syscalltick++; |
1542 g->status = Grunning; | 1553 g->status = Grunning; |
1543 // Garbage collector isn't running (since we are), | 1554 // Garbage collector isn't running (since we are), |
1544 // so okay to clear gcstack and gcsp. | 1555 // so okay to clear gcstack and gcsp. |
1545 g->syscallstack = (uintptr)nil; | 1556 g->syscallstack = (uintptr)nil; |
1546 g->syscallsp = (uintptr)nil; | 1557 g->syscallsp = (uintptr)nil; |
1547 m->locks--; | 1558 m->locks--; |
1548 if(g->preempt) { | 1559 if(g->preempt) { |
(...skipping 661 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2210 p->id = i; | 2221 p->id = i; |
2211 p->status = Pgcstop; | 2222 p->status = Pgcstop; |
2212 runtime·atomicstorep(&runtime·allp[i], p); | 2223 runtime·atomicstorep(&runtime·allp[i], p); |
2213 } | 2224 } |
2214 if(p->mcache == nil) { | 2225 if(p->mcache == nil) { |
2215 if(old==0 && i==0) | 2226 if(old==0 && i==0) |
2216 p->mcache = m->mcache; // bootstrap | 2227 p->mcache = m->mcache; // bootstrap |
2217 else | 2228 else |
2218 p->mcache = runtime·allocmcache(); | 2229 p->mcache = runtime·allocmcache(); |
2219 } | 2230 } |
2220 if(p->runq == nil) { | |
2221 p->runqsize = 128; | |
2222 p->runq = (G**)runtime·mallocgc(p->runqsize*sizeof(G*),
0, FlagNoInvokeGC); | |
2223 } | |
2224 } | 2231 } |
2225 | 2232 |
2226 // redistribute runnable G's evenly | 2233 // redistribute runnable G's evenly |
| 2234 // collect all runnable goroutines in global queue |
2227 for(i = 0; i < old; i++) { | 2235 for(i = 0; i < old; i++) { |
2228 p = runtime·allp[i]; | 2236 p = runtime·allp[i]; |
2229 while(gp = runqget(p)) | 2237 while(gp = runqget(p)) |
2230 globrunqput(gp); | 2238 globrunqput(gp); |
2231 } | 2239 } |
| 2240 // fill local queues with at most nelem(p->runq)/2 goroutines |
2232 // start at 1 because current M already executes some G and will acquire
allp[0] below, | 2241 // start at 1 because current M already executes some G and will acquire
allp[0] below, |
2233 // so if we have a spare G we want to put it into allp[1]. | 2242 // so if we have a spare G we want to put it into allp[1]. |
2234 » for(i = 1; runtime·sched.runqhead; i++) { | 2243 » for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++
) { |
2235 gp = runtime·sched.runqhead; | 2244 gp = runtime·sched.runqhead; |
2236 runtime·sched.runqhead = gp->schedlink; | 2245 runtime·sched.runqhead = gp->schedlink; |
| 2246 if(runtime·sched.runqhead == nil) |
| 2247 runtime·sched.runqtail = nil; |
| 2248 runtime·sched.runqsize--; |
2237 runqput(runtime·allp[i%new], gp); | 2249 runqput(runtime·allp[i%new], gp); |
2238 } | 2250 } |
2239 runtime·sched.runqtail = nil; | |
2240 runtime·sched.runqsize = 0; | |
2241 | 2251 |
2242 // free unused P's | 2252 // free unused P's |
2243 for(i = new; i < old; i++) { | 2253 for(i = new; i < old; i++) { |
2244 p = runtime·allp[i]; | 2254 p = runtime·allp[i]; |
2245 runtime·freemcache(p->mcache); | 2255 runtime·freemcache(p->mcache); |
2246 p->mcache = nil; | 2256 p->mcache = nil; |
2247 gfpurge(p); | 2257 gfpurge(p); |
2248 p->status = Pdead; | 2258 p->status = Pdead; |
2249 // can't free P itself because it can be referenced by an M in s
yscall | 2259 // can't free P itself because it can be referenced by an M in s
yscall |
2250 } | 2260 } |
(...skipping 268 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2519 gp->stackguard0 = StackPreempt; | 2529 gp->stackguard0 = StackPreempt; |
2520 return true; | 2530 return true; |
2521 } | 2531 } |
2522 | 2532 |
2523 void | 2533 void |
2524 runtime·schedtrace(bool detailed) | 2534 runtime·schedtrace(bool detailed) |
2525 { | 2535 { |
2526 static int64 starttime; | 2536 static int64 starttime; |
2527 int64 now; | 2537 int64 now; |
2528 int64 id1, id2, id3; | 2538 int64 id1, id2, id3; |
2529 » int32 i, q, t, h, s; | 2539 » int32 i, t, h; |
2530 int8 *fmt; | 2540 int8 *fmt; |
2531 M *mp, *lockedm; | 2541 M *mp, *lockedm; |
2532 G *gp, *lockedg; | 2542 G *gp, *lockedg; |
2533 P *p; | 2543 P *p; |
2534 | 2544 |
2535 now = runtime·nanotime(); | 2545 now = runtime·nanotime(); |
2536 if(starttime == 0) | 2546 if(starttime == 0) |
2537 starttime = now; | 2547 starttime = now; |
2538 | 2548 |
2539 runtime·lock(&runtime·sched); | 2549 runtime·lock(&runtime·sched); |
2540 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth
reads=%d runqueue=%d", | 2550 runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idleth
reads=%d runqueue=%d", |
2541 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl
e, runtime·sched.mcount, | 2551 (now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidl
e, runtime·sched.mcount, |
2542 runtime·sched.nmidle, runtime·sched.runqsize); | 2552 runtime·sched.nmidle, runtime·sched.runqsize); |
2543 if(detailed) { | 2553 if(detailed) { |
2544 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop
wait=%d sysmonwait=%d\n", | 2554 runtime·printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stop
wait=%d sysmonwait=%d\n", |
2545 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run
time·sched.nmspinning, | 2555 runtime·sched.gcwaiting, runtime·sched.nmidlelocked, run
time·sched.nmspinning, |
2546 runtime·sched.stopwait, runtime·sched.sysmonwait); | 2556 runtime·sched.stopwait, runtime·sched.sysmonwait); |
2547 } | 2557 } |
2548 // We must be careful while reading data from P's, M's and G's. | 2558 // We must be careful while reading data from P's, M's and G's. |
2549 // Even if we hold schedlock, most data can be changed concurrently. | 2559 // Even if we hold schedlock, most data can be changed concurrently. |
2550 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to
nil. | 2560 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to
nil. |
2551 for(i = 0; i < runtime·gomaxprocs; i++) { | 2561 for(i = 0; i < runtime·gomaxprocs; i++) { |
2552 p = runtime·allp[i]; | 2562 p = runtime·allp[i]; |
2553 if(p == nil) | 2563 if(p == nil) |
2554 continue; | 2564 continue; |
2555 mp = p->m; | 2565 mp = p->m; |
2556 » » t = p->runqtail; | 2566 » » h = runtime·atomicload(&p->runqhead); |
2557 » » h = p->runqhead; | 2567 » » t = runtime·atomicload(&p->runqtail); |
2558 » » s = p->runqsize; | |
2559 » » q = t - h; | |
2560 » » if(q < 0) | |
2561 » » » q += s; | |
2562 if(detailed) | 2568 if(detailed) |
2563 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic
k=%d m=%d runqsize=%d/%d gfreecnt=%d\n", | 2569 » » » runtime·printf(" P%d: status=%d schedtick=%d syscalltic
k=%d m=%d runqsize=%d gfreecnt=%d\n", |
2564 » » » » i, p->status, p->schedtick, p->syscalltick, mp ?
mp->id : -1, q, s, p->gfreecnt); | 2570 » » » » i, p->status, p->schedtick, p->syscalltick, mp ?
mp->id : -1, t-h, p->gfreecnt); |
2565 else { | 2571 else { |
2566 // In non-detailed mode format lengths of per-P run queu
es as: | 2572 // In non-detailed mode format lengths of per-P run queu
es as: |
2567 // [len1 len2 len3 len4] | 2573 // [len1 len2 len3 len4] |
2568 fmt = " %d"; | 2574 fmt = " %d"; |
2569 if(runtime·gomaxprocs == 1) | 2575 if(runtime·gomaxprocs == 1) |
2570 fmt = " [%d]\n"; | 2576 fmt = " [%d]\n"; |
2571 else if(i == 0) | 2577 else if(i == 0) |
2572 fmt = " [%d"; | 2578 fmt = " [%d"; |
2573 else if(i == runtime·gomaxprocs-1) | 2579 else if(i == runtime·gomaxprocs-1) |
2574 fmt = " %d]\n"; | 2580 fmt = " %d]\n"; |
2575 » » » runtime·printf(fmt, q); | 2581 » » » runtime·printf(fmt, t-h); |
2576 } | 2582 } |
2577 } | 2583 } |
2578 if(!detailed) { | 2584 if(!detailed) { |
2579 runtime·unlock(&runtime·sched); | 2585 runtime·unlock(&runtime·sched); |
2580 return; | 2586 return; |
2581 } | 2587 } |
2582 for(mp = runtime·allm; mp; mp = mp->alllink) { | 2588 for(mp = runtime·allm; mp; mp = mp->alllink) { |
2583 p = mp->p; | 2589 p = mp->p; |
2584 gp = mp->curg; | 2590 gp = mp->curg; |
2585 lockedg = mp->lockedg; | 2591 lockedg = mp->lockedg; |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2640 { | 2646 { |
2641 gp->schedlink = nil; | 2647 gp->schedlink = nil; |
2642 if(runtime·sched.runqtail) | 2648 if(runtime·sched.runqtail) |
2643 runtime·sched.runqtail->schedlink = gp; | 2649 runtime·sched.runqtail->schedlink = gp; |
2644 else | 2650 else |
2645 runtime·sched.runqhead = gp; | 2651 runtime·sched.runqhead = gp; |
2646 runtime·sched.runqtail = gp; | 2652 runtime·sched.runqtail = gp; |
2647 runtime·sched.runqsize++; | 2653 runtime·sched.runqsize++; |
2648 } | 2654 } |
2649 | 2655 |
| 2656 // Put a batch of runnable goroutines on the global runnable queue. |
| 2657 // Sched must be locked. |
| 2658 static void |
| 2659 globrunqputbatch(G *ghead, G *gtail, int32 n) |
| 2660 { |
| 2661 gtail->schedlink = nil; |
| 2662 if(runtime·sched.runqtail) |
| 2663 runtime·sched.runqtail->schedlink = ghead; |
| 2664 else |
| 2665 runtime·sched.runqhead = ghead; |
| 2666 runtime·sched.runqtail = gtail; |
| 2667 runtime·sched.runqsize += n; |
| 2668 } |
| 2669 |
2650 // Try get a batch of G's from the global runnable queue. | 2670 // Try get a batch of G's from the global runnable queue. |
2651 // Sched must be locked. | 2671 // Sched must be locked. |
2652 static G* | 2672 static G* |
2653 globrunqget(P *p, int32 max) | 2673 globrunqget(P *p, int32 max) |
2654 { | 2674 { |
2655 G *gp, *gp1; | 2675 G *gp, *gp1; |
2656 int32 n; | 2676 int32 n; |
2657 | 2677 |
2658 if(runtime·sched.runqsize == 0) | 2678 if(runtime·sched.runqsize == 0) |
2659 return nil; | 2679 return nil; |
2660 n = runtime·sched.runqsize/runtime·gomaxprocs+1; | 2680 n = runtime·sched.runqsize/runtime·gomaxprocs+1; |
2661 if(n > runtime·sched.runqsize) | 2681 if(n > runtime·sched.runqsize) |
2662 n = runtime·sched.runqsize; | 2682 n = runtime·sched.runqsize; |
2663 if(max > 0 && n > max) | 2683 if(max > 0 && n > max) |
2664 n = max; | 2684 n = max; |
| 2685 if(n > nelem(p->runq)/2) |
| 2686 n = nelem(p->runq)/2; |
2665 runtime·sched.runqsize -= n; | 2687 runtime·sched.runqsize -= n; |
2666 if(runtime·sched.runqsize == 0) | 2688 if(runtime·sched.runqsize == 0) |
2667 runtime·sched.runqtail = nil; | 2689 runtime·sched.runqtail = nil; |
2668 gp = runtime·sched.runqhead; | 2690 gp = runtime·sched.runqhead; |
2669 runtime·sched.runqhead = gp->schedlink; | 2691 runtime·sched.runqhead = gp->schedlink; |
2670 n--; | 2692 n--; |
2671 while(n--) { | 2693 while(n--) { |
2672 gp1 = runtime·sched.runqhead; | 2694 gp1 = runtime·sched.runqhead; |
2673 runtime·sched.runqhead = gp1->schedlink; | 2695 runtime·sched.runqhead = gp1->schedlink; |
2674 runqput(p, gp1); | 2696 runqput(p, gp1); |
(...skipping 19 matching lines...) Expand all Loading... |
2694 P *p; | 2716 P *p; |
2695 | 2717 |
2696 p = runtime·sched.pidle; | 2718 p = runtime·sched.pidle; |
2697 if(p) { | 2719 if(p) { |
2698 runtime·sched.pidle = p->link; | 2720 runtime·sched.pidle = p->link; |
2699 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic | 2721 runtime·xadd(&runtime·sched.npidle, -1); // TODO: fast atomic |
2700 } | 2722 } |
2701 return p; | 2723 return p; |
2702 } | 2724 } |
2703 | 2725 |
2704 // Put g on local runnable queue. | 2726 // Try to put g on local runnable queue. |
2705 // TODO(dvyukov): consider using lock-free queue. | 2727 // If it's full, put onto global queue. |
| 2728 // Executed only by the owner P. |
2706 static void | 2729 static void |
2707 runqput(P *p, G *gp) | 2730 runqput(P *p, G *gp) |
2708 { | 2731 { |
2709 » int32 h, t, s; | 2732 » uint32 h, t; |
2710 | 2733 |
2711 » runtime·lock(p); | |
2712 retry: | 2734 retry: |
2713 » h = p->runqhead; | 2735 » h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with
consumers |
2714 t = p->runqtail; | 2736 t = p->runqtail; |
2715 » s = p->runqsize; | 2737 » if(t - h < nelem(p->runq)) { |
2716 » if(t == h-1 || (h == 0 && t == s-1)) { | 2738 » » p->runq[t%nelem(p->runq)] = gp; |
2717 » » runqgrow(p); | 2739 » » runtime·atomicstore(&p->runqtail, t+1); // store-release, makes
the item available for consumption |
2718 » » goto retry; | 2740 » » return; |
2719 » } | 2741 » } |
2720 » p->runq[t++] = gp; | 2742 » if(runqputslow(p, gp, h, t)) |
2721 » if(t == s) | 2743 » » return; |
2722 » » t = 0; | 2744 » // the queue is not full, now the put above must suceed |
2723 » p->runqtail = t; | 2745 » goto retry; |
2724 » runtime·unlock(p); | 2746 } |
| 2747 |
| 2748 // Put g and a batch of work from local runnable queue on global queue. |
| 2749 // Executed only by the owner P. |
| 2750 static bool |
| 2751 runqputslow(P *p, G *gp, uint32 h, uint32 t) |
| 2752 { |
| 2753 » G *batch[nelem(p->runq)/2+1]; |
| 2754 » uint32 n, i; |
| 2755 |
| 2756 » // First, grab a batch from local queue. |
| 2757 » n = t-h; |
| 2758 » n = n/2; |
| 2759 » if(n != nelem(p->runq)/2) |
| 2760 » » runtime·throw("runqputslow: queue is not full"); |
| 2761 » for(i=0; i<n; i++) |
| 2762 » » batch[i] = p->runq[(h+i)%nelem(p->runq)]; |
| 2763 » if(!runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits consume |
| 2764 » » return false; |
| 2765 » batch[n] = gp; |
| 2766 » // Link the goroutines. |
| 2767 » for(i=0; i<n; i++) |
| 2768 » » batch[i]->schedlink = batch[i+1]; |
| 2769 » // Now put the batch on global queue. |
| 2770 » runtime·lock(&runtime·sched); |
| 2771 » globrunqputbatch(batch[0], batch[n], n+1); |
| 2772 » runtime·unlock(&runtime·sched); |
| 2773 » return true; |
2725 } | 2774 } |
2726 | 2775 |
2727 // Get g from local runnable queue. | 2776 // Get g from local runnable queue. |
| 2777 // Executed only by the owner P. |
2728 static G* | 2778 static G* |
2729 runqget(P *p) | 2779 runqget(P *p) |
2730 { | 2780 { |
2731 G *gp; | 2781 G *gp; |
2732 » int32 t, h, s; | 2782 » uint32 t, h; |
2733 | 2783 |
2734 » if(p->runqhead == p->runqtail) | 2784 » for(;;) { |
2735 » » return nil; | 2785 » » h = runtime·atomicload(&p->runqhead); // load-acquire, synchron
ize with other consumers |
2736 » runtime·lock(p); | 2786 » » t = p->runqtail; |
2737 » h = p->runqhead; | 2787 » » if(t == h) |
2738 » t = p->runqtail; | 2788 » » » return nil; |
2739 » s = p->runqsize; | 2789 » » gp = p->runq[h%nelem(p->runq)]; |
2740 » if(t == h) { | 2790 » » if(runtime·cas(&p->runqhead, h, h+1)) // cas-release, commits c
onsume |
2741 » » runtime·unlock(p); | 2791 » » » return gp; |
2742 » » return nil; | 2792 » } |
2743 » } | 2793 } |
2744 » gp = p->runq[h++]; | 2794 |
2745 » if(h == s) | 2795 // Grabs a batch of goroutines from local runnable queue. |
2746 » » h = 0; | 2796 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed gorou
tines. |
2747 » p->runqhead = h; | 2797 // Can be executed by any P. |
2748 » runtime·unlock(p); | 2798 static uint32 |
2749 » return gp; | 2799 runqgrab(P *p, G **batch) |
2750 } | 2800 { |
2751 | 2801 » uint32 t, h, n, i; |
2752 // Grow local runnable queue. | 2802 |
2753 // TODO(dvyukov): consider using fixed-size array | 2803 » for(;;) { |
2754 // and transfer excess to the global list (local queue can grow way too big). | 2804 » » h = runtime·atomicload(&p->runqhead); // load-acquire, synchron
ize with other consumers |
2755 static void | 2805 » » t = runtime·atomicload(&p->runqtail); // load-acquire, synchron
ize with the producer |
2756 runqgrow(P *p) | 2806 » » n = t-h; |
2757 { | 2807 » » n = n - n/2; |
2758 » G **q; | 2808 » » if(n == 0) |
2759 » int32 s, t, h, t2; | 2809 » » » break; |
2760 | 2810 » » if(n > nelem(p->runq)/2) // read inconsistent h and t |
2761 » h = p->runqhead; | 2811 » » » continue; |
2762 » t = p->runqtail; | 2812 » » for(i=0; i<n; i++) |
2763 » s = p->runqsize; | 2813 » » » batch[i] = p->runq[(h+i)%nelem(p->runq)]; |
2764 » t2 = 0; | 2814 » » if(runtime·cas(&p->runqhead, h, h+n)) // cas-release, commits c
onsume |
2765 » q = runtime·malloc(2*s*sizeof(*q)); | 2815 » » » break; |
2766 » while(t != h) { | 2816 » } |
2767 » » q[t2++] = p->runq[h++]; | 2817 » return n; |
2768 » » if(h == s) | |
2769 » » » h = 0; | |
2770 » } | |
2771 » runtime·free(p->runq); | |
2772 » p->runq = q; | |
2773 » p->runqhead = 0; | |
2774 » p->runqtail = t2; | |
2775 » p->runqsize = 2*s; | |
2776 } | 2818 } |
2777 | 2819 |
2778 // Steal half of elements from local runnable queue of p2 | 2820 // Steal half of elements from local runnable queue of p2 |
2779 // and put onto local runnable queue of p. | 2821 // and put onto local runnable queue of p. |
2780 // Returns one of the stolen elements (or nil if failed). | 2822 // Returns one of the stolen elements (or nil if failed). |
2781 static G* | 2823 static G* |
2782 runqsteal(P *p, P *p2) | 2824 runqsteal(P *p, P *p2) |
2783 { | 2825 { |
2784 » G *gp, *gp1; | 2826 » G *gp; |
2785 » int32 t, h, s, t2, h2, s2, c, i; | 2827 » G *batch[nelem(p->runq)/2]; |
2786 | 2828 » uint32 t, h, n, i; |
2787 » if(p2->runqhead == p2->runqtail) | 2829 |
| 2830 » n = runqgrab(p2, batch); |
| 2831 » if(n == 0) |
2788 return nil; | 2832 return nil; |
2789 » // sort locks to prevent deadlocks | 2833 » n--; |
2790 » if(p < p2) | 2834 » gp = batch[n]; |
2791 » » runtime·lock(p); | 2835 » if(n == 0) |
2792 » runtime·lock(p2); | 2836 » » return gp; |
2793 » if(p2->runqhead == p2->runqtail) { | 2837 » h = runtime·atomicload(&p->runqhead); // load-acquire, synchronize with
consumers |
2794 » » runtime·unlock(p2); | |
2795 » » if(p < p2) | |
2796 » » » runtime·unlock(p); | |
2797 » » return nil; | |
2798 » } | |
2799 » if(p >= p2) | |
2800 » » runtime·lock(p); | |
2801 » // now we've locked both queues and know the victim is not empty | |
2802 » h = p->runqhead; | |
2803 t = p->runqtail; | 2838 t = p->runqtail; |
2804 » s = p->runqsize; | 2839 » if(t - h + n >= nelem(p->runq)) |
2805 » h2 = p2->runqhead; | 2840 » » runtime·throw("runqsteal: runq overflow"); |
2806 » t2 = p2->runqtail; | 2841 » for(i=0; i<n; i++, t++) |
2807 » s2 = p2->runqsize; | 2842 » » p->runq[t%nelem(p->runq)] = batch[i]; |
2808 » gp = p2->runq[h2++]; // return value | 2843 » runtime·atomicstore(&p->runqtail, t); // store-release, makes the item
available for consumption |
2809 » if(h2 == s2) | |
2810 » » h2 = 0; | |
2811 » // steal roughly half | |
2812 » if(t2 > h2) | |
2813 » » c = (t2 - h2) / 2; | |
2814 » else | |
2815 » » c = (s2 - h2 + t2) / 2; | |
2816 » // copy | |
2817 » for(i = 0; i != c; i++) { | |
2818 » » // the target queue is full? | |
2819 » » if(t == h-1 || (h == 0 && t == s-1)) | |
2820 » » » break; | |
2821 » » // the victim queue is empty? | |
2822 » » if(t2 == h2) | |
2823 » » » break; | |
2824 » » gp1 = p2->runq[h2++]; | |
2825 » » if(h2 == s2) | |
2826 » » » h2 = 0; | |
2827 » » p->runq[t++] = gp1; | |
2828 » » if(t == s) | |
2829 » » » t = 0; | |
2830 » } | |
2831 » p->runqtail = t; | |
2832 » p2->runqhead = h2; | |
2833 » runtime·unlock(p2); | |
2834 » runtime·unlock(p); | |
2835 return gp; | 2844 return gp; |
2836 } | 2845 } |
2837 | 2846 |
2838 void | 2847 void |
2839 runtime·testSchedLocalQueue(void) | 2848 runtime·testSchedLocalQueue(void) |
2840 { | 2849 { |
2841 P p; | 2850 P p; |
2842 » G gs[1000]; | 2851 » G gs[nelem(p.runq)]; |
2843 int32 i, j; | 2852 int32 i, j; |
2844 | 2853 |
2845 runtime·memclr((byte*)&p, sizeof(p)); | 2854 runtime·memclr((byte*)&p, sizeof(p)); |
2846 p.runqsize = 1; | |
2847 p.runqhead = 0; | |
2848 p.runqtail = 0; | |
2849 p.runq = runtime·malloc(p.runqsize*sizeof(*p.runq)); | |
2850 | 2855 |
2851 for(i = 0; i < nelem(gs); i++) { | 2856 for(i = 0; i < nelem(gs); i++) { |
2852 if(runqget(&p) != nil) | 2857 if(runqget(&p) != nil) |
2853 runtime·throw("runq is not empty initially"); | 2858 runtime·throw("runq is not empty initially"); |
2854 for(j = 0; j < i; j++) | 2859 for(j = 0; j < i; j++) |
2855 runqput(&p, &gs[i]); | 2860 runqput(&p, &gs[i]); |
2856 for(j = 0; j < i; j++) { | 2861 for(j = 0; j < i; j++) { |
2857 if(runqget(&p) != &gs[i]) { | 2862 if(runqget(&p) != &gs[i]) { |
2858 runtime·printf("bad element at iter %d/%d\n", i,
j); | 2863 runtime·printf("bad element at iter %d/%d\n", i,
j); |
2859 runtime·throw("bad element"); | 2864 runtime·throw("bad element"); |
2860 } | 2865 } |
2861 } | 2866 } |
2862 if(runqget(&p) != nil) | 2867 if(runqget(&p) != nil) |
2863 runtime·throw("runq is not empty afterwards"); | 2868 runtime·throw("runq is not empty afterwards"); |
2864 } | 2869 } |
2865 } | 2870 } |
2866 | 2871 |
2867 void | 2872 void |
2868 runtime·testSchedLocalQueueSteal(void) | 2873 runtime·testSchedLocalQueueSteal(void) |
2869 { | 2874 { |
2870 P p1, p2; | 2875 P p1, p2; |
2871 » G gs[1000], *gp; | 2876 » G gs[nelem(p1.runq)], *gp; |
2872 int32 i, j, s; | 2877 int32 i, j, s; |
2873 | 2878 |
2874 runtime·memclr((byte*)&p1, sizeof(p1)); | 2879 runtime·memclr((byte*)&p1, sizeof(p1)); |
2875 p1.runqsize = 1; | |
2876 p1.runqhead = 0; | |
2877 p1.runqtail = 0; | |
2878 p1.runq = runtime·malloc(p1.runqsize*sizeof(*p1.runq)); | |
2879 | |
2880 runtime·memclr((byte*)&p2, sizeof(p2)); | 2880 runtime·memclr((byte*)&p2, sizeof(p2)); |
2881 p2.runqsize = nelem(gs); | |
2882 p2.runqhead = 0; | |
2883 p2.runqtail = 0; | |
2884 p2.runq = runtime·malloc(p2.runqsize*sizeof(*p2.runq)); | |
2885 | 2881 |
2886 for(i = 0; i < nelem(gs); i++) { | 2882 for(i = 0; i < nelem(gs); i++) { |
2887 for(j = 0; j < i; j++) { | 2883 for(j = 0; j < i; j++) { |
2888 gs[j].sig = 0; | 2884 gs[j].sig = 0; |
2889 runqput(&p1, &gs[j]); | 2885 runqput(&p1, &gs[j]); |
2890 } | 2886 } |
2891 gp = runqsteal(&p2, &p1); | 2887 gp = runqsteal(&p2, &p1); |
2892 s = 0; | 2888 s = 0; |
2893 if(gp) { | 2889 if(gp) { |
2894 s++; | 2890 s++; |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2952 if(experiment[i+j] != name[j]) | 2948 if(experiment[i+j] != name[j]) |
2953 goto nomatch; | 2949 goto nomatch; |
2954 if(experiment[i+j] != '\0' && experiment[i+j] != ',') | 2950 if(experiment[i+j] != '\0' && experiment[i+j] != ',') |
2955 goto nomatch; | 2951 goto nomatch; |
2956 return 1; | 2952 return 1; |
2957 } | 2953 } |
2958 nomatch:; | 2954 nomatch:; |
2959 } | 2955 } |
2960 return 0; | 2956 return 0; |
2961 } | 2957 } |
LEFT | RIGHT |