LEFT | RIGHT |
(no file at all) | |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 #include "zasm_GOOS_GOARCH.h" | 5 #include "zasm_GOOS_GOARCH.h" |
6 #include "funcdata.h" | 6 #include "funcdata.h" |
7 | 7 #include "../../cmd/ld/textflag.h" |
8 TEXT _rt0_go(SB),7,$0 | 8 |
| 9 TEXT _rt0_go(SB),NOSPLIT,$0 |
9 // copy arguments forward on an even stack | 10 // copy arguments forward on an even stack |
10 MOVL argc+0(FP), AX | 11 MOVL argc+0(FP), AX |
11 MOVL argv+4(FP), BX | 12 MOVL argv+4(FP), BX |
12 SUBL $128, SP // plenty of scratch | 13 SUBL $128, SP // plenty of scratch |
13 ANDL $~15, SP | 14 ANDL $~15, SP |
14 MOVL AX, 120(SP) // save argc, argv away | 15 MOVL AX, 120(SP) // save argc, argv away |
15 MOVL BX, 124(SP) | 16 MOVL BX, 124(SP) |
16 | 17 |
17 // set default stack bounds. | 18 // set default stack bounds. |
18 // _cgo_init may update stackguard. | 19 // _cgo_init may update stackguard. |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 POPL AX | 103 POPL AX |
103 POPL AX | 104 POPL AX |
104 | 105 |
105 // start this M | 106 // start this M |
106 CALL runtime·mstart(SB) | 107 CALL runtime·mstart(SB) |
107 | 108 |
108 INT $3 | 109 INT $3 |
109 RET | 110 RET |
110 | 111 |
111 DATA runtime·main·f+0(SB)/4,$runtime·main(SB) | 112 DATA runtime·main·f+0(SB)/4,$runtime·main(SB) |
112 GLOBL» runtime·main·f(SB),8,$4 | 113 GLOBL» runtime·main·f(SB),RODATA,$4 |
113 | 114 |
114 TEXT runtime·breakpoint(SB),7,$0-0 | 115 TEXT runtime·breakpoint(SB),NOSPLIT,$0-0 |
115 INT $3 | 116 INT $3 |
116 RET | 117 RET |
117 | 118 |
118 TEXT runtime·asminit(SB),7,$0-0 | 119 TEXT runtime·asminit(SB),NOSPLIT,$0-0 |
119 // Linux and MinGW start the FPU in extended double precision. | 120 // Linux and MinGW start the FPU in extended double precision. |
120 // Other operating systems use double precision. | 121 // Other operating systems use double precision. |
121 // Change to double precision to match them, | 122 // Change to double precision to match them, |
122 // and to match other hardware that only has double. | 123 // and to match other hardware that only has double. |
123 PUSHL $0x27F | 124 PUSHL $0x27F |
124 FLDCW 0(SP) | 125 FLDCW 0(SP) |
125 POPL AX | 126 POPL AX |
126 RET | 127 RET |
127 | 128 |
128 /* | 129 /* |
129 * go-routine | 130 * go-routine |
130 */ | 131 */ |
131 | 132 |
132 // void gosave(Gobuf*) | 133 // void gosave(Gobuf*) |
133 // save state in Gobuf; setjmp | 134 // save state in Gobuf; setjmp |
134 TEXT runtime·gosave(SB), 7, $0-4 | 135 TEXT runtime·gosave(SB), NOSPLIT, $0-4 |
135 MOVL 4(SP), AX // gobuf | 136 MOVL 4(SP), AX // gobuf |
136 LEAL 4(SP), BX // caller's SP | 137 LEAL 4(SP), BX // caller's SP |
137 MOVL BX, gobuf_sp(AX) | 138 MOVL BX, gobuf_sp(AX) |
138 MOVL 0(SP), BX // caller's PC | 139 MOVL 0(SP), BX // caller's PC |
139 MOVL BX, gobuf_pc(AX) | 140 MOVL BX, gobuf_pc(AX) |
140 MOVL $0, gobuf_ret(AX) | 141 MOVL $0, gobuf_ret(AX) |
141 MOVL $0, gobuf_ctxt(AX) | 142 MOVL $0, gobuf_ctxt(AX) |
142 get_tls(CX) | 143 get_tls(CX) |
143 MOVL g(CX), BX | 144 MOVL g(CX), BX |
144 MOVL BX, gobuf_g(AX) | 145 MOVL BX, gobuf_g(AX) |
145 RET | 146 RET |
146 | 147 |
147 // void gogo(Gobuf*) | 148 // void gogo(Gobuf*) |
148 // restore state from Gobuf; longjmp | 149 // restore state from Gobuf; longjmp |
149 TEXT runtime·gogo(SB), 7, $0-4 | 150 TEXT runtime·gogo(SB), NOSPLIT, $0-4 |
150 MOVL 4(SP), BX // gobuf | 151 MOVL 4(SP), BX // gobuf |
151 MOVL gobuf_g(BX), DX | 152 MOVL gobuf_g(BX), DX |
152 MOVL 0(DX), CX // make sure g != nil | 153 MOVL 0(DX), CX // make sure g != nil |
153 get_tls(CX) | 154 get_tls(CX) |
154 MOVL DX, g(CX) | 155 MOVL DX, g(CX) |
155 MOVL gobuf_sp(BX), SP // restore SP | 156 MOVL gobuf_sp(BX), SP // restore SP |
156 MOVL gobuf_ret(BX), AX | 157 MOVL gobuf_ret(BX), AX |
157 MOVL gobuf_ctxt(BX), DX | 158 MOVL gobuf_ctxt(BX), DX |
158 MOVL $0, gobuf_sp(BX) // clear to help garbage collector | 159 MOVL $0, gobuf_sp(BX) // clear to help garbage collector |
159 MOVL $0, gobuf_ret(BX) | 160 MOVL $0, gobuf_ret(BX) |
160 MOVL $0, gobuf_ctxt(BX) | 161 MOVL $0, gobuf_ctxt(BX) |
161 MOVL gobuf_pc(BX), BX | 162 MOVL gobuf_pc(BX), BX |
162 JMP BX | 163 JMP BX |
163 | 164 |
164 // void mcall(void (*fn)(G*)) | 165 // void mcall(void (*fn)(G*)) |
165 // Switch to m->g0's stack, call fn(g). | 166 // Switch to m->g0's stack, call fn(g). |
166 // Fn must never return. It should gogo(&g->sched) | 167 // Fn must never return. It should gogo(&g->sched) |
167 // to keep running g. | 168 // to keep running g. |
168 TEXT runtime·mcall(SB), 7, $0-4 | 169 TEXT runtime·mcall(SB), NOSPLIT, $0-4 |
169 MOVL fn+0(FP), DI | 170 MOVL fn+0(FP), DI |
170 ········ | 171 ········ |
171 get_tls(CX) | 172 get_tls(CX) |
172 MOVL g(CX), AX // save state in g->sched | 173 MOVL g(CX), AX // save state in g->sched |
173 MOVL 0(SP), BX // caller's PC | 174 MOVL 0(SP), BX // caller's PC |
174 MOVL BX, (g_sched+gobuf_pc)(AX) | 175 MOVL BX, (g_sched+gobuf_pc)(AX) |
175 LEAL 4(SP), BX // caller's SP | 176 LEAL 4(SP), BX // caller's SP |
176 MOVL BX, (g_sched+gobuf_sp)(AX) | 177 MOVL BX, (g_sched+gobuf_sp)(AX) |
177 MOVL AX, (g_sched+gobuf_g)(AX) | 178 MOVL AX, (g_sched+gobuf_g)(AX) |
178 | 179 |
(...skipping 14 matching lines...) Expand all Loading... |
193 /* | 194 /* |
194 * support for morestack | 195 * support for morestack |
195 */ | 196 */ |
196 | 197 |
197 // Called during function prolog when more stack is needed. | 198 // Called during function prolog when more stack is needed. |
198 // | 199 // |
199 // The traceback routines see morestack on a g0 as being | 200 // The traceback routines see morestack on a g0 as being |
200 // the top of a stack (for example, morestack calling newstack | 201 // the top of a stack (for example, morestack calling newstack |
201 // calling the scheduler calling newm calling gc), so we must | 202 // calling the scheduler calling newm calling gc), so we must |
202 // record an argument size. For that purpose, it has no arguments. | 203 // record an argument size. For that purpose, it has no arguments. |
203 TEXT runtime·morestack(SB),7,$0-0 | 204 TEXT runtime·morestack(SB),NOSPLIT,$0-0 |
204 // Cannot grow scheduler stack (m->g0). | 205 // Cannot grow scheduler stack (m->g0). |
205 get_tls(CX) | 206 get_tls(CX) |
206 MOVL m(CX), BX | 207 MOVL m(CX), BX |
207 MOVL m_g0(BX), SI | 208 MOVL m_g0(BX), SI |
208 CMPL g(CX), SI | 209 CMPL g(CX), SI |
209 JNE 2(PC) | 210 JNE 2(PC) |
210 INT $3 | 211 INT $3 |
211 | 212 |
212 // frame size in DI | 213 // frame size in DI |
213 // arg size in AX | 214 // arg size in AX |
(...skipping 28 matching lines...) Expand all Loading... |
242 MOVL AX, SP | 243 MOVL AX, SP |
243 CALL runtime·newstack(SB) | 244 CALL runtime·newstack(SB) |
244 MOVL $0, 0x1003 // crash if newstack returns | 245 MOVL $0, 0x1003 // crash if newstack returns |
245 RET | 246 RET |
246 | 247 |
247 // Called from panic. Mimics morestack, | 248 // Called from panic. Mimics morestack, |
248 // reuses stack growth code to create a frame | 249 // reuses stack growth code to create a frame |
249 // with the desired args running the desired function. | 250 // with the desired args running the desired function. |
250 // | 251 // |
251 // func newstackcall(fn *byte, arg *byte, argsize uint32). | 252 // func newstackcall(fn *byte, arg *byte, argsize uint32). |
252 TEXT runtime·newstackcall(SB), 7, $0-12 | 253 TEXT runtime·newstackcall(SB), NOSPLIT, $0-12 |
253 get_tls(CX) | 254 get_tls(CX) |
254 MOVL m(CX), BX | 255 MOVL m(CX), BX |
255 | 256 |
256 // Save our caller's state as the PC and SP to | 257 // Save our caller's state as the PC and SP to |
257 // restore when returning from f. | 258 // restore when returning from f. |
258 MOVL 0(SP), AX // our caller's PC | 259 MOVL 0(SP), AX // our caller's PC |
259 MOVL AX, (m_morebuf+gobuf_pc)(BX) | 260 MOVL AX, (m_morebuf+gobuf_pc)(BX) |
260 LEAL 4(SP), AX // our caller's SP | 261 LEAL 4(SP), AX // our caller's SP |
261 MOVL AX, (m_morebuf+gobuf_sp)(BX) | 262 MOVL AX, (m_morebuf+gobuf_sp)(BX) |
262 MOVL g(CX), AX | 263 MOVL g(CX), AX |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
297 // of constant-sized-frame functions to encode a few bits of size in the pc. | 298 // of constant-sized-frame functions to encode a few bits of size in the pc. |
298 // Caution: ugly multiline assembly macros in your future! | 299 // Caution: ugly multiline assembly macros in your future! |
299 | 300 |
300 #define DISPATCH(NAME,MAXSIZE) \ | 301 #define DISPATCH(NAME,MAXSIZE) \ |
301 CMPL CX, $MAXSIZE; \ | 302 CMPL CX, $MAXSIZE; \ |
302 JA 3(PC); \ | 303 JA 3(PC); \ |
303 MOVL $runtime·NAME(SB), AX; \ | 304 MOVL $runtime·NAME(SB), AX; \ |
304 JMP AX | 305 JMP AX |
305 // Note: can't just "JMP runtime·NAME(SB)" - bad inlining results. | 306 // Note: can't just "JMP runtime·NAME(SB)" - bad inlining results. |
306 | 307 |
307 TEXT reflect·call(SB), 7, $0-12 | 308 TEXT reflect·call(SB), NOSPLIT, $0-12 |
308 MOVL argsize+8(FP), CX | 309 MOVL argsize+8(FP), CX |
309 DISPATCH(call16, 16) | 310 DISPATCH(call16, 16) |
310 DISPATCH(call32, 32) | 311 DISPATCH(call32, 32) |
311 DISPATCH(call64, 64) | 312 DISPATCH(call64, 64) |
312 DISPATCH(call128, 128) | 313 DISPATCH(call128, 128) |
313 DISPATCH(call256, 256) | 314 DISPATCH(call256, 256) |
314 DISPATCH(call512, 512) | 315 DISPATCH(call512, 512) |
315 DISPATCH(call1024, 1024) | 316 DISPATCH(call1024, 1024) |
316 DISPATCH(call2048, 2048) | 317 DISPATCH(call2048, 2048) |
317 DISPATCH(call4096, 4096) | 318 DISPATCH(call4096, 4096) |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
378 CALLFN(call67108864, 67108864) | 379 CALLFN(call67108864, 67108864) |
379 CALLFN(call134217728, 134217728) | 380 CALLFN(call134217728, 134217728) |
380 CALLFN(call268435456, 268435456) | 381 CALLFN(call268435456, 268435456) |
381 CALLFN(call536870912, 536870912) | 382 CALLFN(call536870912, 536870912) |
382 CALLFN(call1073741824, 1073741824) | 383 CALLFN(call1073741824, 1073741824) |
383 | 384 |
384 // Return point when leaving stack. | 385 // Return point when leaving stack. |
385 // | 386 // |
386 // Lessstack can appear in stack traces for the same reason | 387 // Lessstack can appear in stack traces for the same reason |
387 // as morestack; in that context, it has 0 arguments. | 388 // as morestack; in that context, it has 0 arguments. |
388 TEXT runtime·lessstack(SB), 7, $0-0 | 389 TEXT runtime·lessstack(SB), NOSPLIT, $0-0 |
389 // Save return value in m->cret | 390 // Save return value in m->cret |
390 get_tls(CX) | 391 get_tls(CX) |
391 MOVL m(CX), BX | 392 MOVL m(CX), BX |
392 MOVL AX, m_cret(BX) | 393 MOVL AX, m_cret(BX) |
393 | 394 |
394 // Call oldstack on m->g0's stack. | 395 // Call oldstack on m->g0's stack. |
395 MOVL m_g0(BX), BP | 396 MOVL m_g0(BX), BP |
396 MOVL BP, g(CX) | 397 MOVL BP, g(CX) |
397 MOVL (g_sched+gobuf_sp)(BP), SP | 398 MOVL (g_sched+gobuf_sp)(BP), SP |
398 CALL runtime·oldstack(SB) | 399 CALL runtime·oldstack(SB) |
399 MOVL $0, 0x1004 // crash if oldstack returns | 400 MOVL $0, 0x1004 // crash if oldstack returns |
400 RET | 401 RET |
401 | 402 |
402 | 403 |
403 // bool cas(int32 *val, int32 old, int32 new) | 404 // bool cas(int32 *val, int32 old, int32 new) |
404 // Atomically: | 405 // Atomically: |
405 // if(*val == old){ | 406 // if(*val == old){ |
406 // *val = new; | 407 // *val = new; |
407 // return 1; | 408 // return 1; |
408 // }else | 409 // }else |
409 // return 0; | 410 // return 0; |
410 TEXT runtime·cas(SB), 7, $0-12 | 411 TEXT runtime·cas(SB), NOSPLIT, $0-12 |
411 MOVL 4(SP), BX | 412 MOVL 4(SP), BX |
412 MOVL 8(SP), AX | 413 MOVL 8(SP), AX |
413 MOVL 12(SP), CX | 414 MOVL 12(SP), CX |
414 LOCK | 415 LOCK |
415 CMPXCHGL CX, 0(BX) | 416 CMPXCHGL CX, 0(BX) |
416 JZ 3(PC) | 417 JZ 3(PC) |
417 MOVL $0, AX | 418 MOVL $0, AX |
418 RET | 419 RET |
419 MOVL $1, AX | 420 MOVL $1, AX |
420 RET | 421 RET |
421 | 422 |
422 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new) | 423 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new) |
423 // Atomically: | 424 // Atomically: |
424 // if(*val == old){ | 425 // if(*val == old){ |
425 // *val = new; | 426 // *val = new; |
426 // return 1; | 427 // return 1; |
427 // } else { | 428 // } else { |
428 // return 0; | 429 // return 0; |
429 // } | 430 // } |
430 TEXT runtime·cas64(SB), 7, $0-20 | 431 TEXT runtime·cas64(SB), NOSPLIT, $0-20 |
431 MOVL 4(SP), BP | 432 MOVL 4(SP), BP |
432 MOVL 8(SP), AX | 433 MOVL 8(SP), AX |
433 MOVL 12(SP), DX | 434 MOVL 12(SP), DX |
434 MOVL 16(SP), BX | 435 MOVL 16(SP), BX |
435 MOVL 20(SP), CX | 436 MOVL 20(SP), CX |
436 LOCK | 437 LOCK |
437 CMPXCHG8B 0(BP) | 438 CMPXCHG8B 0(BP) |
438 JNZ cas64_fail | 439 JNZ cas64_fail |
439 MOVL $1, AX | 440 MOVL $1, AX |
440 RET | 441 RET |
441 cas64_fail: | 442 cas64_fail: |
442 MOVL $0, AX | 443 MOVL $0, AX |
443 RET | 444 RET |
444 | 445 |
445 // bool casp(void **p, void *old, void *new) | 446 // bool casp(void **p, void *old, void *new) |
446 // Atomically: | 447 // Atomically: |
447 // if(*p == old){ | 448 // if(*p == old){ |
448 // *p = new; | 449 // *p = new; |
449 // return 1; | 450 // return 1; |
450 // }else | 451 // }else |
451 // return 0; | 452 // return 0; |
452 TEXT runtime·casp(SB), 7, $0-12 | 453 TEXT runtime·casp(SB), NOSPLIT, $0-12 |
453 MOVL 4(SP), BX | 454 MOVL 4(SP), BX |
454 MOVL 8(SP), AX | 455 MOVL 8(SP), AX |
455 MOVL 12(SP), CX | 456 MOVL 12(SP), CX |
456 LOCK | 457 LOCK |
457 CMPXCHGL CX, 0(BX) | 458 CMPXCHGL CX, 0(BX) |
458 JZ 3(PC) | 459 JZ 3(PC) |
459 MOVL $0, AX | 460 MOVL $0, AX |
460 RET | 461 RET |
461 MOVL $1, AX | 462 MOVL $1, AX |
462 RET | 463 RET |
463 | 464 |
464 // uint32 xadd(uint32 volatile *val, int32 delta) | 465 // uint32 xadd(uint32 volatile *val, int32 delta) |
465 // Atomically: | 466 // Atomically: |
466 // *val += delta; | 467 // *val += delta; |
467 // return *val; | 468 // return *val; |
468 TEXT runtime·xadd(SB), 7, $0-8 | 469 TEXT runtime·xadd(SB), NOSPLIT, $0-8 |
469 MOVL 4(SP), BX | 470 MOVL 4(SP), BX |
470 MOVL 8(SP), AX | 471 MOVL 8(SP), AX |
471 MOVL AX, CX | 472 MOVL AX, CX |
472 LOCK | 473 LOCK |
473 XADDL AX, 0(BX) | 474 XADDL AX, 0(BX) |
474 ADDL CX, AX | 475 ADDL CX, AX |
475 RET | 476 RET |
476 | 477 |
477 TEXT runtime·xchg(SB), 7, $0-8 | 478 TEXT runtime·xchg(SB), NOSPLIT, $0-8 |
478 MOVL 4(SP), BX | 479 MOVL 4(SP), BX |
479 MOVL 8(SP), AX | 480 MOVL 8(SP), AX |
480 XCHGL AX, 0(BX) | 481 XCHGL AX, 0(BX) |
481 RET | 482 RET |
482 | 483 |
483 TEXT runtime·procyield(SB),7,$0-0 | 484 TEXT runtime·procyield(SB),NOSPLIT,$0-0 |
484 MOVL 4(SP), AX | 485 MOVL 4(SP), AX |
485 again: | 486 again: |
486 PAUSE | 487 PAUSE |
487 SUBL $1, AX | 488 SUBL $1, AX |
488 JNZ again | 489 JNZ again |
489 RET | 490 RET |
490 | 491 |
491 TEXT runtime·atomicstorep(SB), 7, $0-8 | 492 TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8 |
492 MOVL 4(SP), BX | 493 MOVL 4(SP), BX |
493 MOVL 8(SP), AX | 494 MOVL 8(SP), AX |
494 XCHGL AX, 0(BX) | 495 XCHGL AX, 0(BX) |
495 RET | 496 RET |
496 | 497 |
497 TEXT runtime·atomicstore(SB), 7, $0-8 | 498 TEXT runtime·atomicstore(SB), NOSPLIT, $0-8 |
498 MOVL 4(SP), BX | 499 MOVL 4(SP), BX |
499 MOVL 8(SP), AX | 500 MOVL 8(SP), AX |
500 XCHGL AX, 0(BX) | 501 XCHGL AX, 0(BX) |
501 RET | 502 RET |
502 | 503 |
503 // uint64 atomicload64(uint64 volatile* addr); | 504 // uint64 atomicload64(uint64 volatile* addr); |
504 // so actually | 505 // so actually |
505 // void atomicload64(uint64 *res, uint64 volatile *addr); | 506 // void atomicload64(uint64 *res, uint64 volatile *addr); |
506 TEXT runtime·atomicload64(SB), 7, $0-8 | 507 TEXT runtime·atomicload64(SB), NOSPLIT, $0-8 |
507 MOVL 4(SP), BX | 508 MOVL 4(SP), BX |
508 MOVL 8(SP), AX | 509 MOVL 8(SP), AX |
509 // MOVQ (%EAX), %MM0 | 510 // MOVQ (%EAX), %MM0 |
510 BYTE $0x0f; BYTE $0x6f; BYTE $0x00 | 511 BYTE $0x0f; BYTE $0x6f; BYTE $0x00 |
511 // MOVQ %MM0, 0(%EBX) | 512 // MOVQ %MM0, 0(%EBX) |
512 BYTE $0x0f; BYTE $0x7f; BYTE $0x03 | 513 BYTE $0x0f; BYTE $0x7f; BYTE $0x03 |
513 // EMMS | 514 // EMMS |
514 BYTE $0x0F; BYTE $0x77 | 515 BYTE $0x0F; BYTE $0x77 |
515 RET | 516 RET |
516 | 517 |
517 // void runtime·atomicstore64(uint64 volatile* addr, uint64 v); | 518 // void runtime·atomicstore64(uint64 volatile* addr, uint64 v); |
518 TEXT runtime·atomicstore64(SB), 7, $0-12 | 519 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12 |
519 MOVL 4(SP), AX | 520 MOVL 4(SP), AX |
520 // MOVQ and EMMS were introduced on the Pentium MMX. | 521 // MOVQ and EMMS were introduced on the Pentium MMX. |
521 // MOVQ 0x8(%ESP), %MM0 | 522 // MOVQ 0x8(%ESP), %MM0 |
522 BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08 | 523 BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08 |
523 // MOVQ %MM0, (%EAX) | 524 // MOVQ %MM0, (%EAX) |
524 BYTE $0x0f; BYTE $0x7f; BYTE $0x00· | 525 BYTE $0x0f; BYTE $0x7f; BYTE $0x00· |
525 // EMMS | 526 // EMMS |
526 BYTE $0x0F; BYTE $0x77 | 527 BYTE $0x0F; BYTE $0x77 |
527 // This is essentially a no-op, but it provides required memory fencing. | 528 // This is essentially a no-op, but it provides required memory fencing. |
528 // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2). | 529 // It can be replaced with MFENCE, but MFENCE was introduced only on the Pentium4 (SSE2). |
529 MOVL $0, AX | 530 MOVL $0, AX |
530 LOCK | 531 LOCK |
531 XADDL AX, (SP) | 532 XADDL AX, (SP) |
532 RET | 533 RET |
533 | 534 |
534 // void jmpdefer(fn, sp); | 535 // void jmpdefer(fn, sp); |
535 // called from deferreturn. | 536 // called from deferreturn. |
536 // 1. pop the caller | 537 // 1. pop the caller |
537 // 2. sub 5 bytes from the caller's return address | 538 // 2. sub 5 bytes from the caller's return address |
538 // 3. jmp to the argument | 539 // 3. jmp to the argument |
539 TEXT runtime·jmpdefer(SB), 7, $0 | 540 TEXT runtime·jmpdefer(SB), NOSPLIT, $0 |
540 MOVL 4(SP), DX // fn | 541 MOVL 4(SP), DX // fn |
541 MOVL 8(SP), BX // caller sp | 542 MOVL 8(SP), BX // caller sp |
542 LEAL -4(BX), SP // caller sp after CALL | 543 LEAL -4(BX), SP // caller sp after CALL |
543 SUBL $5, (SP) // return to CALL again | 544 SUBL $5, (SP) // return to CALL again |
544 MOVL 0(DX), BX | 545 MOVL 0(DX), BX |
545 JMP BX // but first run the deferred function | 546 JMP BX // but first run the deferred function |
546 | 547 |
547 // Save state of caller into g->sched. | 548 // Save state of caller into g->sched. |
548 TEXT gosave<>(SB),7,$0 | 549 TEXT gosave<>(SB),NOSPLIT,$0 |
549 PUSHL AX | 550 PUSHL AX |
550 PUSHL BX | 551 PUSHL BX |
551 get_tls(BX) | 552 get_tls(BX) |
552 MOVL g(BX), BX | 553 MOVL g(BX), BX |
553 LEAL arg+0(FP), AX | 554 LEAL arg+0(FP), AX |
554 MOVL AX, (g_sched+gobuf_sp)(BX) | 555 MOVL AX, (g_sched+gobuf_sp)(BX) |
555 MOVL -4(AX), AX | 556 MOVL -4(AX), AX |
556 MOVL AX, (g_sched+gobuf_pc)(BX) | 557 MOVL AX, (g_sched+gobuf_pc)(BX) |
557 MOVL $0, (g_sched+gobuf_ret)(BX) | 558 MOVL $0, (g_sched+gobuf_ret)(BX) |
558 MOVL $0, (g_sched+gobuf_ctxt)(BX) | 559 MOVL $0, (g_sched+gobuf_ctxt)(BX) |
559 POPL BX | 560 POPL BX |
560 POPL AX | 561 POPL AX |
561 RET | 562 RET |
562 | 563 |
563 // asmcgocall(void(*fn)(void*), void *arg) | 564 // asmcgocall(void(*fn)(void*), void *arg) |
564 // Call fn(arg) on the scheduler stack, | 565 // Call fn(arg) on the scheduler stack, |
565 // aligned appropriately for the gcc ABI. | 566 // aligned appropriately for the gcc ABI. |
566 // See cgocall.c for more details. | 567 // See cgocall.c for more details. |
567 TEXT runtime·asmcgocall(SB),7,$0-8 | 568 TEXT runtime·asmcgocall(SB),NOSPLIT,$0-8 |
568 MOVL fn+0(FP), AX | 569 MOVL fn+0(FP), AX |
569 MOVL arg+4(FP), BX | 570 MOVL arg+4(FP), BX |
570 MOVL SP, DX | 571 MOVL SP, DX |
571 | 572 |
572 // Figure out if we need to switch to m->g0 stack. | 573 // Figure out if we need to switch to m->g0 stack. |
573 // We get called to create new OS threads too, and those | 574 // We get called to create new OS threads too, and those |
574 // come in on the m->g0 stack already. | 575 // come in on the m->g0 stack already. |
575 get_tls(CX) | 576 get_tls(CX) |
576 MOVL m(CX), BP | 577 MOVL m(CX), BP |
577 MOVL m_g0(BP), SI | 578 MOVL m_g0(BP), SI |
(...skipping 15 matching lines...) Expand all Loading... |
593 // Restore registers, g, stack pointer. | 594 // Restore registers, g, stack pointer. |
594 get_tls(CX) | 595 get_tls(CX) |
595 MOVL 8(SP), DI | 596 MOVL 8(SP), DI |
596 MOVL DI, g(CX) | 597 MOVL DI, g(CX) |
597 MOVL 4(SP), SP | 598 MOVL 4(SP), SP |
598 RET | 599 RET |
599 | 600 |
600 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) | 601 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize) |
601 // Turn the fn into a Go func (by taking its address) and call | 602 // Turn the fn into a Go func (by taking its address) and call |
602 // cgocallback_gofunc. | 603 // cgocallback_gofunc. |
603 TEXT runtime·cgocallback(SB),7,$12-12 | 604 TEXT runtime·cgocallback(SB),NOSPLIT,$12-12 |
604 LEAL fn+0(FP), AX | 605 LEAL fn+0(FP), AX |
605 MOVL AX, 0(SP) | 606 MOVL AX, 0(SP) |
606 MOVL frame+4(FP), AX | 607 MOVL frame+4(FP), AX |
607 MOVL AX, 4(SP) | 608 MOVL AX, 4(SP) |
608 MOVL framesize+8(FP), AX | 609 MOVL framesize+8(FP), AX |
609 MOVL AX, 8(SP) | 610 MOVL AX, 8(SP) |
610 MOVL $runtime·cgocallback_gofunc(SB), AX | 611 MOVL $runtime·cgocallback_gofunc(SB), AX |
611 CALL AX | 612 CALL AX |
612 RET | 613 RET |
613 | 614 |
614 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize) | 615 // cgocallback_gofunc(FuncVal*, void *frame, uintptr framesize) |
615 // See cgocall.c for more details. | 616 // See cgocall.c for more details. |
616 TEXT runtime·cgocallback_gofunc(SB),7,$12-12 | 617 TEXT runtime·cgocallback_gofunc(SB),NOSPLIT,$12-12 |
617 // If m is nil, Go did not create the current thread. | 618 // If m is nil, Go did not create the current thread. |
618 // Call needm to obtain one for temporary use. | 619 // Call needm to obtain one for temporary use. |
619 // In this case, we're running on the thread stack, so there's | 620 // In this case, we're running on the thread stack, so there's |
620 // lots of space, but the linker doesn't know. Hide the call from | 621 // lots of space, but the linker doesn't know. Hide the call from |
621 // the linker analysis by using an indirect call through AX. | 622 // the linker analysis by using an indirect call through AX. |
622 get_tls(CX) | 623 get_tls(CX) |
623 #ifdef GOOS_windows | 624 #ifdef GOOS_windows |
624 MOVL $0, BP | 625 MOVL $0, BP |
625 CMPL CX, $0 | 626 CMPL CX, $0 |
626 JEQ 2(PC) | 627 JEQ 2(PC) |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
697 // for the duration of the call. Since the call is over, return it with dropm. | 698 // for the duration of the call. Since the call is over, return it with dropm. |
698 CMPL DX, $0 | 699 CMPL DX, $0 |
699 JNE 3(PC) | 700 JNE 3(PC) |
700 MOVL $runtime·dropm(SB), AX | 701 MOVL $runtime·dropm(SB), AX |
701 CALL AX | 702 CALL AX |
702 | 703 |
703 // Done! | 704 // Done! |
704 RET | 705 RET |
705 | 706 |
706 // void setmg(M*, G*); set m and g. for use by needm. | 707 // void setmg(M*, G*); set m and g. for use by needm. |
707 TEXT runtime·setmg(SB), 7, $0-8 | 708 TEXT runtime·setmg(SB), NOSPLIT, $0-8 |
708 #ifdef GOOS_windows | 709 #ifdef GOOS_windows |
709 MOVL mm+0(FP), AX | 710 MOVL mm+0(FP), AX |
710 CMPL AX, $0 | 711 CMPL AX, $0 |
711 JNE settls | 712 JNE settls |
712 MOVL $0, 0x14(FS) | 713 MOVL $0, 0x14(FS) |
713 RET | 714 RET |
714 settls: | 715 settls: |
715 LEAL m_tls(AX), AX | 716 LEAL m_tls(AX), AX |
716 MOVL AX, 0x14(FS) | 717 MOVL AX, 0x14(FS) |
717 #endif | 718 #endif |
718 MOVL mm+0(FP), AX | 719 MOVL mm+0(FP), AX |
719 get_tls(CX) | 720 get_tls(CX) |
720 MOVL mm+0(FP), AX | 721 MOVL mm+0(FP), AX |
721 MOVL AX, m(CX) | 722 MOVL AX, m(CX) |
722 MOVL gg+4(FP), BX | 723 MOVL gg+4(FP), BX |
723 MOVL BX, g(CX) | 724 MOVL BX, g(CX) |
724 RET | 725 RET |
725 | 726 |
726 // void setmg_gcc(M*, G*); set m and g. for use by gcc | 727 // void setmg_gcc(M*, G*); set m and g. for use by gcc |
727 TEXT setmg_gcc<>(SB), 7, $0 | 728 TEXT setmg_gcc<>(SB), NOSPLIT, $0 |
728 get_tls(AX) | 729 get_tls(AX) |
729 MOVL mm+0(FP), DX | 730 MOVL mm+0(FP), DX |
730 MOVL DX, m(AX) | 731 MOVL DX, m(AX) |
731 MOVL gg+4(FP), DX | 732 MOVL gg+4(FP), DX |
732 MOVL DX,g (AX) | 733 MOVL DX,g (AX) |
733 RET | 734 RET |
734 | 735 |
735 // check that SP is in range (g->stackguard, g->stackbase) | 736 // check that SP is in range (g->stackguard, g->stackbase) |
736 TEXT runtime·stackcheck(SB), 7, $0-0 | 737 TEXT runtime·stackcheck(SB), NOSPLIT, $0-0 |
737 get_tls(CX) | 738 get_tls(CX) |
738 MOVL g(CX), AX | 739 MOVL g(CX), AX |
739 CMPL g_stackbase(AX), SP | 740 CMPL g_stackbase(AX), SP |
740 JHI 2(PC) | 741 JHI 2(PC) |
741 INT $3 | 742 INT $3 |
742 CMPL SP, g_stackguard(AX) | 743 CMPL SP, g_stackguard(AX) |
743 JHI 2(PC) | 744 JHI 2(PC) |
744 INT $3 | 745 INT $3 |
745 RET | 746 RET |
746 | 747 |
747 TEXT runtime·memclr(SB),7,$0-8 | 748 TEXT runtime·memclr(SB),NOSPLIT,$0-8 |
748 MOVL 4(SP), DI // arg 1 addr | 749 MOVL 4(SP), DI // arg 1 addr |
749 MOVL 8(SP), CX // arg 2 count | 750 MOVL 8(SP), CX // arg 2 count |
750 MOVL CX, BX | 751 MOVL CX, BX |
751 ANDL $3, BX | 752 ANDL $3, BX |
752 SHRL $2, CX | 753 SHRL $2, CX |
753 MOVL $0, AX | 754 MOVL $0, AX |
754 CLD | 755 CLD |
755 REP | 756 REP |
756 STOSL | 757 STOSL |
757 MOVL BX, CX | 758 MOVL BX, CX |
758 REP | 759 REP |
759 STOSB | 760 STOSB |
760 RET | 761 RET |
761 | 762 |
762 TEXT runtime·getcallerpc(SB),7,$0-4 | 763 TEXT runtime·getcallerpc(SB),NOSPLIT,$0-4 |
763 MOVL x+0(FP),AX // addr of first arg | 764 MOVL x+0(FP),AX // addr of first arg |
764 MOVL -4(AX),AX // get calling pc | 765 MOVL -4(AX),AX // get calling pc |
765 RET | 766 RET |
766 | 767 |
767 TEXT runtime·setcallerpc(SB),7,$0-8 | 768 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8 |
768 MOVL x+0(FP),AX // addr of first arg | 769 MOVL x+0(FP),AX // addr of first arg |
769 MOVL x+4(FP), BX | 770 MOVL x+4(FP), BX |
770 MOVL BX, -4(AX) // set calling pc | 771 MOVL BX, -4(AX) // set calling pc |
771 RET | 772 RET |
772 | 773 |
773 TEXT runtime·getcallersp(SB), 7, $0-4 | 774 TEXT runtime·getcallersp(SB), NOSPLIT, $0-4 |
774 MOVL sp+0(FP), AX | 775 MOVL sp+0(FP), AX |
775 RET | 776 RET |
776 | 777 |
777 // int64 runtime·cputicks(void), so really | 778 // int64 runtime·cputicks(void), so really |
778 // void runtime·cputicks(int64 *ticks) | 779 // void runtime·cputicks(int64 *ticks) |
779 TEXT runtime·cputicks(SB),7,$0-4 | 780 TEXT runtime·cputicks(SB),NOSPLIT,$0-4 |
780 RDTSC | 781 RDTSC |
781 MOVL ret+0(FP), DI | 782 MOVL ret+0(FP), DI |
782 MOVL AX, 0(DI) | 783 MOVL AX, 0(DI) |
783 MOVL DX, 4(DI) | 784 MOVL DX, 4(DI) |
784 RET | 785 RET |
785 | 786 |
786 TEXT runtime·ldt0setup(SB),7,$16-0 | 787 TEXT runtime·ldt0setup(SB),NOSPLIT,$16-0 |
787 // set up ldt 7 to point at tls0 | 788 // set up ldt 7 to point at tls0 |
788 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go. | 789 // ldt 1 would be fine on Linux, but on OS X, 7 is as low as we can go. |
789 // the entry number is just a hint. setldt will set up GS with what it used. | 790 // the entry number is just a hint. setldt will set up GS with what it used. |
790 MOVL $7, 0(SP) | 791 MOVL $7, 0(SP) |
791 LEAL runtime·tls0(SB), AX | 792 LEAL runtime·tls0(SB), AX |
792 MOVL AX, 4(SP) | 793 MOVL AX, 4(SP) |
793 MOVL $32, 8(SP) // sizeof(tls array) | 794 MOVL $32, 8(SP) // sizeof(tls array) |
794 CALL runtime·setldt(SB) | 795 CALL runtime·setldt(SB) |
795 RET | 796 RET |
796 | 797 |
797 TEXT runtime·emptyfunc(SB),0,$0-0 | 798 TEXT runtime·emptyfunc(SB),0,$0-0 |
798 RET | 799 RET |
799 | 800 |
800 TEXT runtime·abort(SB),7,$0-0 | 801 TEXT runtime·abort(SB),NOSPLIT,$0-0 |
801 INT $0x3 | 802 INT $0x3 |
802 | 803 |
803 TEXT runtime·stackguard(SB),7,$0-8 | 804 TEXT runtime·stackguard(SB),NOSPLIT,$0-8 |
804 MOVL SP, DX | 805 MOVL SP, DX |
805 MOVL DX, sp+0(FP) | 806 MOVL DX, sp+0(FP) |
806 get_tls(CX) | 807 get_tls(CX) |
807 MOVL g(CX), BX | 808 MOVL g(CX), BX |
808 MOVL g_stackguard(BX), DX | 809 MOVL g_stackguard(BX), DX |
809 MOVL DX, limit+4(FP) | 810 MOVL DX, limit+4(FP) |
810 RET | 811 RET |
811 | 812 |
812 GLOBL runtime·tls0(SB), $32 | 813 GLOBL runtime·tls0(SB), $32 |
813 | 814 |
814 // hash function using AES hardware instructions | 815 // hash function using AES hardware instructions |
815 TEXT runtime·aeshash(SB),7,$0-12 | 816 TEXT runtime·aeshash(SB),NOSPLIT,$0-12 |
816 MOVL 4(SP), DX // ptr to hash value | 817 MOVL 4(SP), DX // ptr to hash value |
817 MOVL 8(SP), CX // size | 818 MOVL 8(SP), CX // size |
818 MOVL 12(SP), AX // ptr to data | 819 MOVL 12(SP), AX // ptr to data |
819 JMP runtime·aeshashbody(SB) | 820 JMP runtime·aeshashbody(SB) |
820 | 821 |
821 TEXT runtime·aeshashstr(SB),7,$0-12 | 822 TEXT runtime·aeshashstr(SB),NOSPLIT,$0-12 |
822 MOVL 4(SP), DX // ptr to hash value | 823 MOVL 4(SP), DX // ptr to hash value |
823 MOVL 12(SP), AX // ptr to string struct | 824 MOVL 12(SP), AX // ptr to string struct |
824 MOVL 4(AX), CX // length of string | 825 MOVL 4(AX), CX // length of string |
825 MOVL (AX), AX // string data | 826 MOVL (AX), AX // string data |
826 JMP runtime·aeshashbody(SB) | 827 JMP runtime·aeshashbody(SB) |
827 | 828 |
828 // AX: data | 829 // AX: data |
829 // CX: length | 830 // CX: length |
830 // DX: ptr to seed input / hash output | 831 // DX: ptr to seed input / hash output |
831 TEXT runtime·aeshashbody(SB),7,$0-12 | 832 TEXT runtime·aeshashbody(SB),NOSPLIT,$0-12 |
832 MOVL (DX), X0 // seed to low 32 bits of xmm0 | 833 MOVL (DX), X0 // seed to low 32 bits of xmm0 |
833 PINSRD $1, CX, X0 // size to next 32 bits of xmm0 | 834 PINSRD $1, CX, X0 // size to next 32 bits of xmm0 |
834 MOVO runtime·aeskeysched+0(SB), X2 | 835 MOVO runtime·aeskeysched+0(SB), X2 |
835 MOVO runtime·aeskeysched+16(SB), X3 | 836 MOVO runtime·aeskeysched+16(SB), X3 |
836 CMPL CX, $16 | 837 CMPL CX, $16 |
837 JB aessmall | 838 JB aessmall |
838 aesloop: | 839 aesloop: |
839 CMPL CX, $16 | 840 CMPL CX, $16 |
840 JBE aesloopend | 841 JBE aesloopend |
841 MOVOU (AX), X1 | 842 MOVOU (AX), X1 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
876 AESENC X3, X0 | 877 AESENC X3, X0 |
877 AESENC X1, X0 | 878 AESENC X1, X0 |
878 finalize:······· | 879 finalize:······· |
879 // finalize hash | 880 // finalize hash |
880 AESENC X2, X0 | 881 AESENC X2, X0 |
881 AESENC X3, X0 | 882 AESENC X3, X0 |
882 AESENC X2, X0 | 883 AESENC X2, X0 |
883 MOVL X0, (DX) | 884 MOVL X0, (DX) |
884 RET | 885 RET |
885 | 886 |
886 TEXT runtime·aeshash32(SB),7,$0-12 | 887 TEXT runtime·aeshash32(SB),NOSPLIT,$0-12 |
887 MOVL 4(SP), DX // ptr to hash value | 888 MOVL 4(SP), DX // ptr to hash value |
888 MOVL 12(SP), AX // ptr to data | 889 MOVL 12(SP), AX // ptr to data |
889 MOVL (DX), X0 // seed | 890 MOVL (DX), X0 // seed |
890 PINSRD $1, (AX), X0 // data | 891 PINSRD $1, (AX), X0 // data |
891 AESENC runtime·aeskeysched+0(SB), X0 | 892 AESENC runtime·aeskeysched+0(SB), X0 |
892 AESENC runtime·aeskeysched+16(SB), X0 | 893 AESENC runtime·aeskeysched+16(SB), X0 |
893 AESENC runtime·aeskeysched+0(SB), X0 | 894 AESENC runtime·aeskeysched+0(SB), X0 |
894 MOVL X0, (DX) | 895 MOVL X0, (DX) |
895 RET | 896 RET |
896 | 897 |
// runtime·aeshash64 hashes a 64-bit value using the AES round instruction.
// Stack inputs: 4(SP) = pointer to the hash value (read as the seed, then
// overwritten with the result), 12(SP) = pointer to the data.
// Unlike aeshash32, the 8 data bytes are loaded first into the low half of X0
// and the seed is inserted afterwards as dword 2. The state is then run
// through three AESENC rounds keyed from runtime·aeskeysched (+0, +16, +0),
// and the low 32 bits of X0 are stored back through DX.
897 TEXT runtime·aeshash64(SB),7,$0-12 | 898 TEXT runtime·aeshash64(SB),NOSPLIT,$0-12 |
898 MOVL 4(SP), DX // ptr to hash value | 899 MOVL 4(SP), DX // ptr to hash value |
899 MOVL 12(SP), AX // ptr to data | 900 MOVL 12(SP), AX // ptr to data |
900 MOVQ (AX), X0 // data | 901 MOVQ (AX), X0 // data |
901 PINSRD $2, (DX), X0 // seed | 902 PINSRD $2, (DX), X0 // seed |
// three scrambling rounds, alternating the two 16-byte round keys
902 AESENC runtime·aeskeysched+0(SB), X0 | 903 AESENC runtime·aeskeysched+0(SB), X0 |
903 AESENC runtime·aeskeysched+16(SB), X0 | 904 AESENC runtime·aeskeysched+16(SB), X0 |
904 AESENC runtime·aeskeysched+0(SB), X0 | 905 AESENC runtime·aeskeysched+0(SB), X0 |
// write the new hash value over the seed
905 MOVL X0, (DX) | 906 MOVL X0, (DX) |
906 RET | 907 RET |
907 | 908 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
979 DATA masks<>+0xe0(SB)/4, $0xffffffff | 980 DATA masks<>+0xe0(SB)/4, $0xffffffff |
980 DATA masks<>+0xe4(SB)/4, $0xffffffff | 981 DATA masks<>+0xe4(SB)/4, $0xffffffff |
981 DATA masks<>+0xe8(SB)/4, $0xffffffff | 982 DATA masks<>+0xe8(SB)/4, $0xffffffff |
982 DATA masks<>+0xec(SB)/4, $0x0000ffff | 983 DATA masks<>+0xec(SB)/4, $0x0000ffff |
983 ········ | 984 ········ |
984 DATA masks<>+0xf0(SB)/4, $0xffffffff | 985 DATA masks<>+0xf0(SB)/4, $0xffffffff |
985 DATA masks<>+0xf4(SB)/4, $0xffffffff | 986 DATA masks<>+0xf4(SB)/4, $0xffffffff |
986 DATA masks<>+0xf8(SB)/4, $0xffffffff | 987 DATA masks<>+0xf8(SB)/4, $0xffffffff |
987 DATA masks<>+0xfc(SB)/4, $0x00ffffff | 988 DATA masks<>+0xfc(SB)/4, $0x00ffffff |
988 | 989 |
989 GLOBL masks<>(SB),8,$256 | 990 GLOBL masks<>(SB),RODATA,$256 |
990 | 991 |
991 // these are arguments to pshufb. They move data down from | 992 // these are arguments to pshufb. They move data down from |
992 // the high bytes of the register to the low bytes of the register. | 993 // the high bytes of the register to the low bytes of the register. |
993 // index is how many bytes to move. | 994 // index is how many bytes to move. |
994 DATA shifts<>+0x00(SB)/4, $0x00000000 | 995 DATA shifts<>+0x00(SB)/4, $0x00000000 |
995 DATA shifts<>+0x04(SB)/4, $0x00000000 | 996 DATA shifts<>+0x04(SB)/4, $0x00000000 |
996 DATA shifts<>+0x08(SB)/4, $0x00000000 | 997 DATA shifts<>+0x08(SB)/4, $0x00000000 |
997 DATA shifts<>+0x0c(SB)/4, $0x00000000 | 998 DATA shifts<>+0x0c(SB)/4, $0x00000000 |
998 ········ | 999 ········ |
999 DATA shifts<>+0x10(SB)/4, $0xffffff0f | 1000 DATA shifts<>+0x10(SB)/4, $0xffffff0f |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1064 DATA shifts<>+0xe0(SB)/4, $0x05040302 | 1065 DATA shifts<>+0xe0(SB)/4, $0x05040302 |
1065 DATA shifts<>+0xe4(SB)/4, $0x09080706 | 1066 DATA shifts<>+0xe4(SB)/4, $0x09080706 |
1066 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a | 1067 DATA shifts<>+0xe8(SB)/4, $0x0d0c0b0a |
1067 DATA shifts<>+0xec(SB)/4, $0xffff0f0e | 1068 DATA shifts<>+0xec(SB)/4, $0xffff0f0e |
1068 ········ | 1069 ········ |
1069 DATA shifts<>+0xf0(SB)/4, $0x04030201 | 1070 DATA shifts<>+0xf0(SB)/4, $0x04030201 |
1070 DATA shifts<>+0xf4(SB)/4, $0x08070605 | 1071 DATA shifts<>+0xf4(SB)/4, $0x08070605 |
1071 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09 | 1072 DATA shifts<>+0xf8(SB)/4, $0x0c0b0a09 |
1072 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d | 1073 DATA shifts<>+0xfc(SB)/4, $0xff0f0e0d |
1073 | 1074 |
1074 GLOBL shifts<>(SB),8,$256 | 1075 GLOBL shifts<>(SB),RODATA,$256 |
1075 | 1076 |
// runtime·memeq is the stack-argument entry point for a memory equality test.
// It loads a, b and count from the frame into SI, DI and BX, the registers
// runtime·memeqbody documents as its inputs, and then jumps (not calls) to
// memeqbody, so memeqbody's RET returns straight to memeq's caller.
// The visible tail of memeqbody leaves its result in AX.
1076 TEXT runtime·memeq(SB),7,$0-12 | 1077 TEXT runtime·memeq(SB),NOSPLIT,$0-12 |
1077 MOVL a+0(FP), SI | 1078 MOVL a+0(FP), SI |
1078 MOVL b+4(FP), DI | 1079 MOVL b+4(FP), DI |
1079 MOVL count+8(FP), BX | 1080 MOVL count+8(FP), BX |
1080 JMP runtime·memeqbody(SB) | 1081 JMP runtime·memeqbody(SB) |
1081 | 1082 |
// bytes·Equal reports whether two byte slices hold the same bytes.
// Frame layout used here: a at +0 (pointer) and +4 (length), b at +12
// (pointer) and +16 (length), one-byte result at +24.
// Slices of different length are reported unequal without touching their
// contents; otherwise the comparison is delegated to runtime·memeqbody with
// SI = a, DI = b, BX = length, and the low byte of AX becomes the result.
1082 TEXT bytes·Equal(SB),7,$0-25 | 1083 TEXT bytes·Equal(SB),NOSPLIT,$0-25 |
1083 MOVL a_len+4(FP), BX | 1084 MOVL a_len+4(FP), BX |
1084 MOVL b_len+16(FP), CX | 1085 MOVL b_len+16(FP), CX |
// preset the answer to 0 (false) so the length-mismatch path can jump
// directly to the store
1085 XORL AX, AX | 1086 XORL AX, AX |
1086 CMPL BX, CX | 1087 CMPL BX, CX |
1087 JNE eqret | 1088 JNE eqret |
// equal lengths: compare the contents
1088 MOVL a+0(FP), SI | 1089 MOVL a+0(FP), SI |
1089 MOVL b+12(FP), DI | 1090 MOVL b+12(FP), DI |
1090 CALL runtime·memeqbody(SB) | 1091 CALL runtime·memeqbody(SB) |
1091 eqret: | 1092 eqret: |
1092 MOVB AX, ret+24(FP) | 1093 MOVB AX, ret+24(FP) |
1093 RET | 1094 RET |
1094 | 1095 |
1095 // a in SI | 1096 // a in SI |
1096 // b in DI | 1097 // b in DI |
1097 // count in BX | 1098 // count in BX |
1098 TEXT runtime·memeqbody(SB),7,$0-0 | 1099 TEXT runtime·memeqbody(SB),NOSPLIT,$0-0 |
1099 XORL AX, AX | 1100 XORL AX, AX |
1100 | 1101 |
1101 CMPL BX, $4 | 1102 CMPL BX, $4 |
1102 JB small | 1103 JB small |
1103 | 1104 |
1104 // 64 bytes at a time using xmm registers | 1105 // 64 bytes at a time using xmm registers |
1105 hugeloop: | 1106 hugeloop: |
1106 CMPL BX, $64 | 1107 CMPL BX, $64 |
1107 JB bigloop | 1108 JB bigloop |
1108 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 | 1109 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1181 MOVL -4(DI)(BX*1), DI | 1182 MOVL -4(DI)(BX*1), DI |
1182 SHRL CX, DI | 1183 SHRL CX, DI |
1183 di_finish: | 1184 di_finish: |
1184 | 1185 |
1185 SUBL SI, DI | 1186 SUBL SI, DI |
1186 SHLL CX, DI | 1187 SHLL CX, DI |
1187 equal: | 1188 equal: |
1188 SETEQ AX | 1189 SETEQ AX |
1189 RET | 1190 RET |
1190 | 1191 |
// runtime·cmpstring compares two strings passed on the stack.
// Frame layout used here: s1 at +0 (pointer) and +4 (length), s2 at +8
// (pointer) and +12 (length), 4-byte result at +16.
// The operands are loaded into the registers runtime·cmpbody documents as its
// inputs (SI = a, BX = alen, DI = b, DX = blen); cmpbody's AX output, which it
// documents as 1/0/-1, is stored as the result.
1191 TEXT runtime·cmpstring(SB),7,$0-20 | 1192 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20 |
1192 MOVL s1+0(FP), SI | 1193 MOVL s1+0(FP), SI |
1193 MOVL s1+4(FP), BX | 1194 MOVL s1+4(FP), BX |
1194 MOVL s2+8(FP), DI | 1195 MOVL s2+8(FP), DI |
1195 MOVL s2+12(FP), DX | 1196 MOVL s2+12(FP), DX |
1196 CALL runtime·cmpbody(SB) | 1197 CALL runtime·cmpbody(SB) |
1197 MOVL AX, res+16(FP) | 1198 MOVL AX, res+16(FP) |
1198 RET | 1199 RET |
1199 | 1200 |
// bytes·Compare is the byte-slice counterpart of runtime·cmpstring.
// Frame layout used here: s1 at +0 (pointer) and +4 (length), s2 at +12
// (pointer) and +16 (length), 4-byte result at +24. The words at +8 and +20
// are not read (presumably the slice capacities; confirm against the Go
// declaration).
// The operands are loaded into the registers runtime·cmpbody documents as its
// inputs (SI = a, BX = alen, DI = b, DX = blen); cmpbody's AX output, which it
// documents as 1/0/-1, is stored as the result.
1200 TEXT bytes·Compare(SB),7,$0-28 | 1201 TEXT bytes·Compare(SB),NOSPLIT,$0-28 |
1201 MOVL s1+0(FP), SI | 1202 MOVL s1+0(FP), SI |
1202 MOVL s1+4(FP), BX | 1203 MOVL s1+4(FP), BX |
1203 MOVL s2+12(FP), DI | 1204 MOVL s2+12(FP), DI |
1204 MOVL s2+16(FP), DX | 1205 MOVL s2+16(FP), DX |
1205 CALL runtime·cmpbody(SB) | 1206 CALL runtime·cmpbody(SB) |
1206 MOVL AX, res+24(FP) | 1207 MOVL AX, res+24(FP) |
1207 RET | 1208 RET |
1208 | 1209 |
// bytes·IndexByte returns the index of the first occurrence of byte c in the
// slice s, or -1 if c does not occur.
// Frame layout used here: s at +0 (pointer) and +4 (length), c at +12,
// 4-byte result at +16.
// NOTE(review): the frame spec is a bare $0, while neighbouring functions also
// declare their argument size; the offsets used here imply $0-20. Confirm and
// consider adding it.
1209 TEXT bytes·IndexByte(SB),7,$0 | 1210 TEXT bytes·IndexByte(SB),NOSPLIT,$0 |
1210 MOVL s+0(FP), SI | 1211 MOVL s+0(FP), SI |
1211 MOVL s_len+4(FP), CX | 1212 MOVL s_len+4(FP), CX |
1212 MOVB c+12(FP), AL | 1213 MOVB c+12(FP), AL |
// SCASB scans through DI; SI keeps the start so the index can be recovered
1213 MOVL SI, DI | 1214 MOVL SI, DI |
// scan forward, at most CX bytes, stopping at the first byte equal to AL
1214 CLD; REPN; SCASB | 1215 CLD; REPN; SCASB |
// ZF set means a match: skip the two "not found" instructions below.
// With a zero length the scan never runs and ZF is whatever the caller left,
// but DI == SI then, so the "found" path also yields -1.
1215 JZ 3(PC) | 1216 JZ 3(PC) |
1216 MOVL $-1, ret+16(FP) | 1217 MOVL $-1, ret+16(FP) |
1217 RET | 1218 RET |
// found: DI is one past the matching byte, so index = DI - SI - 1
1218 SUBL SI, DI | 1219 SUBL SI, DI |
1219 SUBL $1, DI | 1220 SUBL $1, DI |
1220 MOVL DI, ret+16(FP) | 1221 MOVL DI, ret+16(FP) |
1221 RET | 1222 RET |
1222 | 1223 |
// strings·IndexByte returns the index of the first occurrence of byte c in
// the string s, or -1 if c does not occur.
// Frame layout used here: s at +0 (pointer) and +4 (length), c at +8,
// 4-byte result at +12.
// NOTE(review): the frame spec is a bare $0, while neighbouring functions also
// declare their argument size; the offsets used here imply $0-16. Confirm and
// consider adding it.
1223 TEXT strings·IndexByte(SB),7,$0 | 1224 TEXT strings·IndexByte(SB),NOSPLIT,$0 |
1224 MOVL s+0(FP), SI | 1225 MOVL s+0(FP), SI |
1225 MOVL s_len+4(FP), CX | 1226 MOVL s_len+4(FP), CX |
1226 MOVB c+8(FP), AL | 1227 MOVB c+8(FP), AL |
// SCASB scans through DI; SI keeps the start so the index can be recovered
1227 MOVL SI, DI | 1228 MOVL SI, DI |
// scan forward, at most CX bytes, stopping at the first byte equal to AL
1228 CLD; REPN; SCASB | 1229 CLD; REPN; SCASB |
// ZF set means a match: skip the two "not found" instructions below.
// With a zero length the scan never runs and ZF is whatever the caller left,
// but DI == SI then, so the "found" path also yields -1.
1229 JZ 3(PC) | 1230 JZ 3(PC) |
1230 MOVL $-1, ret+12(FP) | 1231 MOVL $-1, ret+12(FP) |
1231 RET | 1232 RET |
// found: DI is one past the matching byte, so index = DI - SI - 1
1232 SUBL SI, DI | 1233 SUBL SI, DI |
1233 SUBL $1, DI | 1234 SUBL $1, DI |
1234 MOVL DI, ret+12(FP) | 1235 MOVL DI, ret+12(FP) |
1235 RET | 1236 RET |
1236 | 1237 |
1237 // input: | 1238 // input: |
1238 // SI = a | 1239 // SI = a |
1239 // DI = b | 1240 // DI = b |
1240 // BX = alen | 1241 // BX = alen |
1241 // DX = blen | 1242 // DX = blen |
1242 // output: | 1243 // output: |
1243 // AX = 1/0/-1 | 1244 // AX = 1/0/-1 |
1244 TEXT runtime·cmpbody(SB),7,$0-0 | 1245 TEXT runtime·cmpbody(SB),NOSPLIT,$0-0 |
1245 CMPL SI, DI | 1246 CMPL SI, DI |
1246 JEQ cmp_allsame | 1247 JEQ cmp_allsame |
1247 CMPL BX, DX | 1248 CMPL BX, DX |
1248 MOVL DX, BP | 1249 MOVL DX, BP |
1249 CMOVLLT BX, BP // BP = min(alen, blen) | 1250 CMOVLLT BX, BP // BP = min(alen, blen) |
1250 CMPL BP, $4 | 1251 CMPL BP, $4 |
1251 JB cmp_small | 1252 JB cmp_small |
1252 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 | 1253 TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2 |
1253 JE cmp_mediumloop | 1254 JE cmp_mediumloop |
1254 cmp_largeloop: | 1255 cmp_largeloop: |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1343 // all the bytes in common are the same, so we just need | 1344 // all the bytes in common are the same, so we just need |
1344 // to compare the lengths. | 1345 // to compare the lengths. |
1345 cmp_allsame: | 1346 cmp_allsame: |
1346 XORL AX, AX | 1347 XORL AX, AX |
1347 XORL CX, CX | 1348 XORL CX, CX |
1348 CMPL BX, DX | 1349 CMPL BX, DX |
1349 SETGT AX // 1 if alen > blen | 1350 SETGT AX // 1 if alen > blen |
1350 SETEQ CX // 1 if alen == blen | 1351 SETEQ CX // 1 if alen == blen |
1351 LEAL -1(CX)(AX*2), AX // 1,0,-1 result | 1352 LEAL -1(CX)(AX*2), AX // 1,0,-1 result |
1352 RET | 1353 RET |
LEFT | RIGHT |