mp.c
/*
* Copyright (c) 2000-2012 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
*/
#include <mach_rt.h>
#include <mach_kdp.h>
#include <mach_ldebug.h>
#include <gprof.h>
#include <mach/mach_types.h>
#include <mach/kern_return.h>
#include <kern/kern_types.h>
#include <kern/startup.h>
#include <kern/timer_queue.h>
#include <kern/processor.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/pms.h>
#include <kern/misc_protos.h>
#include <kern/timer_call.h>
#include <kern/kalloc.h>
#include <kern/queue.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <profiling/profile-mk.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
#include <i386/mp_desc.h>
#include <i386/misc_protos.h>
#include <i386/trap.h>
#include <i386/postcode.h>
#include <i386/machine_routines.h>
#include <i386/mp.h>
#include <i386/mp_events.h>
#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/machine_cpu.h>
#include <i386/pmCPU.h>
#if CONFIG_MCA
#include <i386/machine_check.h>
#endif
#include <i386/acpi.h>
#include <chud/chud_xnu.h>
#include <chud/chud_xnu_private.h>
#include <sys/kdebug.h>
#include <console/serial_protos.h>
#if MP_DEBUG
#define PAUSE delay(1000000)
#define DBG(x...) kprintf(x)
#else
#define DBG(x...)
#define PAUSE
#endif /* MP_DEBUG */
/* Debugging/test trace events: */
#define TRACE_MP_TLB_FLUSH MACHDBG_CODE(DBG_MACH_MP, 0)
#define TRACE_MP_CPUS_CALL MACHDBG_CODE(DBG_MACH_MP, 1)
#define TRACE_MP_CPUS_CALL_LOCAL MACHDBG_CODE(DBG_MACH_MP, 2)
#define TRACE_MP_CPUS_CALL_ACTION MACHDBG_CODE(DBG_MACH_MP, 3)
#define TRACE_MP_CPUS_CALL_NOBUF MACHDBG_CODE(DBG_MACH_MP, 4)
#define TRACE_MP_CPU_FAST_START MACHDBG_CODE(DBG_MACH_MP, 5)
#define TRACE_MP_CPU_START MACHDBG_CODE(DBG_MACH_MP, 6)
#define TRACE_MP_CPU_DEACTIVATE MACHDBG_CODE(DBG_MACH_MP, 7)
#define ABS(v) (((v) > 0)?(v):-(v))
void slave_boot_init(void);
void i386_cpu_IPI(int cpu);
#if MACH_KDP
static void mp_kdp_wait(boolean_t flush, boolean_t isNMI);
#endif /* MACH_KDP */
static void mp_rendezvous_action(void);
static void mp_broadcast_action(void);
#if MACH_KDP
static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
#endif /* MACH_KDP */
static int NMIInterruptHandler(x86_saved_state_t *regs);
boolean_t smp_initialized = FALSE;
uint32_t TSC_sync_margin = 0xFFF;
volatile boolean_t force_immediate_debugger_NMI = FALSE;
volatile boolean_t pmap_tlb_flush_timeout = FALSE;
decl_simple_lock_data(,mp_kdp_lock);
decl_lck_mtx_data(static, mp_cpu_boot_lock);
lck_mtx_ext_t mp_cpu_boot_lock_ext;
/* Variables needed for MP rendezvous. */
decl_simple_lock_data(,mp_rv_lock);
static void (*mp_rv_setup_func)(void *arg);
static void (*mp_rv_action_func)(void *arg);
static void (*mp_rv_teardown_func)(void *arg);
static void *mp_rv_func_arg;
static volatile int mp_rv_ncpus;
/* Cache-aligned barriers: */
static volatile long mp_rv_entry __attribute__((aligned(64)));
static volatile long mp_rv_exit __attribute__((aligned(64)));
static volatile long mp_rv_complete __attribute__((aligned(64)));
volatile uint64_t debugger_entry_time;
volatile uint64_t debugger_exit_time;
#if MACH_KDP
#include <kdp/kdp.h>
extern int kdp_snapshot;
static struct _kdp_xcpu_call_func {
kdp_x86_xcpu_func_t func;
void *arg0, *arg1;
volatile long ret;
volatile uint16_t cpu;
} kdp_xcpu_call_func = {
.cpu = KDP_XCPU_NONE
};
#endif
/* Variables needed for MP broadcast. */
static void (*mp_bc_action_func)(void *arg);
static void *mp_bc_func_arg;
static int mp_bc_ncpus;
static volatile long mp_bc_count;
decl_lck_mtx_data(static, mp_bc_lock);
lck_mtx_ext_t mp_bc_lock_ext;
static volatile int debugger_cpu = -1;
volatile long NMIPI_acks = 0;
volatile long NMI_count = 0;
extern void NMI_cpus(void);
static void mp_cpus_call_init(void);
static void mp_cpus_call_cpu_init(void);
static void mp_cpus_call_action(void);
static void mp_call_PM(void);
char mp_slave_stack[PAGE_SIZE] __attribute__((aligned(PAGE_SIZE))); // Temp stack for slave init
/* PAL-related routines */
boolean_t i386_smp_init(int nmi_vector, i386_intr_func_t nmi_handler,
int ipi_vector, i386_intr_func_t ipi_handler);
void i386_start_cpu(int lapic_id, int cpu_num);
void i386_send_NMI(int cpu);
#if GPROF
/*
* Initialize dummy structs for profiling. These aren't used but
* allow hertz_tick() to be built with GPROF defined.
*/
struct profile_vars _profile_vars;
struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
#define GPROF_INIT() \
{ \
int i; \
\
/* Hack to initialize pointers to unused profiling structs */ \
for (i = 1; i < MAX_CPUS; i++) \
_profile_vars_cpus[i] = &_profile_vars; \
}
#else
#define GPROF_INIT()
#endif /* GPROF */
static lck_grp_t smp_lck_grp;
static lck_grp_attr_t smp_lck_grp_attr;
#define NUM_CPU_WARM_CALLS 20
struct timer_call cpu_warm_call_arr[NUM_CPU_WARM_CALLS];
queue_head_t cpu_warm_call_list;
decl_simple_lock_data(static, cpu_warm_lock);
typedef struct cpu_warm_data {
timer_call_t cwd_call;
uint64_t cwd_deadline;
int cwd_result;
} *cpu_warm_data_t;
static void cpu_prewarm_init(void);
static void cpu_warm_timer_call_func(call_entry_param_t p0, call_entry_param_t p1);
static void _cpu_warm_setup(void *arg);
static timer_call_t grab_warm_timer_call(void);
static void free_warm_timer_call(timer_call_t call);
void
smp_init(void)
{
simple_lock_init(&mp_kdp_lock, 0);
simple_lock_init(&mp_rv_lock, 0);
lck_grp_attr_setdefault(&smp_lck_grp_attr);
lck_grp_init(&smp_lck_grp, "i386_smp", &smp_lck_grp_attr);
lck_mtx_init_ext(&mp_cpu_boot_lock, &mp_cpu_boot_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
lck_mtx_init_ext(&mp_bc_lock, &mp_bc_lock_ext, &smp_lck_grp, LCK_ATTR_NULL);
console_init();
if (!i386_smp_init(LAPIC_NMI_INTERRUPT, NMIInterruptHandler,
LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler))
return;
cpu_thread_init();
GPROF_INIT();
DBGLOG_CPU_INIT(master_cpu);
mp_cpus_call_init();
mp_cpus_call_cpu_init();
if (PE_parse_boot_argn("TSC_sync_margin",
&TSC_sync_margin, sizeof(TSC_sync_margin))) {
kprintf("TSC sync Margin 0x%x\n", TSC_sync_margin);
} else if (cpuid_vmm_present()) {
kprintf("TSC sync margin disabled\n");
TSC_sync_margin = 0;
}
smp_initialized = TRUE;
cpu_prewarm_init();
return;
}
typedef struct {
int target_cpu;
int target_lapic;
int starter_cpu;
} processor_start_info_t;
static processor_start_info_t start_info __attribute__((aligned(64)));
/*
* Cache-alignment is to avoid cross-cpu false-sharing interference.
*/
static volatile long tsc_entry_barrier __attribute__((aligned(64)));
static volatile long tsc_exit_barrier __attribute__((aligned(64)));
static volatile uint64_t tsc_target __attribute__((aligned(64)));
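/*
 * These two counters implement a simple two-party barrier between the
 * starter cpu (start_cpu()) and the newly started cpu (started_cpu()):
 * intel_startCPU() initializes each to 2, and each side atomically
 * decrements and then spins until the count reaches 0.  The target snaps
 * its TSC into tsc_target between the entry and exit barriers so that the
 * starter can compare it against its own reading.
 */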
/*
* Poll a CPU to see when it has marked itself as running.
*/
static void
mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
while (iters-- > 0) {
if (cpu_datap(slot_num)->cpu_running)
break;
delay(usecdelay);
}
}
/*
* Quickly bring a CPU back online which has been halted.
*/
kern_return_t
intel_startCPU_fast(int slot_num)
{
kern_return_t rc;
/*
* Try to perform a fast restart
*/
rc = pmCPUExitHalt(slot_num);
if (rc != KERN_SUCCESS)
/*
* The CPU was not eligible for a fast restart.
*/
return(rc);
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPU_FAST_START | DBG_FUNC_START,
slot_num, 0, 0, 0, 0);
/*
* Wait until the CPU is back online.
*/
mp_disable_preemption();
/*
* We use short pauses (1us) for low latency. 30,000 iterations
* (roughly 30ms of waiting) is longer than a full restart would
* require, so it should be more than long enough.
*/
mp_wait_for_cpu_up(slot_num, 30000, 1);
mp_enable_preemption();
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPU_FAST_START | DBG_FUNC_END,
slot_num, cpu_datap(slot_num)->cpu_running, 0, 0, 0);
/*
* Check to make sure that the CPU is really running. If not,
* go through the slow path.
*/
if (cpu_datap(slot_num)->cpu_running)
return(KERN_SUCCESS);
else
return(KERN_FAILURE);
}
static void
started_cpu(void)
{
/* Here on the started cpu with cpu_running set TRUE */
if (TSC_sync_margin &&
start_info.target_cpu == cpu_number()) {
/*
* I've just started up; synchronize again with the starter cpu
* and then snap my TSC.
*/
tsc_target = 0;
atomic_decl(&tsc_entry_barrier, 1);
while (tsc_entry_barrier != 0)
; /* spin for starter and target at barrier */
tsc_target = rdtsc64();
atomic_decl(&tsc_exit_barrier, 1);
}
}
static void
start_cpu(void *arg)
{
int i = 1000;
processor_start_info_t *psip = (processor_start_info_t *) arg;
/* Ignore this if the current processor is not the starter */
if (cpu_number() != psip->starter_cpu)
return;
DBG("start_cpu(%p) about to start cpu %d, lapic %d\n",
arg, psip->target_cpu, psip->target_lapic);
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPU_START | DBG_FUNC_START,
psip->target_cpu,
psip->target_lapic, 0, 0, 0);
i386_start_cpu(psip->target_lapic, psip->target_cpu);
#ifdef POSTCODE_DELAY
/* Wait much longer if postcodes are displayed for a delay period. */
i *= 10000;
#endif
DBG("start_cpu(%p) about to wait for cpu %d\n",
arg, psip->target_cpu);
mp_wait_for_cpu_up(psip->target_cpu, i*100, 100);
KERNEL_DEBUG_CONSTANT(
TRACE_MP_CPU_START | DBG_FUNC_END,
psip->target_cpu,
cpu_datap(psip->target_cpu)->cpu_running, 0, 0, 0);
if (TSC_sync_margin &&
cpu_datap(psip->target_cpu)->cpu_running) {
/*
* Compare the TSC from the started processor with ours.
* Report and log/panic if it diverges by more than
* TSC_sync_margin (TSC_SYNC_MARGIN) ticks. This margin
* can be overridden by boot-arg (with 0 meaning no checking).
*/
uint64_t tsc_starter;
int64_t tsc_delta;
atomic_decl(&tsc_entry_barrier, 1);
while (tsc_entry_barrier != 0)
; /* spin for both processors at barrier */
tsc_starter = rdtsc64();
atomic_decl(&tsc_exit_barrier, 1);
while (tsc_exit_barrier != 0)
; /* spin for target to store its TSC */
tsc_delta = tsc_target - tsc_starter;
kprintf("TSC sync for cpu %d: 0x%016llx delta 0x%llx (%lld)\n",
psip->target_cpu, tsc_target, tsc_delta, tsc_delta);
if (ABS(tsc_delta) > (int64_t) TSC_sync_margin) {
#if DEBUG
panic(
#else
printf(
#endif
"Unsynchronized TSC for cpu %d: "
"0x%016llx, delta 0x%llx\n",
psip->target_cpu, tsc_target, tsc_delta);
}
}
}
kern_return_t
intel_startCPU(
int slot_num)
{
int lapic = cpu_to_lapic[slot_num];
boolean_t istate;
assert(lapic != -1);
DBGLOG_CPU_INIT(slot_num);
DBG("intel_startCPU(%d) lapic_id=%d\n", slot_num, lapic);
DBG("IdlePTD(%p): 0x%x\n", &IdlePTD, (int) (uintptr_t)IdlePTD);
/*
* Initialize (or re-initialize) the descriptor tables for this cpu.
* Propagate processor mode to slave.
*/
cpu_desc_init64(cpu_datap(slot_num));
/* Serialize use of the slave boot stack, etc. */
lck_mtx_lock(&mp_cpu_boot_lock);
istate = ml_set_interrupts_enabled(FALSE);
if (slot_num == get_cpu_number()) {
ml_set_interrupts_enabled(istate);
lck_mtx_unlock(&mp_cpu_boot_lock);
return KERN_SUCCESS;
}
start_info.starter_cpu = cpu_number();
start_info.target_cpu = slot_num;
start_info.target_lapic = lapic;
tsc_entry_barrier = 2;
tsc_exit_barrier = 2;
/*
* Perform the processor startup sequence with all running
* processors rendezvous'ed. This is required during periods when
* the cache-disable bit is set for MTRR/PAT initialization.
*/
mp_rendezvous_no_intrs(start_cpu, (void *) &start_info);
start_info.target_cpu = 0;
ml_set_interrupts_enabled(istate);
lck_mtx_unlock(&mp_cpu_boot_lock);
if (!cpu_datap(slot_num)->cpu_running) {
kprintf("Failed to start CPU %02d\n", slot_num);
printf("Failed to start CPU %02d, rebooting...\n", slot_num);
delay(1000000);
halt_cpu();
return KERN_SUCCESS;
} else {
kprintf("Started cpu %d (lapic id %08x)\n", slot_num, lapic);
return KERN_SUCCESS;
}
}
#if MP_DEBUG
cpu_signal_event_log_t *cpu_signal[MAX_CPUS];
cpu_signal_event_log_t *cpu_handle[MAX_CPUS];
MP_EVENT_NAME_DECL();
#endif /* MP_DEBUG */
int
cpu_signal_handler(x86_saved_state_t *regs)
{
#if !MACH_KDP
#pragma unused (regs)
#endif /* !MACH_KDP */
int my_cpu;
volatile int *my_word;
SCHED_STATS_IPI(current_processor());
my_cpu = cpu_number();
my_word = &cpu_data_ptr[my_cpu]->cpu_signals;
/* Store the initial set of signals for diagnostics. New
* signals could arrive while these are being processed
* so it's no more than a hint.
*/
cpu_data_ptr[my_cpu]->cpu_prior_signals = *my_word;
do {
#if MACH_KDP
if (i_bit(MP_KDP, my_word) && regs != NULL) {
DBGLOG(cpu_handle,my_cpu,MP_KDP);
i_bit_clear(MP_KDP, my_word);
/* Ensure that the i386_kernel_state at the base of the
* current thread's stack (if any) is synchronized with the
* context at the moment of the interrupt, to facilitate
* access through the debugger.
*/
sync_iss_to_iks(regs);
if (pmsafe_debug && !kdp_snapshot)
pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
mp_kdp_wait(TRUE, FALSE);
if (pmsafe_debug && !kdp_snapshot)
pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
} else
#endif /* MACH_KDP */
if (i_bit(MP_TLB_FLUSH, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_TLB_FLUSH);
i_bit_clear(MP_TLB_FLUSH, my_word);
pmap_update_interrupt();
} else if (i_bit(MP_AST, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_AST);
i_bit_clear(MP_AST, my_word);
ast_check(cpu_to_processor(my_cpu));
} else if (i_bit(MP_RENDEZVOUS, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_RENDEZVOUS);
i_bit_clear(MP_RENDEZVOUS, my_word);
mp_rendezvous_action();
} else if (i_bit(MP_BROADCAST, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_BROADCAST);
i_bit_clear(MP_BROADCAST, my_word);
mp_broadcast_action();
} else if (i_bit(MP_CHUD, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_CHUD);
i_bit_clear(MP_CHUD, my_word);
chudxnu_cpu_signal_handler();
} else if (i_bit(MP_CALL, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_CALL);
i_bit_clear(MP_CALL, my_word);
mp_cpus_call_action();
} else if (i_bit(MP_CALL_PM, my_word)) {
DBGLOG(cpu_handle,my_cpu,MP_CALL_PM);
i_bit_clear(MP_CALL_PM, my_word);
mp_call_PM();
}
} while (*my_word);
return 0;
}
static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
void *stackptr;
if (panic_active() && !panicDebugging) {
if (pmsafe_debug)
pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
for(;;)
cpu_pause();
}
atomic_incl(&NMIPI_acks, 1);
atomic_incl(&NMI_count, 1);
sync_iss_to_iks_unconditionally(regs);
__asm__ volatile("movq %%rbp, %0" : "=m" (stackptr));
if (cpu_number() == debugger_cpu)
goto NMExit;
if (spinlock_timed_out) {
char pstr[192];
snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): NMIPI for spinlock acquisition timeout, spinlock: %p, spinlock owner: %p, current_thread: %p, spinlock_owner_cpu: 0x%x\n", cpu_number(), spinlock_timed_out, (void *) spinlock_timed_out->interlock.lock_data, current_thread(), spinlock_owner_cpu);
panic_i386_backtrace(stackptr, 64, &pstr[0], TRUE, regs);
} else if (pmap_tlb_flush_timeout == TRUE) {
char pstr[128];
snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor (this CPU did not acknowledge interrupts) TLB state:0x%x\n", cpu_number(), current_cpu_datap()->cpu_tlb_invalid);
panic_i386_backtrace(stackptr, 48, &pstr[0], TRUE, regs);
}
#if MACH_KDP
if (pmsafe_debug && !kdp_snapshot)
pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_SAFE);
current_cpu_datap()->cpu_NMI_acknowledged = TRUE;
mp_kdp_wait(FALSE, pmap_tlb_flush_timeout || spinlock_timed_out || panic_active());
if (pmsafe_debug && !kdp_snapshot)
pmSafeMode(&current_cpu_datap()->lcpu, PM_SAFE_FL_NORMAL);
#endif
NMExit:
return 1;
}
/*
* cpu_interrupt is really just to be used by the scheduler to
* get a CPU's attention; it may not always issue an IPI. If an
* IPI is always needed, use i386_cpu_IPI() instead.
*/
void
cpu_interrupt(int cpu)
{
boolean_t did_IPI = FALSE;
if (smp_initialized
&& pmCPUExitIdle(cpu_datap(cpu))) {
i386_cpu_IPI(cpu);
did_IPI = TRUE;
}
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_REMOTE_AST), cpu, did_IPI, 0, 0, 0);
}
/*
* Send a true NMI via the local APIC to the specified CPU.
*/
void
cpu_NMI_interrupt(int cpu)
{
if (smp_initialized) {
i386_send_NMI(cpu);
}
}
void
NMI_cpus(void)
{
unsigned int cpu;
boolean_t intrs_enabled;
uint64_t tsc_timeout;
intrs_enabled = ml_set_interrupts_enabled(FALSE);
for (cpu = 0; cpu < real_ncpus; cpu++) {
if (!cpu_datap(cpu)->cpu_running)
continue;
cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
cpu_NMI_interrupt(cpu);
tsc_timeout = !machine_timeout_suspended() ?
rdtsc64() + (1000 * 1000 * 1000 * 10ULL) :
~0ULL;
while (!cpu_datap(cpu)->cpu_NMI_acknowledged) {
handle_pending_TLB_flushes();
cpu_pause();
if (rdtsc64() > tsc_timeout)
panic("NMI_cpus() timeout cpu %d", cpu);
}
cpu_datap(cpu)->cpu_NMI_acknowledged = FALSE;
}
ml_set_interrupts_enabled(intrs_enabled);
}
static void (* volatile mp_PM_func)(void) = NULL;
static void
mp_call_PM(void)
{
assert(!ml_get_interrupts_enabled());
if (mp_PM_func != NULL)
mp_PM_func();
}
void
cpu_PM_interrupt(int cpu)
{
assert(!ml_get_interrupts_enabled());
if (mp_PM_func != NULL) {
if (cpu == cpu_number())
mp_PM_func();
else
i386_signal_cpu(cpu, MP_CALL_PM, ASYNC);
}
}
void
PM_interrupt_register(void (*fn)(void))
{
mp_PM_func = fn;
}
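/*
 * Illustrative usage (the handler name is hypothetical, not from this
 * file): the power-management driver registers its callback once during
 * initialization, e.g.
 *	PM_interrupt_register(my_pm_ipi_handler);
 * after which cpu_PM_interrupt() invokes it either directly on the local
 * cpu or remotely via an MP_CALL_PM IPI.
 */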
void
i386_signal_cpu(int cpu, mp_event_t event, mp_sync_t mode)
{
volatile int *signals = &cpu_datap(cpu)->cpu_signals;
uint64_t tsc_timeout;
if (!cpu_datap(cpu)->cpu_running)
return;
if (event == MP_TLB_FLUSH)
KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_START, cpu, 0, 0, 0, 0);
DBGLOG(cpu_signal, cpu, event);
i_bit_set(event, signals);
i386_cpu_IPI(cpu);
if (mode == SYNC) {
again:
tsc_timeout = !machine_timeout_suspended() ?
rdtsc64() + (1000*1000*1000) :
~0ULL;
while (i_bit(event, signals) && rdtsc64() < tsc_timeout) {
cpu_pause();
}
if (i_bit(event, signals)) {
DBG("i386_signal_cpu(%d, 0x%x, SYNC) timed out\n",
cpu, event);
goto again;
}
}
if (event == MP_TLB_FLUSH)
KERNEL_DEBUG(TRACE_MP_TLB_FLUSH | DBG_FUNC_END, cpu, 0, 0, 0, 0);
}
/*
* Send event to all running cpus.
* Called with the topology locked.
*/
void
i386_signal_cpus(mp_event_t event, mp_sync_t mode)
{
unsigned int cpu;
unsigned int my_cpu = cpu_number();
assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
for (cpu = 0; cpu < real_ncpus; cpu++) {
if (cpu == my_cpu || !cpu_datap(cpu)->cpu_running)
continue;
i386_signal_cpu(cpu, event, mode);
}
}
/*
* Return the number of running cpus.
* Called with the topology locked.
*/
int
i386_active_cpus(void)
{
unsigned int cpu;
unsigned int ncpus = 0;
assert(hw_lock_held((hw_lock_t)&x86_topo_lock));
for (cpu = 0; cpu < real_ncpus; cpu++) {
if (cpu_datap(cpu)->cpu_running)
ncpus++;
}
return(ncpus);
}
/*
* Helper function called when busy-waiting: panics if too much
* TSC-based time has elapsed since the start of the spin.
*/
static void
mp_spin_timeout_check(uint64_t tsc_start, const char *msg)
{
uint64_t tsc_timeout;
cpu_pause();
if (machine_timeout_suspended())
return;
/*
* The timeout is 4 * the spinlock timeout period
* unless we have serial console printing (kprintf) enabled,
* in which case we allow an even greater margin.
*/
tsc_timeout = disable_serial_output ? (uint64_t) LockTimeOutTSC << 2
: (uint64_t) LockTimeOutTSC << 4;
if (rdtsc64() > tsc_start + tsc_timeout)
panic("%s: spin timeout", msg);
}
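/*
 * Worked example of the margin above (numbers are illustrative only, not
 * taken from this file): if LockTimeOutTSC corresponded to, say, 1ms of
 * TSC ticks, busy-waiters would panic after ~4ms with serial output
 * disabled (<< 2) and after ~16ms when kprintf serial logging is enabled
 * (<< 4), since serial output slows the spinning cpus down.
 */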
/*
* All-CPU rendezvous:
* - CPUs are signalled,
* - all execute the setup function (if specified),
* - rendezvous (i.e. all cpus reach a barrier),
* - all execute the action function (if specified),
* - rendezvous again,
* - execute the teardown function (if specified), and then
* - resume.
*
* Note that the supplied external functions _must_ be reentrant and aware
* that they are running in parallel and in an unknown lock context.
*/
static void
mp_rendezvous_action(void)
{
boolean_t intrs_enabled;
uint64_t tsc_spin_start;
/* setup function */
if (mp_rv_setup_func != NULL)
mp_rv_setup_func(mp_rv_func_arg);
intrs_enabled = ml_get_interrupts_enabled();
/* spin on entry rendezvous */
atomic_incl(&mp_rv_entry, 1);
tsc_spin_start = rdtsc64();
while (mp_rv_entry < mp_rv_ncpus) {
/* poll for pesky tlb flushes if interrupts disabled */
if (!intrs_enabled)
handle_pending_TLB_flushes();
mp_spin_timeout_check(tsc_spin_start,
"mp_rendezvous_action() entry");
}
/* action function */
if (mp_rv_action_func != NULL)
mp_rv_action_func(mp_rv_func_arg);
/* spin on exit rendezvous */
atomic_incl(&mp_rv_exit, 1);
tsc_spin_start = rdtsc64();
while (mp_rv_exit < mp_rv_ncpus) {
if (!intrs_enabled)
handle_pending_TLB_flushes();
mp_spin_timeout_check(tsc_spin_start,
"mp_rendezvous_action() exit");
}
/* teardown function */
if (mp_rv_teardown_func != NULL)
mp_rv_teardown_func(mp_rv_func_arg);
/* Bump completion count */
atomic_incl(&mp_rv_complete, 1);
}
void
mp_rendezvous(void (*setup_func)(void *),
void (*action_func)(void *),
void (*teardown_func)(void *),
void *arg)
{
uint64_t tsc_spin_start;
if (!smp_initialized) {
if (setup_func != NULL)
setup_func(arg);
if (action_func != NULL)
action_func(arg);
if (teardown_func != NULL)
teardown_func(arg);
return;
}
/* obtain rendezvous lock */
simple_lock(&mp_rv_lock);
/* set static function pointers */
mp_rv_setup_func = setup_func;
mp_rv_action_func = action_func;
mp_rv_teardown_func = teardown_func;
mp_rv_func_arg = arg;
mp_rv_entry = 0;
mp_rv_exit = 0;
mp_rv_complete = 0;
/*
* signal other processors, which will call mp_rendezvous_action()
* with interrupts disabled
*/
simple_lock(&x86_topo_lock);
mp_rv_ncpus = i386_active_cpus();
i386_signal_cpus(MP_RENDEZVOUS, ASYNC);
simple_unlock(&x86_topo_lock);
/* call executor function on this cpu */
mp_rendezvous_action();
/*
* Spin for everyone to complete.
* This is necessary to ensure that all processors have proceeded
* from the exit barrier before we release the rendezvous structure.
*/
tsc_spin_start = rdtsc64();
while (mp_rv_complete < mp_rv_ncpus) {
mp_spin_timeout_check(tsc_spin_start, "mp_rendezvous()");
}
/* Tidy up */
mp_rv_setup_func = NULL;
mp_rv_action_func = NULL;
mp_rv_teardown_func = NULL;
mp_rv_func_arg = NULL;
/* release lock */
simple_unlock(&mp_rv_lock);
}
void
mp_rendezvous_break_lock(void)
{
simple_lock_init(&mp_rv_lock, 0);
}
static void
setup_disable_intrs(__unused void * param_not_used)
{
/* disable interrupts before the first barrier */
boolean_t intr = ml_set_interrupts_enabled(FALSE);
current_cpu_datap()->cpu_iflag = intr;
DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}
static void
teardown_restore_intrs(__unused void * param_not_used)
{
/* restore interrupt flag following MTRR changes */
ml_set_interrupts_enabled(current_cpu_datap()->cpu_iflag);
DBG("CPU%d: %s\n", get_cpu_number(), __FUNCTION__);
}
/*
* A wrapper to mp_rendezvous() to call action_func() with interrupts disabled.
* This is exported for use by kexts.
*/
void
mp_rendezvous_no_intrs(
void (*action_func)(void *),
void *arg)
{
mp_rendezvous(setup_disable_intrs,
action_func,
teardown_restore_intrs,
arg);
}
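/*
 * Illustrative sketch (not part of the original source): how a caller,
 * for example a kext reprogramming MTRRs, might use
 * mp_rendezvous_no_intrs() to run an action on every running cpu with
 * interrupts disabled.  The example function names are hypothetical.
 */
#if 0	/* example only */
static void
example_wbinvd_action(__unused void *arg)
{
	/* Executed on every running cpu, in parallel, interrupts disabled */
	wbinvd();
}

static void
example_flush_all_cpu_caches(void)
{
	mp_rendezvous_no_intrs(example_wbinvd_action, NULL);
}
#endif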
typedef struct {
queue_chain_t link; /* queue linkage */
void (*func)(void *,void *); /* routine to call */
void *arg0; /* routine's 1st arg */
void *arg1; /* routine's 2nd arg */
volatile long *countp; /* completion counter */
} mp_call_t;
typedef struct {
queue_head_t queue;
decl_simple_lock_data(, lock);
} mp_call_queue_t;
#define MP_CPUS_CALL_BUFS_PER_CPU MAX_CPUS
static mp_call_queue_t mp_cpus_call_freelist;
static mp_call_queue_t mp_cpus_call_head[MAX_CPUS];
static inline boolean_t
mp_call_head_lock(mp_call_queue_t *cqp)
{
boolean_t intrs_enabled;
intrs_enabled = ml_set_interrupts_enabled(FALSE);
simple_lock(&cqp->lock);
return intrs_enabled;
}
static inline boolean_t
mp_call_head_is_locked(mp_call_queue_t *cqp)
{
return !ml_get_interrupts_enabled() &&
hw_lock_held((hw_lock_t)&cqp->lock);
}
static inline void
mp_call_head_unlock(mp_call_queue_t *cqp, boolean_t intrs_enabled)
{
simple_unlock(&cqp->lock);
ml_set_interrupts_enabled(intrs_enabled);
}
static inline mp_call_t *
mp_call_alloc(void)
{
mp_call_t *callp = NULL;
boolean_t intrs_enabled;
mp_call_queue_t *cqp = &mp_cpus_call_freelist;