forked from erlang/otp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
erl_process.c
15114 lines (12970 loc) · 436 KB
/
erl_process.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 1996-2023. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* %CopyrightEnd%
*/
#define ERL_PROCESS_C__
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#define ERTS_WANT_BREAK_HANDLING
#include <stddef.h> /* offsetof() */
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
#include "erl_process.h"
#include "error.h"
#include "bif.h"
#include "erl_db.h"
#include "dist.h"
#include "beam_catches.h"
#include "erl_threads.h"
#include "erl_binary.h"
#include "beam_bp.h"
#include "erl_cpu_topology.h"
#include "erl_thr_progress.h"
#include "erl_thr_queue.h"
#include "erl_async.h"
#include "dtrace-wrapper.h"
#include "erl_ptab.h"
#include "erl_bif_unique.h"
#define ERTS_WANT_TIMER_WHEEL_API
#include "erl_time.h"
#include "erl_nfunc_sched.h"
#include "erl_check_io.h"
#include "erl_poll.h"
#include "erl_proc_sig_queue.h"
#include "beam_common.h"
#define ERTS_CHECK_TIME_REDS CONTEXT_REDS
#define ERTS_DELAYED_WAKEUP_INFINITY (~(Uint64) 0)
#define ERTS_DELAYED_WAKEUP_REDUCTIONS ((Uint64) CONTEXT_REDS/2)
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
(ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED/2)
#define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10)
#define ERTS_SCHED_SPIN_UNTIL_YIELD 100
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_VERY_LONG 40
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_VERY_LONG 1000
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_LONG 20
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_LONG 1000
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_MEDIUM 10
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_MEDIUM 1000
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_SHORT 10
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_SHORT 0
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_VERY_SHORT 5
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_VERY_SHORT 0
#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_NONE 0
#define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_NONE 0
#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT 1000
#define ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT 0
#if 0 || defined(DEBUG)
#define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
#endif
#if defined(DEBUG) && 0
#define HARDDEBUG
#else
#undef HARDDEBUG
#endif
#ifdef ERTS_ENABLE_LOCK_COUNT
#include "erl_lock_count.h"
#endif
#define MAX_BIT (1 << PRIORITY_MAX)
#define HIGH_BIT (1 << PRIORITY_HIGH)
#define NORMAL_BIT (1 << PRIORITY_NORMAL)
#define LOW_BIT (1 << PRIORITY_LOW)
#define PORT_BIT (1 << ERTS_PORT_PRIO_LEVEL)
#define ERTS_IS_RUNQ_EMPTY_FLGS(FLGS) \
(!((FLGS) & (ERTS_RUNQ_FLGS_QMASK|ERTS_RUNQ_FLG_MISC_OP)))
#define ERTS_IS_RUNQ_EMPTY_PORTS_FLGS(FLGS) \
(!((FLGS) & (PORT_BIT|ERTS_RUNQ_FLG_MISC_OP)))
#define ERTS_EMPTY_RUNQ(RQ) \
ERTS_IS_RUNQ_EMPTY_FLGS(ERTS_RUNQ_FLGS_GET_NOB((RQ)))
#define ERTS_EMPTY_RUNQ_PORTS(RQ) \
ERTS_IS_RUNQ_EMPTY_FLGS(ERTS_RUNQ_FLGS_GET_NOB((RQ)))
static ERTS_INLINE int
runq_got_work_to_execute_flags(Uint32 flags)
{
if (flags & ERTS_RUNQ_FLG_HALTING)
return !ERTS_IS_RUNQ_EMPTY_PORTS_FLGS(flags);
return !ERTS_IS_RUNQ_EMPTY_FLGS(flags);
}
static ERTS_INLINE int
runq_got_work_to_execute(ErtsRunQueue *rq)
{
return runq_got_work_to_execute_flags(ERTS_RUNQ_FLGS_GET_NOB(rq));
}
const Process erts_invalid_process = {{ERTS_INVALID_PID}};
int ERTS_WRITE_UNLIKELY(erts_default_spo_flags) = SPO_ON_HEAP_MSGQ;
int ERTS_WRITE_UNLIKELY(erts_sched_compact_load);
int ERTS_WRITE_UNLIKELY(erts_sched_balance_util) = 0;
Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers);
Uint ERTS_WRITE_UNLIKELY(erts_no_total_schedulers);
Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers) = 0;
Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers) = 0;
int ERTS_WRITE_UNLIKELY(erts_no_aux_work_threads);
static char *erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_NO_FLAGS] = {0};
int erts_aux_work_no_flags = ERTS_SSI_AUX_WORK_NO_FLAGS;
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_VERY_LAZY (4*1024*1024)
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_LAZY (512*1024)
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_MEDIUM (64*1024)
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_EAGER (16*1024)
#define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_VERY_EAGER (1024)
static UWord thr_prgr_later_cleanup_op_threshold = ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_MEDIUM;
ErtsPTab erts_proc erts_align_attribute(ERTS_CACHE_LINE_SIZE);
int erts_sched_thread_suggested_stack_size = -1;
int erts_dcpu_sched_thread_suggested_stack_size = -1;
int erts_dio_sched_thread_suggested_stack_size = -1;
#ifdef ERTS_ENABLE_LOCK_CHECK
ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
#endif
typedef struct {
int aux_work;
int tse;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
static ERTS_INLINE ErtsBusyWaitParams *
sched_get_busy_wait_params(ErtsSchedulerData *esdp)
{
return &sched_busy_wait_params[esdp->type];
}
typedef union {
ErtsAuxWorkData data;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsAuxWorkData))];
} ErtsAlignedAuxWorkData;
static ErtsAlignedAuxWorkData *ERTS_WRITE_UNLIKELY(aligned_aux_work_data);
#define ERTS_SCHDLR_SSPND_CHNG_NMSB (((erts_aint32_t) 1) << 0)
#define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1)
#define ERTS_SCHDLR_SSPND_CHNG_ONLN (((erts_aint32_t) 1) << 2)
#define ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN (((erts_aint32_t) 1) << 3)
typedef struct ErtsMultiSchedulingBlock_ {
int ongoing;
ErtsProcList *blckrs;
ErtsProcList *chngq;
} ErtsMultiSchedulingBlock;
typedef struct ErtsSchedTypeCounters_ {
Uint32 normal;
Uint32 dirty_cpu;
Uint32 dirty_io;
} ErtsSchedTypeCounters;
static struct ErtsSchedSuspend_ {
erts_mtx_t mtx;
ErtsSchedTypeCounters online;
ErtsSchedTypeCounters curr_online;
ErtsSchedTypeCounters active;
erts_atomic32_t changing;
ErtsProcList *chngq;
Eterm changer;
ErtsMultiSchedulingBlock nmsb; /* Normal multi Scheduling Block */
ErtsMultiSchedulingBlock msb; /* Multi Scheduling Block */
ErtsSchedType last_msb_dirty_type;
} schdlr_sspnd;
static void init_scheduler_suspend(void);
static ERTS_INLINE Uint32
schdlr_sspnd_eq_nscheds(ErtsSchedTypeCounters *val1p, ErtsSchedTypeCounters *val2p)
{
int res = val1p->normal == val2p->normal;
res &= val1p->dirty_cpu == val2p->dirty_cpu;
res &= val1p->dirty_io == val2p->dirty_io;
return res;
}
static ERTS_INLINE Uint32
schdlr_sspnd_get_nscheds(ErtsSchedTypeCounters *valp,
ErtsSchedType type)
{
switch (type) {
case ERTS_SCHED_NORMAL:
return valp->normal;
case ERTS_SCHED_DIRTY_CPU:
return valp->dirty_cpu;
case ERTS_SCHED_DIRTY_IO:
return valp->dirty_io;
default:
ERTS_INTERNAL_ERROR("Invalid scheduler type");
return 0;
}
}
#ifdef DEBUG
static ERTS_INLINE Uint32
schdlr_sspnd_get_nscheds_tot(ErtsSchedTypeCounters *valp)
{
Uint32 res = valp->normal;
res += valp->dirty_cpu;
res += valp->dirty_io;
return res;
}
#endif
static ERTS_INLINE void
schdlr_sspnd_dec_nscheds(ErtsSchedTypeCounters *valp,
ErtsSchedType type)
{
ASSERT(schdlr_sspnd_get_nscheds(valp, type) > 0);
switch (type) {
case ERTS_SCHED_NORMAL:
valp->normal--;
break;
case ERTS_SCHED_DIRTY_CPU:
valp->dirty_cpu--;
break;
case ERTS_SCHED_DIRTY_IO:
valp->dirty_io--;
break;
default:
ERTS_INTERNAL_ERROR("Invalid scheduler type");
}
}
static ERTS_INLINE void
schdlr_sspnd_inc_nscheds(ErtsSchedTypeCounters *valp,
ErtsSchedType type)
{
switch (type) {
case ERTS_SCHED_NORMAL:
valp->normal++;
break;
case ERTS_SCHED_DIRTY_CPU:
valp->dirty_cpu++;
break;
case ERTS_SCHED_DIRTY_IO:
valp->dirty_io++;
break;
default:
ERTS_INTERNAL_ERROR("Invalid scheduler type");
}
}
static ERTS_INLINE void
schdlr_sspnd_set_nscheds(ErtsSchedTypeCounters *valp,
ErtsSchedType type, Uint32 no)
{
switch (type) {
case ERTS_SCHED_NORMAL:
valp->normal = no;
break;
case ERTS_SCHED_DIRTY_CPU:
valp->dirty_cpu = no;
break;
case ERTS_SCHED_DIRTY_IO:
valp->dirty_io = no;
break;
default:
ERTS_INTERNAL_ERROR("Invalid scheduler type");
}
}
static struct {
erts_mtx_t update_mtx;
erts_atomic32_t no_runqs;
int last_active_runqs;
int forced_check_balance;
erts_atomic32_t checking_balance;
int halftime;
int full_reds_history_index;
struct {
int active_runqs;
int reds;
erts_aint32_t max_len;
} prev_rise;
Uint n;
} balance_info;
#define ERTS_BLNCE_SAVE_RISE(ACTIVE, MAX_LEN, REDS) \
do { \
balance_info.prev_rise.active_runqs = (ACTIVE); \
balance_info.prev_rise.max_len = (MAX_LEN); \
balance_info.prev_rise.reds = (REDS); \
} while (0)
erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
#if ERTS_POLL_USE_SCHEDULER_POLLING
static erts_atomic32_t function_calls;
static erts_atomic32_t doing_sys_schedule;
#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
static erts_tid_t runq_supervisor_tid;
static erts_atomic_t runq_supervisor_sleeping;
ErtsAlignedRunQueue * ERTS_WRITE_UNLIKELY(erts_aligned_run_queues);
Uint ERTS_WRITE_UNLIKELY(erts_no_run_queues);
struct {
union {
erts_atomic32_t active;
char align__[ERTS_CACHE_LINE_SIZE];
} cpu;
union {
erts_atomic32_t active;
char align__[ERTS_CACHE_LINE_SIZE];
} io;
} dirty_count erts_align_attribute(ERTS_CACHE_LINE_SIZE);
static ERTS_INLINE void
dirty_active(ErtsSchedulerData *esdp, erts_aint32_t add)
{
erts_aint32_t val;
erts_atomic32_t *ap;
switch (esdp->type) {
case ERTS_SCHED_DIRTY_CPU:
ap = &dirty_count.cpu.active;
break;
case ERTS_SCHED_DIRTY_IO:
ap = &dirty_count.io.active;
break;
default:
ap = NULL;
ERTS_INTERNAL_ERROR("Not a dirty scheduler");
break;
}
/*
* All updates done under run-queue lock, so
* no inc or dec needed...
*/
ERTS_LC_ASSERT(erts_lc_runq_is_locked(esdp->run_queue));
val = erts_atomic32_read_nob(ap);
val += add;
erts_atomic32_set_nob(ap, val);
}
ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_scheduler_data);
ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_cpu_scheduler_data);
ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_io_scheduler_data);
typedef union {
Process dsp;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(Process))];
} ErtsAlignedDirtyShadowProcess;
typedef union {
ErtsSchedulerSleepInfo ssi;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerSleepInfo))];
} ErtsAlignedSchedulerSleepInfo;
static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info;
static ErtsAlignedSchedulerSleepInfo *aligned_dirty_cpu_sched_sleep_info;
static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info;
typedef struct {
erts_mtx_t mtx;
erts_cnd_t cnd;
int blocked;
int id;
} ErtsBlockPollThreadData;
typedef union {
ErtsBlockPollThreadData block_data;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsBlockPollThreadData))];
} ErtsAlignedBlockPollThreadData;
static ErtsAlignedBlockPollThreadData *ERTS_WRITE_UNLIKELY(block_poll_thread_data);
static Uint last_reductions;
static Uint last_exact_reductions;
Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor);
Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_long_gc);
Uint ERTS_WRITE_UNLIKELY(erts_system_monitor_long_schedule);
Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_large_heap);
struct erts_system_monitor_flags_t erts_system_monitor_flags;
/* system performance monitor */
Eterm erts_system_profile;
struct erts_system_profile_flags_t erts_system_profile_flags;
int erts_system_profile_ts_type = ERTS_TRACE_FLG_NOW_TIMESTAMP;
#if ERTS_MAX_PROCESSES > 0x7fffffff
#error "Need to store process_count in another type"
#endif
typedef enum {
ERTS_PSTT_GC_MAJOR, /* Garbage Collect: Fullsweep */
ERTS_PSTT_GC_MINOR, /* Garbage Collect: Generational */
ERTS_PSTT_CPC, /* Check Process Code */
ERTS_PSTT_CLA, /* Copy Literal Area */
ERTS_PSTT_FTMQ, /* Flush trace msg queue */
ERTS_PSTT_ETS_FREE_FIXATION,
ERTS_PSTT_PRIO_SIG, /* Elevate prio on signal management */
ERTS_PSTT_TEST
} ErtsProcSysTaskType;
#define ERTS_MAX_PROC_SYS_TASK_ARGS 2
struct ErtsProcSysTask_ {
ErtsProcSysTask *next;
ErtsProcSysTask *prev;
ErtsProcSysTaskType type;
Eterm prio;
Eterm requester;
Eterm reply_tag;
Eterm req_id;
Uint req_id_sz;
Eterm arg[ERTS_MAX_PROC_SYS_TASK_ARGS];
ErlOffHeap off_heap;
Eterm heap[1];
};
#define ERTS_PROC_SYS_TASK_SIZE(HSz) \
(sizeof(ErtsProcSysTask) - sizeof(Eterm) + sizeof(Eterm)*(HSz))
struct ErtsProcSysTaskQs_ {
int qmask;
int ncount;
ErtsProcSysTask *q[ERTS_NO_PROC_PRIO_LEVELS];
};
ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proc_sys_task_queues,
ErtsProcSysTaskQs,
50,
ERTS_ALC_T_PROC_SYS_TSK_QS)
ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(misc_op_list,
ErtsMiscOpList,
10,
ERTS_ALC_T_MISC_OP_LIST)
ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
ErtsProcList,
200,
ERTS_ALC_T_PROC_LIST)
#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
(ASSERT((((int)-1) <= ((int) (IX))) \
&& (((int) (IX)) < (erts_no_aux_work_threads-1))), \
&aligned_sched_sleep_info[(IX)].ssi)
#define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \
(ASSERT(0 <= ((int) (IX)) \
&& ((int) (IX)) < ((int) erts_no_dirty_cpu_schedulers)), \
&aligned_dirty_cpu_sched_sleep_info[(IX)].ssi)
#define ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(IX) \
(ASSERT(0 <= ((int) (IX)) \
&& ((int) (IX)) < ((int) erts_no_dirty_io_schedulers)), \
&aligned_dirty_io_sched_sleep_info[(IX)].ssi)
#define ERTS_FOREACH_RUNQ(RQVAR, DO) \
do { \
ErtsRunQueue *RQVAR; \
int ix__; \
for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \
RQVAR = ERTS_RUNQ_IX(ix__); \
erts_runq_lock(RQVAR); \
{ DO; } \
erts_runq_unlock(RQVAR); \
} \
} while (0)
#define ERTS_FOREACH_OP_RUNQ(RQVAR, DO) \
do { \
ErtsRunQueue *RQVAR; \
int ix__; \
int online__ = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, \
ERTS_SCHED_NORMAL); \
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&schdlr_sspnd.mtx)); \
for (ix__ = 0; ix__ < online__; ix__++) { \
RQVAR = ERTS_RUNQ_IX(ix__); \
erts_runq_lock(RQVAR); \
{ DO; } \
erts_runq_unlock(RQVAR); \
} \
} while (0)
#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, NRQS, DO, DOX) \
do { \
ErtsRunQueue *RQVAR; \
int nrqs = (NRQS); \
int ix__; \
for (ix__ = 0; ix__ < nrqs; ix__++) { \
RQVAR = ERTS_RUNQ_IX(ix__); \
erts_runq_lock(RQVAR); \
{ DO; } \
} \
{ DOX; } \
for (ix__ = 0; ix__ < nrqs; ix__++) \
erts_runq_unlock(ERTS_RUNQ_IX(ix__)); \
} while (0)
#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \
ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, DO, )
#define ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(RQVAR, DO) \
ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues, DO, )
/*
* Local functions.
*/
static void exec_misc_ops(ErtsRunQueue *);
static void print_function_from_pc(fmtfn_t to, void *to_arg, ErtsCodePtr x);
static Uint stack_element_dump(fmtfn_t to, void *to_arg, Eterm* sp, Uint yreg);
static void aux_work_timeout(void *unused);
static void aux_work_timeout_early_init(int max_no_aux_work_threads);
static void setup_aux_work_timer(ErtsSchedulerData *esdp);
static int execute_sys_tasks(Process *c_p,
erts_aint32_t *statep,
int in_reds);
static int cleanup_sys_tasks(Process *c_p,
erts_aint32_t in_state,
int in_reds);
#if defined(DEBUG) || 0
#define ERTS_DBG_CHK_AUX_WORK_VAL(V) dbg_chk_aux_work_val((V))
static void
dbg_chk_aux_work_val(erts_aint32_t value)
{
erts_aint32_t valid = 0;
valid |= ERTS_SSI_AUX_WORK_SET_TMO;
valid |= ERTS_SSI_AUX_WORK_MISC;
valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM;
valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC;
valid |= ERTS_SSI_AUX_WORK_ASYNC_READY;
valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN;
valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP;
valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR;
valid |= ERTS_SSI_AUX_WORK_DD;
valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR;
valid |= ERTS_SSI_AUX_WORK_CNCLD_TMRS;
valid |= ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR;
valid |= ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP;
#if HAVE_ERTS_MSEG
valid |= ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK;
#endif
#ifdef ERTS_SSI_AUX_WORK_REAP_PORTS
valid |= ERTS_SSI_AUX_WORK_REAP_PORTS;
#endif
valid |= ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED;
valid |= ERTS_SSI_AUX_WORK_YIELD;
if (~valid & value)
erts_exit(ERTS_ABORT_EXIT,
"Invalid aux_work value found: 0x%x\n",
~valid & value);
}
#define ERTS_DBG_CHK_SSI_AUX_WORK(SSI) \
ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&(SSI)->aux_work))
#else
#define ERTS_DBG_CHK_AUX_WORK_VAL(V)
#define ERTS_DBG_CHK_SSI_AUX_WORK(SSI)
#endif
static void wake_scheduler(ErtsRunQueue *rq);
#if defined(ERTS_ENABLE_LOCK_CHECK)
int
erts_lc_runq_is_locked(ErtsRunQueue *runq)
{
return erts_lc_mtx_is_locked(&runq->mtx);
}
#endif
static ERTS_INLINE Uint64
ensure_later_proc_interval(Uint64 interval)
{
return erts_ensure_later_interval_nob(erts_ptab_interval(&erts_proc), interval);
}
Uint64
erts_get_proc_interval(void)
{
return erts_current_interval_nob(erts_ptab_interval(&erts_proc));
}
Uint64
erts_ensure_later_proc_interval(Uint64 interval)
{
return ensure_later_proc_interval(interval);
}
Uint64
erts_step_proc_interval(void)
{
return erts_step_interval_nob(erts_ptab_interval(&erts_proc));
}
void
erts_pre_init_process(void)
{
erts_tsd_key_create(&sched_data_key, "erts_sched_data_key");
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP_IX]
= "DELAYED_AW_WAKEUP";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DD_IX]
= "DD";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DD_THR_PRGR_IX]
= "DD_THR_PRGR";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC_IX]
= "FIX_ALLOC_DEALLOC";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM_IX]
= "FIX_ALLOC_LOWER_LIM";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP_IX]
= "THR_PRGR_LATER_OP";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_CNCLD_TMRS_IX]
= "CNCLD_TMRS";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR_IX]
= "CNCLD_TMRS_THR_PRGR";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_ASYNC_READY_IX]
= "ASYNC_READY";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN_IX]
= "ASYNC_READY_CLEAN";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MISC_THR_PRGR_IX]
= "MISC_THR_PRGR";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MISC_IX]
= "MISC";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_SET_TMO_IX]
= "SET_TMO";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK_IX]
= "MSEG_CACHE_CHECK";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_YIELD_IX]
= "YIELD";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_REAP_PORTS_IX]
= "REAP_PORTS";
erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED_IX]
= "DEBUG_WAIT_COMPLETED";
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].get_locks
= ERTS_PSD_ERROR_HANDLER_BUF_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].set_locks
= ERTS_PSD_ERROR_HANDLER_BUF_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].get_locks
= ERTS_PSD_SAVED_CALLS_BUF_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].set_locks
= ERTS_PSD_SAVED_CALLS_BUF_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_SCHED_ID].get_locks
= ERTS_PSD_SCHED_ID_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_SCHED_ID].set_locks
= ERTS_PSD_SCHED_ID_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks
= ERTS_PSD_CALL_TIME_BP_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks
= ERTS_PSD_CALL_TIME_BP_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].get_locks
= ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks
= ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_NFUNC_TRAP_WRAPPER].get_locks
= ERTS_PSD_NFUNC_TRAP_WRAPPER_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_NFUNC_TRAP_WRAPPER].set_locks
= ERTS_PSD_NFUNC_TRAP_WRAPPER_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_ETS_OWNED_TABLES].get_locks
= ERTS_PSD_ETS_OWNED_TABLES_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_ETS_OWNED_TABLES].set_locks
= ERTS_PSD_ETS_OWNED_TABLES_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_ETS_FIXED_TABLES].get_locks
= ERTS_PSD_ETS_FIXED_TABLES_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_ETS_FIXED_TABLES].set_locks
= ERTS_PSD_ETS_FIXED_TABLES_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks
= ERTS_PSD_DIST_ENTRY_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks
= ERTS_PSD_DIST_ENTRY_SET_LOCKS;
erts_psd_required_locks[ERTS_PSD_PENDING_SUSPEND].get_locks
= ERTS_PSD_PENDING_SUSPEND_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_PENDING_SUSPEND].set_locks
= ERTS_PSD_PENDING_SUSPEND_SET_LOCKS;
#endif
}
/* initialize the scheduler */
void
erts_init_process(int ncpu, int proc_tab_size, int legacy_proc_tab)
{
erts_init_proc_lock(ncpu);
init_proclist_alloc();
erts_ptab_init_table(&erts_proc,
ERTS_ALC_T_PROC_TABLE,
NULL,
(ErtsPTabElementCommon *) &erts_invalid_process.common,
proc_tab_size,
sizeof(Process),
"process_table",
legacy_proc_tab,
1
);
last_reductions = 0;
last_exact_reductions = 0;
}
void
erts_late_init_process(void)
{
int ix;
erts_spinlock_init(&erts_sched_stat.lock, "sched_stat", NIL,
ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER);
for (ix = 0; ix < ERTS_NO_PRIO_LEVELS; ix++) {
Eterm atom;
char *atom_str;
switch (ix) {
case PRIORITY_MAX:
atom_str = "process_max";
break;
case PRIORITY_HIGH:
atom_str = "process_high";
break;
case PRIORITY_NORMAL:
atom_str = "process_normal";
break;
case PRIORITY_LOW:
atom_str = "process_low";
break;
case ERTS_PORT_PRIO_LEVEL:
atom_str = "port";
break;
default:
atom_str = "bad_prio";
ASSERT(!"bad prio");
break;
}
atom = am_atom_put(atom_str, sys_strlen(atom_str));
erts_sched_stat.prio[ix].name = atom;
erts_sched_stat.prio[ix].total_executed = 0;
erts_sched_stat.prio[ix].executed = 0;
erts_sched_stat.prio[ix].total_migrated = 0;
erts_sched_stat.prio[ix].migrated = 0;
}
}
#define ERTS_SCHED_WTIME_IDLE ~((Uint64) 0)
static void
init_sched_wall_time(ErtsSchedulerData *esdp, Uint64 time_stamp)
{
if (esdp->type != ERTS_SCHED_NORMAL) {
erts_atomic32_init_nob(&esdp->sched_wall_time.u.mod, 0);
esdp->sched_wall_time.enabled = 1;
esdp->sched_wall_time.start = time_stamp;
esdp->sched_wall_time.working.total = 0;
esdp->sched_wall_time.working.start = ERTS_SCHED_WTIME_IDLE;
}
else
{
esdp->sched_wall_time.u.need = erts_sched_balance_util;
esdp->sched_wall_time.enabled = 0;
esdp->sched_wall_time.start = 0;
esdp->sched_wall_time.working.total = 0;
esdp->sched_wall_time.working.start = 0;
}
}
static ERTS_INLINE Uint64
sched_wall_time_ts(void)
{
#ifdef ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT
return (Uint64) erts_os_monotonic_time();
#else
Uint64 res;
SysTimeval tv;
sys_gettimeofday(&tv);
res = (Uint64) tv.tv_sec*1000000;
res += (Uint64) tv.tv_usec;
return res;
#endif
}
#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT
static ERTS_INLINE Uint64
aschedtime_read(ErtsAtomicSchedTime *var)
{
return (Uint64) erts_atomic64_read_nob((erts_atomic64_t *) var);
}
static ERTS_INLINE void
aschedtime_set(ErtsAtomicSchedTime *var, Uint64 val)
{
erts_atomic64_set_nob((erts_atomic64_t *) var, (erts_aint64_t) val);
}
static ERTS_INLINE void
aschedtime_init(ErtsAtomicSchedTime *var)
{
erts_atomic64_init_nob((erts_atomic64_t *) var, (erts_aint64_t) 0);
}
#define ERTS_GET_AVG_MAX_UNLOCKED_TRY 50
#define ERTS_SCHED_AVG_UTIL_WRITE_MARKER (~((Uint64) 0))
/* Intervals in nanoseconds */
#define ERTS_SCHED_UTIL_SHORT_INTERVAL ((Uint64) 1*1000*1000*1000)
#define ERTS_SCHED_UTIL_LONG_INTERVAL ((Uint64) 10*1000*1000*1000)
#define ERTS_SCHED_UTIL_IGNORE_IMBALANCE_DIFF 5000 /* ppm */
static ERTS_INLINE Uint64
calc_sched_worktime(int is_working, Uint64 now, Uint64 last,
Uint64 interval, Uint64 old_worktime)
{
Uint64 worktime;
Uint64 new;
if (now <= last)
return old_worktime;
new = now - last;
if (new >= interval)
return is_working ? interval : (Uint64) 0;
/*
* Division by 1000 in order to avoid
* overflow. If changed update assertions
* in init_runq_sched_util().
*/
worktime = old_worktime;
worktime *= (interval - new)/1000;
worktime /= (interval/1000);
if (is_working)
worktime += new;
ASSERT(0 <= worktime && worktime <= interval);
return worktime;
}
static ERTS_INLINE void
update_avg_sched_util(ErtsSchedulerData *esdp, Uint64 now, int is_working)
{
ErtsRunQueue *rq;
int worked;
Uint64 swt, lwt, last;
rq = esdp->run_queue;
last = aschedtime_read(&rq->sched_util.last);
if (now <= last) {
ASSERT(last == ERTS_SCHED_AVG_UTIL_WRITE_MARKER);
return;
}
ASSERT(now >= last);
worked = rq->sched_util.is_working;
swt = calc_sched_worktime(worked, now, last, ERTS_SCHED_UTIL_SHORT_INTERVAL,
rq->sched_util.worktime.short_interval);
lwt = calc_sched_worktime(worked, now, last, ERTS_SCHED_UTIL_LONG_INTERVAL,
rq->sched_util.worktime.long_interval);
aschedtime_set(&rq->sched_util.last, ERTS_SCHED_AVG_UTIL_WRITE_MARKER);
ERTS_THR_WRITE_MEMORY_BARRIER;
rq->sched_util.is_working = is_working;
rq->sched_util.worktime.short_interval = swt;
rq->sched_util.worktime.long_interval = lwt;
ERTS_THR_WRITE_MEMORY_BARRIER;
aschedtime_set(&rq->sched_util.last, now);
}
int
erts_get_sched_util(ErtsRunQueue *rq, int initially_locked, int short_interval)
{
/* Average scheduler utilization in ppm */
int util, is_working, try = 0, locked = initially_locked;
Uint64 worktime, old_worktime, now, last, interval, *old_worktimep;
if (short_interval) {
old_worktimep = &rq->sched_util.worktime.short_interval;
interval = ERTS_SCHED_UTIL_SHORT_INTERVAL;
}
else {
old_worktimep = &rq->sched_util.worktime.long_interval;
interval = ERTS_SCHED_UTIL_LONG_INTERVAL;
}
while (1) {
Uint64 chk_last;
last = aschedtime_read(&rq->sched_util.last);
ERTS_THR_READ_MEMORY_BARRIER;
is_working = rq->sched_util.is_working;
old_worktime = *old_worktimep;
ERTS_THR_READ_MEMORY_BARRIER;
chk_last = aschedtime_read(&rq->sched_util.last);
if (chk_last == last)
break;
if (!locked) {
if (++try >= ERTS_GET_AVG_MAX_UNLOCKED_TRY) {
/* Writer will eventually block on runq-lock */
erts_runq_lock(rq);
locked = 1;
}
}
}
if (!initially_locked && locked)
erts_runq_unlock(rq);
now = sched_wall_time_ts();
worktime = calc_sched_worktime(is_working, now, last, interval, old_worktime);
util = (int) ((worktime * 1000000)/interval);
ASSERT(0 <= util && util <= 1000000);
return util;
}
static void
init_runq_sched_util(ErtsRunQueueSchedUtil *rqsu, int enabled)
{
aschedtime_init(&rqsu->last);
if (!enabled)
aschedtime_set(&rqsu->last, ERTS_SCHED_AVG_UTIL_WRITE_MARKER);
rqsu->is_working = 0;
rqsu->worktime.short_interval = (Uint64) 0;
rqsu->worktime.long_interval = (Uint64) 0;