-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.html
1916 lines (1659 loc) · 97.2 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="zh-Hans">
<head>
<meta charset="utf-8">
<title>kent每天都要进步</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="勿在浮沙筑高台">
<meta property="og:type" content="website">
<meta property="og:title" content="kent每天都要进步">
<meta property="og:url" content="http://blog.xuguruogu.com/index.html">
<meta property="og:site_name" content="kent每天都要进步">
<meta property="og:description" content="勿在浮沙筑高台">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="kent每天都要进步">
<meta name="twitter:description" content="勿在浮沙筑高台">
<link rel="alternate" href="/atom.xml" title="kent每天都要进步" type="application/atom+xml">
<link rel="icon" href="/favicon.png">
<link href="https://fonts.googleapis.com/css?family=Source+Code+Pro" rel="stylesheet" type="text/css">
<link rel="stylesheet" href="/css/style.css">
</head>
<body>
<div id="container">
<div id="wrap">
<header id="header">
<div id="banner"></div>
<div id="header-outer" class="outer">
<div id="header-title" class="inner">
<h1 id="logo-wrap">
<a href="/" id="logo">kent每天都要进步</a>
</h1>
<h2 id="subtitle-wrap">
<a href="/" id="subtitle">ITer的觉悟</a>
</h2>
</div>
<div id="header-inner" class="inner">
<nav id="main-nav">
<a id="main-nav-toggle" class="nav-icon"></a>
<a class="main-nav-link" href="/">Home</a>
<a class="main-nav-link" href="/archives">Archives</a>
</nav>
<nav id="sub-nav">
<a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS Feed"></a>
<a id="nav-search-btn" class="nav-icon" title="Suche"></a>
</nav>
<div id="search-form-wrap">
<form action="//google.com/search" method="get" accept-charset="UTF-8" class="search-form"><input type="search" name="q" results="0" class="search-form-input" placeholder="Search"><button type="submit" class="search-form-submit"></button><input type="hidden" name="sitesearch" value="http://blog.xuguruogu.com"></form>
</div>
</div>
</div>
</header>
<div class="outer">
<section id="main">
<article id="post-kedis" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/05/12/kedis/" class="article-date">
<time datetime="2017-05-12T09:57:08.000Z" itemprop="datePublished">2017-05-12</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/05/12/kedis/">kedis: 一个更优雅的redis cluster proxy</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<h1 id="redis-cluster简介"><a href="#redis-cluster简介" class="headerlink" title="redis cluster简介"></a>redis cluster简介</h1><p>Redis的开源代码包含三种运行模式</p>
<ul>
<li>Standalone:用户的请求直接访问内存数据并返回。</li>
<li>Sentinel:作为redis的监控身份运行,监控多个redis实现raft协议完成故障恢复。这个模式下采用了hiredis访问redis。</li>
<li>Cluster:redis开启了两个端口,一个用于用户访问,一个用于交换gossip协议。Redis之间通过二进制流交换信息。</li>
</ul>
<p>以下是redis cluster的网络拓扑模型,client直连redis,并实现cluster协议(处理moved ask)。遗憾的是大部分redis客户端没有实现cluster协议。<br><img src="/img/kedis/cluster.jpeg" alt="redis cluster"></p>
<p>kedis是一个优雅的redis cluster proxy解决方案,它在代码层级上和db/sentinel/cluster并列,并在事件循环中挂钩。kedis接收client的请求并维护request队列,写入到后端redis的backend并维护callback队列,redis的返回会触发相应的client调用。</p>
<h1 id="kedis代码结构图"><a href="#kedis代码结构图" class="headerlink" title="kedis代码结构图"></a>kedis代码结构图</h1><p><img src="/img/kedis/kedis.jpeg" alt="kedis"></p>
<p>上图中的proxy和backend两部分是kedis proxy的核心功能,其他为redis自带模块,只做了很少的修改。</p>
<h1 id="kedis代码演进"><a href="#kedis代码演进" class="headerlink" title="kedis代码演进"></a>kedis代码演进</h1><p>第一版kedis我用hiredis作为redis异步客户端。然而深入到hiredis发现这个库为了实现通用性和接口的清晰放弃了性能的最优。举例来说,在read()调用时hiredis会首先把数据读到栈空间,然后feed到read buffer。再比如,每一次调用回调函数时会要求数据被拷贝走,hiredis默认释放掉reply的内存。这个版本没做性能测试。</p>
<h2 id="memcoy-优化:"><a href="#memcoy-优化:" class="headerlink" title="memcpy()优化:"></a>memcpy()优化:</h2><p>先观察一下使用hiredis的kedis内存拷贝次数</p>
<ul>
<li>request:tcp协议栈 –> read buffer -> redis object -> hiredis的format函数调用栈转为redis协议字符串 –> write buffer -> tcp协议栈</li>
<li>reply: tcp协议栈 -> 函数调用栈buffer –> feed到read buffer -> reply object –> 回调函数需拷贝reply内容 -> client write buffer -> tcp协议栈</li>
</ul>
<p>这种模式下转发一次请求需要拷贝数据5次,转发一次响应需要拷贝数据7次。</p>
<p>优化之后的kedis将redis协议栈代码拿了出来。对其内存管理大做文章,其内存转移模型变为:</p>
<ul>
<li>request:tcp协议栈-> read buffer-> redis object->write buffer->tcp协议栈</li>
<li>reply: tcp协议栈-> read buffer-> redis object/raw string->write buffer->tcp协议栈</li>
</ul>
<p>优化之后请求和转发都只需要拷贝4次数据。这一次进行性能测试kedis的单核性能已经达到了12w qps,和redis cluster单实例的性能基本一致。</p>
<h2 id="malloc-调用次数优化"><a href="#malloc-调用次数优化" class="headerlink" title="malloc()调用次数优化"></a>malloc()调用次数优化</h2><p>对于mget这样的多个请求key的命令,proxy要做拆分并转发,最后拼接返回给客户端。但是对于hgetll,set这样只有一个key的命令,proxy完全可以做到不感知返回内容,直接转发给客户端。</p>
<p>做完这个优化,在lrange_300的测试环境下,redis一次返回300个元素,kedis吞吐量提升了5倍。</p>
<h2 id="client回写优化"><a href="#client回写优化" class="headerlink" title="client回写优化"></a>client回写优化</h2><p>这部分完全复用了redis的代码。核心思想为以下三点:</p>
<ul>
<li>减少write()调用,每次事件循环将数据写入writer buffer,在调用epoll前调用write()</li>
<li>不使用calloc(),writer buffer采用链式结构。</li>
<li>少量优先,每次事件循环每个客户端最多写16k数据,避免饿死请求量少的client。</li>
</ul>
<p>一个新产品面世当然要和老产品作比较,鉴于codis没有对hgetall, lrange这样的多元素返回的命令做优化,我们就拿codis最擅长的get,set命令对比。在下面这个测试报告的基础上可以得出结论:</p>
<p><strong>kedis对CPU做到了更有效的利用:其单核性能比codis 20核的qps更高,与此同时延时更低。</strong></p>
<h2 id="测试报告"><a href="#测试报告" class="headerlink" title="测试报告"></a>测试报告</h2><p>kedis: CPU占用100%, qps: 125078</p>
<p>补充一下:kedis是redis的延续,是单线程。</p>
<p>redis-benchmark -h 100.69.89.31 -p 36379 -n 1000000 -d 20 -r 200000 -e -l -c 200 -t get,set</p>
<p>====== SET ======<br>1000000 requests completed in 7.99 seconds<br>200 parallel clients<br>20 bytes payload<br>keep alive: 1<br>42.32% <= 1 milliseconds<br>99.64% <= 2 milliseconds<br>99.99% <= 3 milliseconds<br>100.00% <= 3 milliseconds<br>125078.17 requests per second</p>
<p>====== GET ======<br>1000000 requests completed in 8.05 seconds<br>200 parallel clients<br>20 bytes payload<br>keep alive: 1<br>48.97% <= 1 milliseconds<br>99.75% <= 2 milliseconds<br>99.98% <= 3 milliseconds<br>100.00% <= 3 milliseconds<br>124285.37 requests per second</p>
<p>codis:分配20个核,占用1500%, qps: 114692</p>
<p>redis-benchmark -h 100.69.89.31 -p 3000 -n 1000000 -d 20 -r 200000 -e -l -c 200 -t get,set</p>
<p>====== SET ======<br>1000000 requests completed in 8.72 seconds<br>200 parallel clients<br>20 bytes payload<br>keep alive: 1<br>72.64% <= 1 milliseconds<br>98.96% <= 2 milliseconds<br>99.20% <= 3 milliseconds<br>99.26% <= 4 milliseconds<br>99.29% <= 5 milliseconds<br>99.32% <= 6 milliseconds<br>99.34% <= 7 milliseconds<br>99.36% <= 8 milliseconds<br>99.38% <= 9 milliseconds<br>99.44% <= 10 milliseconds<br>99.52% <= 11 milliseconds<br>99.57% <= 12 milliseconds<br>99.60% <= 13 milliseconds<br>99.67% <= 14 milliseconds<br>99.74% <= 15 milliseconds<br>99.80% <= 16 milliseconds<br>99.83% <= 17 milliseconds<br>99.85% <= 18 milliseconds<br>99.86% <= 19 milliseconds<br>99.88% <= 20 milliseconds<br>99.89% <= 21 milliseconds<br>99.91% <= 22 milliseconds<br>99.94% <= 23 milliseconds<br>99.96% <= 24 milliseconds<br>99.98% <= 25 milliseconds<br>100.00% <= 26 milliseconds<br>100.00% <= 27 milliseconds<br>100.00% <= 27 milliseconds<br>114692.05 requests per second</p>
<p>====== GET ======<br>1000000 requests completed in 8.71 seconds<br>200 parallel clients<br>20 bytes payload<br>keep alive: 1<br>71.84% <= 1 milliseconds<br>99.01% <= 2 milliseconds<br>99.23% <= 3 milliseconds<br>99.29% <= 4 milliseconds<br>99.32% <= 5 milliseconds<br>99.35% <= 6 milliseconds<br>99.37% <= 7 milliseconds<br>99.39% <= 8 milliseconds<br>99.42% <= 9 milliseconds<br>99.48% <= 10 milliseconds<br>99.56% <= 11 milliseconds<br>99.62% <= 12 milliseconds<br>99.66% <= 13 milliseconds<br>99.72% <= 14 milliseconds<br>99.80% <= 15 milliseconds<br>99.86% <= 16 milliseconds<br>99.90% <= 17 milliseconds<br>99.92% <= 18 milliseconds<br>99.93% <= 19 milliseconds<br>99.94% <= 20 milliseconds<br>99.95% <= 21 milliseconds<br>99.96% <= 22 milliseconds<br>99.97% <= 23 milliseconds<br>99.98% <= 24 milliseconds<br>99.98% <= 25 milliseconds<br>99.99% <= 26 milliseconds<br>100.00% <= 27 milliseconds<br>100.00% <= 28 milliseconds<br>100.00% <= 28 milliseconds<br>114771.03 requests per second</p>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/05/12/kedis/" data-id="cj2lttzg300007ss607x9sjfv" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/05/12/kedis/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/c/">c</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/cluster/">cluster</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/proxy/">proxy</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/redis/">redis</a></li></ul>
</footer>
</div>
</article>
<article id="post-linux sync、fsync与fdatasync" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/04/11/linux sync、fsync与fdatasync/" class="article-date">
<time datetime="2017-04-11T02:02:18.000Z" itemprop="datePublished">2017-04-11</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/04/11/linux sync、fsync与fdatasync/">linux sync、fsync与fdatasync</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>传统的UNIX实现在内核中设有缓冲区高速缓存或页面高速缓存,大多数磁盘I/O都通过缓冲进行。当将数据写入文件时,内核通常先将该数据复制到其中一个缓冲区中,如果该缓冲区尚未写满,则并不将其排入输出队列,而是等待其写满或者当内核需要重用该缓冲区以便存放其他磁盘块数据时,再将该缓冲排入输出队列,然后待其到达队首时,才进行实际的I/O操作。这种输出方式被称为延迟写(delayed write)(Bach [1986]第3章详细讨论了缓冲区高速缓存)。<br>延迟写减少了磁盘读写次数,但是却降低了文件内容的更新速度,使得欲写到文件中的数据在一段时间内并没有写到磁盘上。当系统发生故障时,这种延迟可能造成文件更新内容的丢失。为了保证磁盘上实际文件系统与缓冲区高速缓存中内容的一致性,UNIX系统提供了sync、fsync和fdatasync三个函数。<br>sync函数只是将所有修改过的块缓冲区排入写队列,然后就返回,它并不等待实际写磁盘操作结束。<br>通常称为update的系统守护进程会周期性地(一般每隔30秒)调用sync函数。这就保证了定期冲洗内核的块缓冲区。命令sync(1)也调用sync函数。<br>fsync函数只对由文件描述符filedes指定的单一文件起作用,并且等待写磁盘操作结束,然后返回。fsync可用于数据库这样的应用程序,这种应用程序需要确保将修改过的块立即写到磁盘上。<br>fdatasync函数类似于fsync,但它只影响文件的数据部分。而除数据外,fsync还会同步更新文件的属性。<br>对于提供事务支持的数据库,在事务提交时,都要确保事务日志(包含该事务所有的修改操作以及一个提交记录)完全写到硬盘上,才认定事务提交成功并返回给应用层。</p>
<p>一个简单的问题:在*nix操作系统上,怎样保证对文件的更新内容成功持久化到硬盘?</p>
<ol>
<li>write不够,需要fsync<br>一般情况下,对硬盘(或者其他持久存储设备)文件的write操作,更新的只是内存中的页缓存(page cache),而脏页面不会立即更新到硬盘中,而是由操作系统统一调度,如由专门的flusher内核线程在满足一定条件时(如一定时间间隔、内存中的脏页达到一定比例)内将脏页面同步到硬盘上(放入设备的IO请求队列)。<br>因为write调用不会等到硬盘IO完成之后才返回,因此如果OS在write调用之后、硬盘同步之前崩溃,则数据可能丢失。虽然这样的时间窗口很小,但是对于需要保证事务的持久化(durability)和一致性(consistency)的数据库程序来说,write()所提供的“松散的异步语义”是不够的,通常需要OS提供的同步IO(synchronized-IO)原语来保证:</li>
</ol>
<figure class="highlight c"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><unistd.h></span></span></div><div class="line"><span class="function"><span class="keyword">int</span> <span class="title">fsync</span><span class="params">(<span class="keyword">int</span> fd)</span></span>;</div></pre></td></tr></table></figure>
<p>fsync的功能是确保文件fd所有已修改的内容已经正确同步到硬盘上,该调用会阻塞等待直到设备报告IO完成。</p>
<p>PS:如果采用内存映射文件的方式进行文件IO(使用mmap,将文件的page cache直接映射到进程的地址空间,通过写内存的方式修改文件),也有类似的系统调用来确保修改的内容完全同步到硬盘之上:</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><sys/mman.h></span></span></div><div class="line"><span class="function"><span class="keyword">int</span> <span class="title">msync</span><span class="params">(<span class="keyword">void</span> *addr, <span class="keyword">size_t</span> length, <span class="keyword">int</span> flags)</span></span></div></pre></td></tr></table></figure>
<p>msync需要指定同步的地址区间,如此细粒度的控制似乎比fsync更加高效(因为应用程序通常知道自己的脏页位置),但实际上(Linux)kernel中有着十分高效的数据结构,能够很快地找出文件的脏页,使得fsync只会同步文件的修改内容。</p>
<ol>
<li>fsync的性能问题,与fdatasync<br>除了同步文件的修改内容(脏页),fsync还会同步文件的描述信息(metadata,包括size、访问时间st_atime & st_mtime等等),因为文件的数据和metadata通常存在硬盘的不同地方,因此fsync至少需要两次IO写操作,fsync的man page这样说:</li>
</ol>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">"Unfortunately fsync() will always initialize two write operations : one for the newly written data and another one in order to update the modification time stored in the inode. If the modification time is not a part of the transaction concept fdatasync() can be used to avoid unnecessary inode disk write operations."</div></pre></td></tr></table></figure>
<p>多余的一次IO操作,有多么昂贵呢?根据Wikipedia的数据,当前硬盘驱动的平均寻道时间(Average seek time)大约是3~15ms,7200RPM硬盘的平均旋转延迟(Average rotational latency)大约为4ms,因此一次IO操作的耗时大约为10ms左右。这个数字意味着什么?下文还会提到。</p>
<p>Posix同样定义了fdatasync,放宽了同步的语义以提高性能:</p>
<figure class="highlight c"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div></pre></td><td class="code"><pre><div class="line"><span class="meta">#<span class="meta-keyword">include</span> <span class="meta-string"><unistd.h></span></span></div><div class="line"><span class="function"><span class="keyword">int</span> <span class="title">fdatasync</span><span class="params">(<span class="keyword">int</span> fd)</span></span>;</div></pre></td></tr></table></figure>
<p>fdatasync的功能与fsync类似,但是仅仅在必要的情况下才会同步metadata,因此可以减少一次IO写操作。那么,什么是“必要的情况”呢?根据man page中的解释:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">"fdatasync does not flush modified metadata unless that metadata is needed in order to allow a subsequent data retrieval to be correctly handled."</div></pre></td></tr></table></figure>
<p>举例来说,文件的尺寸(st_size)如果变化,是需要立即同步的,否则OS一旦崩溃,即使文件的数据部分已同步,由于metadata没有同步,依然读不到修改的内容。而最后访问时间(atime)/修改时间(mtime)是不需要每次都同步的,只要应用程序对这两个时间戳没有苛刻的要求,基本无伤大雅。</p>
<p>PS:open时的参数O_SYNC/O_DSYNC有着和fsync/fdatasync类似的语义:使每次write都会阻塞等待硬盘IO完成。(实际上,Linux对O_SYNC/O_DSYNC做了相同处理,没有满足Posix的要求,而是都实现了fdatasync的语义)相对于fsync/fdatasync,这样的设置不够灵活,应该很少使用。</p>
<ol>
<li>使用fdatasync优化日志同步<br>文章开头时已提到,为了满足事务要求,数据库的日志文件是常常需要同步IO的。由于需要同步等待硬盘IO完成,所以事务的提交操作常常十分耗时,成为性能的瓶颈。</li>
</ol>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div></pre></td><td class="code"><pre><div class="line">在Berkeley DB下,如果开启了AUTO_COMMIT(所有独立的写操作自动具有事务语义)并使用默认的同步级别(日志完全同步到硬盘才返回),写一条记录的耗时大约为5~10ms级别,基本和一次IO操作(10ms)的耗时相同。</div></pre></td></tr></table></figure>
<p> 我们已经知道,在同步上fsync是低效的。但是如果需要使用fdatasync减少对metadata的更新,则需要确保文件的尺寸在write前后没有发生变化。日志文件天生是追加型(append-only)的,总是在不断增大,似乎很难利用好fdatasync。</p>
<p>且看Berkeley DB是怎样处理日志文件的:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div></pre></td><td class="code"><pre><div class="line">1.每个log文件固定为10MB大小,从1开始编号,名称格式为“log.%010d"</div><div class="line">2.每次log文件创建时,先写文件的最后1个page,将log文件扩展为10MB大小</div><div class="line">3.向log文件中追加记录时,由于文件的尺寸不发生变化,使用fdatasync可以大大优化写log的效率</div><div class="line">4.如果一个log文件写满了,则新建一个log文件,也只有一次同步metadata的开销</div></pre></td></tr></table></figure>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/04/11/linux sync、fsync与fdatasync/" data-id="cj2ltcraz0004vts6vzdd1vjv" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/04/11/linux sync、fsync与fdatasync/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/c/">c</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-proc-buddyinfo理解" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/04/03/proc-buddyinfo理解/" class="article-date">
<time datetime="2017-04-03T01:43:04.000Z" itemprop="datePublished">2017-04-03</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/04/03/proc-buddyinfo理解/">/proc/buddyinfo理解</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>/proc/buddyinfo<br>This file is used primarily for diagnosing memory fragmentation issues. Using the buddy algorithm, each column represents the number of pages of a certain order (a certain size) that are available at any given time. For example, for zone DMA (direct memory access), there are 90 of 2^(0*PAGE_SIZE) chunks of memory. Similarly, there are 6 of 2^(1*PAGE_SIZE) chunks, and 2 of 2^(2*PAGE_SIZE) chunks of memory available.</p>
<p>The DMA row references the first 16 MB on a system, the HighMem row references all memory greater than 4 GB on a system, and the Normal row references all memory in between.</p>
<p>Each column of numbers represents the number of pages of that order which are available. In the example below, there are 7 chunks of 2 ^ 0 * PAGE_SIZE available in ZONE_DMA, and 12 chunks of 2 ^ 3 * PAGE_SIZE available in ZONE_NORMAL, etc…</p>
<p>This information can give you a good idea about how fragmented memory is and give you a clue as to how big of an area you can safely allocate.</p>
<p>When a Linux system has been running for a while memory fragmentation can increase which depends heavily on the nature of the applications that are running on it. The more processes allocate and free memory, the quicker memory becomes fragmented. And the kernel may not always be able to defragment enough memory for a requested size on time. If that happens, applications may not be able to allocate larger contiguous chunks of memory even though there is enough free memory available. Starting with the 2.6 kernel, i.e. RHEL4 and SLES9, memory management has improved tremendously and memory fragmentation has become less of an issue.</p>
<p>To see memory fragmentation you can use the magic SysRq key. Simply execute the following command:</p>
<h1 id="echo-m-gt-proc-sysrq-triggerThis-command-will-dump-current-memory-information-to-var-log-messages-Here-is-an-example-of-a-RHEL3-32-bit-system"><a href="#echo-m-gt-proc-sysrq-triggerThis-command-will-dump-current-memory-information-to-var-log-messages-Here-is-an-example-of-a-RHEL3-32-bit-system" class="headerlink" title="echo m > /proc/sysrq-triggerThis command will dump current memory information to /var/log/messages. Here is an example of a RHEL3 32-bit system:"></a>echo m > /proc/sysrq-triggerThis command will dump current memory information to /var/log/messages. Here is an example of a RHEL3 32-bit system:</h1><p>Jul 23 20:19:30 localhost kernel: 0*4kB 0*8kB 0*16kB 1*32kB 0*64kB 1*128kB 1*256kB 1*512kB 1*1024kB 0*2048kB 0*4096kB = 1952kB)<br>Jul 23 20:19:30 localhost kernel: 1395*4kB 355*8kB 209*16kB 15*32kB 0*64kB 0*128kB 0*256kB 0*512kB 0*1024kB 0*2048kB 0*4096kB = 12244kB)<br>Jul 23 20:19:31 localhost kernel: 1479*4kB 673*8kB 205*16kB 73*32kB 21*64kB 847*128kB 473*256kB 92*512kB 164*1024kB 64*2048kB 28*4096kB = 708564kB)The first line shows DMA memory fragmentation. The second line shows Low Memory fragmentation and the third line shows High Memory fragmentation. The output shows memory fragmentation in the Low Memory area. But there are many large memory chunks available in the High Memory area, e.g. 28 4MB.</p>
<p>If memory information was not dumped to /var/log/messages, then SysRq was not enabled. You can enable SysRq by setting sysrq to 1:</p>
<h1 id="echo-1-gt-proc-sys-kernel-sysrqStarting-with-the-2-6-kernel-i-e-RHEL4-and-SLES9-you-don’t-need-SysRq-to-dump-memory-information-You-can-simply-check-proc-buddyinfo-for-memory-fragmentation"><a href="#echo-1-gt-proc-sys-kernel-sysrqStarting-with-the-2-6-kernel-i-e-RHEL4-and-SLES9-you-don’t-need-SysRq-to-dump-memory-information-You-can-simply-check-proc-buddyinfo-for-memory-fragmentation" class="headerlink" title="echo 1 > /proc/sys/kernel/sysrqStarting with the 2.6 kernel, i.e. RHEL4 and SLES9, you don’t need SysRq to dump memory information. You can simply check /proc/buddyinfo for memory fragmentation."></a>echo 1 > /proc/sys/kernel/sysrqStarting with the 2.6 kernel, i.e. RHEL4 and SLES9, you don’t need SysRq to dump memory information. You can simply check /proc/buddyinfo for memory fragmentation.</h1><p>Here is the output of a 64-bit server running the 2.6 kernel:</p>
<h1 id="cat-proc-buddyinfo"><a href="#cat-proc-buddyinfo" class="headerlink" title="cat /proc/buddyinfo"></a>cat /proc/buddyinfo</h1><pre><code>Node 0, zone DMA 5 4 3 4 3 2 1 0 1 1 2
Node 0, zone Normal 1046 527 128 36 17 5 26 40 13 16 94
</code></pre><h1 id="echo-m-gt-proc-sysrq-trigger"><a href="#echo-m-gt-proc-sysrq-trigger" class="headerlink" title="echo m > /proc/sysrq-trigger"></a>echo m > /proc/sysrq-trigger</h1><h1 id="grep-Normal-var-log-messages-tail-1"><a href="#grep-Normal-var-log-messages-tail-1" class="headerlink" title="grep Normal /var/log/messages | tail -1"></a>grep Normal /var/log/messages | tail -1</h1><pre><code>Jul 23 21:42:26 localhost kernel: Normal: 1046*4kB 529*8kB 129*16kB 36*32kB 17*64kB 5*128kB 26*256kB 40*512kB 13*1024kB 16*2048kB 94*4096kB = 471600kB
</code></pre><p>#In this example I used SysRq again to show what each number in /proc/buddyinfo is referring to</p>
<p>根据以上文章理解内存碎片是怎么回事了。<br>471600kb就是可用的内存(可以连续分配的内存地址空间),以4kb,8,16,32,64,128,256,512,1024,2048,4096kb为单位的所有可分配内存之和。</p>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/04/03/proc-buddyinfo理解/" data-id="cj2ltcrb9000avts6gzt7g0m9" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/04/03/proc-buddyinfo理解/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/memory/">memory</a></li></ul>
</footer>
</div>
</article>
<article id="post-linux 查看系统硬件信息" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/linux 查看系统硬件信息/" class="article-date">
<time datetime="2017-03-30T08:12:49.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/linux 查看系统硬件信息/">linux 查看系统硬件信息</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<ul>
<li><p>cpu<br>lscpu命令,查看的是cpu的统计信息.</p>
<p> Architecture: x86_64 #cpu架构<br> CPU op-mode(s): 32-bit, 64-bit<br> Byte Order: Little Endian #小尾序<br> CPU(s): 48 #总共有48核<br> On-line CPU(s) list: 0-47<br> Thread(s) per core: 2 #每个cpu核,支持2个线程,即支持超线程<br> Core(s) per socket: 12 #每个cpu,有12个核<br> Socket(s): 2 #总共有2个cpu<br> NUMA node(s): 1 #没有开启NUMA<br> Vendor ID: GenuineIntel #cpu厂商 intel<br> CPU family: 6<br> Model: 63<br> Stepping: 2<br> CPU MHz: 2294.719<br> BogoMIPS: 4589.37<br> Virtualization: VT-x #支持cpu虚拟化技术<br> L1d cache: 32K<br> L1i cache: 32K<br> L2 cache: 256K<br> L3 cache: 30720K<br> NUMA node0 CPU(s): 0-47</p>
</li>
<li><p>磁盘<br>查看硬盘和分区分布</p>
<h1 id="lsblk"><a href="#lsblk" class="headerlink" title="lsblk"></a>lsblk</h1><p> NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT<br> sda 8:0 0 278.5G 0 disk<br> ├─sda1 8:1 0 953.7M 0 part /boot<br> ├─sda2 8:2 0 54G 0 part /<br> ├─sda3 8:3 0 7.5G 0 part<br> ├─sda4 8:4 0 111.8G 0 part /tmp<br> └─sda5 8:5 0 104.3G 0 part /var/log<br> sdb 8:16 0 2.2T 0 disk<br> └─sdb1 8:17 0 2.2T 0 part /home</p>
</li>
<li><p>网卡<br>查看网卡硬件信息</p>
<h1 id="lspci-grep-i-‘eth’"><a href="#lspci-grep-i-‘eth’" class="headerlink" title="lspci | grep -i ‘eth’"></a>lspci | grep -i ‘eth’</h1><p> 02:00.0 Ethernet controller: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2 (rev 01)<br> 02:00.1 Ethernet controller: Intel Corporation Ethernet Controller 10-Gigabit X540-AT2 (rev 01)</p>
</li>
</ul>
<p>如果要查看某个网络接口的详细信息,例如eth0的详细参数和指标</p>
<pre><code># ethtool eth0
Settings for eth0:
Supported ports: [ TP ]
Supported link modes: 100baseT/Full
1000baseT/Full
10000baseT/Full #支持万兆全双工模式
Supported pause frame use: No
Supports auto-negotiation: Yes #支持自适应模式,一般都支持
Advertised link modes: 100baseT/Full
1000baseT/Full
10000baseT/Full
Advertised pause frame use: No
Advertised auto-negotiation: Yes #默认使用自适应模式
Speed: 10000Mb/s #现在网卡的速度是10000Mb,网卡使用自适应模式
Duplex: Full #全双工
Port: Twisted Pair
PHYAD: 0
Transceiver: external
Auto-negotiation: on
MDI-X: Unknown
Supports Wake-on: umbg
Wake-on: g
Current message level: 0x00000007 (7)
drv probe link
Link detected: yes #表示有网线连接,和路由是通的
</code></pre><p>查看bios信息</p>
<pre><code># dmidecode -t bios
# dmidecode 2.12
SMBIOS 2.8 present.
Handle 0x001D, DMI type 0, 24 bytes
BIOS Information
Vendor: Insyde Corp.
Version: 1.57
Release Date: 08/11/2015
Address: 0xE0000
Runtime Size: 128 kB
ROM Size: 16384 kB
Characteristics:
PCI is supported
BIOS is upgradeable
BIOS shadowing is allowed
Boot from CD is supported
Selectable boot is supported
EDD is supported
Japanese floppy for NEC 9800 1.2 MB is supported (int 13h)
Japanese floppy for Toshiba 1.2 MB is supported (int 13h)
5.25"/360 kB floppy services are supported (int 13h)
5.25"/1.2 MB floppy services are supported (int 13h)
3.5"/720 kB floppy services are supported (int 13h)
3.5"/2.88 MB floppy services are supported (int 13h)
8042 keyboard services are supported (int 9h)
CGA/mono video services are supported (int 10h)
ACPI is supported
USB legacy is supported
BIOS boot specification is supported
Targeted content distribution is supported
UEFI is supported
BIOS Revision: 1.0
Handle 0x0026, DMI type 13, 22 bytes
BIOS Language Information
Language Description Format: Long
Installable Languages: 2
en|US|iso8859-1
zh|CN|unicode
Currently Installed Language: en|US|iso8859-1
</code></pre><p>dmidecode以一种可读的方式dump出机器的DMI(Desktop Management Interface)信息。这些信息包括了硬件以及BIOS,既可以得到当前的配置,也可以得到系统支持的最大配置,比如说支持的最大内存数等。</p>
<p>如果要查看所有有用信息</p>
<pre><code>dmidecode -q
</code></pre>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/linux 查看系统硬件信息/" data-id="cj2ltcrat0001vts61p8a1y21" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/linux 查看系统硬件信息/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-mpstat命令" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/mpstat命令/" class="article-date">
<time datetime="2017-03-30T07:35:35.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/mpstat命令/">linux性能测试 mpstat命令</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>mpstat是MultiProcessor Statistics的缩写,是实时系统监控工具。其报告与CPU的一些统计信息,这些信息存放在/proc/stat文件中。在多CPUs系统里,其不但能查看所有CPU的平均状况信息,而且能够查看特定CPU的信息。<br>下面只介绍mpstat与CPU相关的参数,mpstat的语法如下:</p>
<pre><code>Usage: mpstat [ options... ] [ <interval> [ <count> ] ]
Options are:
[ -P { <cpu> | ALL } ] [ -V ]
</code></pre><p>参数的含义如下:<br>参数 解释<br>-P {|ALL} 表示监控哪个CPU, cpu在[0,cpu个数-1]中取值<br>internal 相邻的两次采样的间隔时间<br>count 采样的次数,count只能和delay一起使用</p>
<p>当没有参数时,mpstat则显示系统启动以后所有信息的平均值。有interval时,第一行的信息自系统启动以来的平均信息。<br>从第二行开始,输出为前一个interval时间段的平均信息。</p>
<p>与CPU有关的输出的含义如下:<br>参数 解释 从/proc/stat获得数据<br>CPU 处理器ID<br>user 在internal时间段里,用户态的CPU时间(%),不包含 nice值为负 进程 (usr/total)*100<br>nice 在internal时间段里,nice值为负进程的CPU时间(%) (nice/total)*100<br>system 在internal时间段里,核心时间(%) (system/total)*100<br>iowait 在internal时间段里,硬盘IO等待时间(%) (iowait/total)*100<br>irq 在internal时间段里,硬中断时间(%) (irq/total)*100<br>soft 在internal时间段里,软中断时间(%) (softirq/total)*100<br>idle 在internal时间段里,CPU除去等待磁盘IO操作外的因为任何原因而空闲的时间(%) (idle/total)*100<br>intr/s 在internal时间段里,每秒CPU接收的中断的次数 (intr/total)*100<br>CPU总的工作时间=total_cur=user+system+nice+idle+iowait+irq+softirq<br>total_pre=pre_user+ pre_system+ pre_nice+ pre_idle+ pre_iowait+ pre_irq+ pre_softirq<br>user=user_cur – user_pre<br>total=total_cur-total_pre<br>其中_cur 表示当前值,_pre表示interval时间前的值。上表中的所有值可取到两位小数点。</p>
<p>范例1:average mode (粗略信息)<br>当mpstat不带参数时,输出为从系统启动以来的平均值。</p>
<pre><code>[root@C44 ~]# mpstat
Linux 2.6.14.7-selinux1-WR1.4aq_cgl (MSP) 07/26/12
12:47:05 CPU %user %nice %sys %iowait %irq %soft %idle intr/s
12:47:05 all 2.98 0.00 2.68 2.12 0.05 0.31 91.87 391.82
</code></pre><p>范例2: 每2秒产生了2个处理器的统计数据报告<br>下面的命令可以每2秒产生了2个处理器的统计数据报告,一共产生三个interval 的信息,然后再给出这三个interval的平<br>均信息。默认时,输出是按照CPU 号排序。第一个行给出了从系统引导以来的所有活跃数据。接下来每行对应一个处理器的<br>活跃状态。。</p>
<pre><code>[root@C44 ~]# mpstat -P ALL 2 3
Linux 2.6.14.7-selinux1-WR1.4aq_cgl (MSP) 07/26/12
12:47:11 CPU %user %nice %sys %iowait %irq %soft %idle intr/s
12:47:13 all 1.51 0.00 0.76 0.00 0.00 0.25 97.48 296.50
12:47:13 0 2.50 0.00 2.00 0.00 0.00 0.50 95.00 296.50
12:47:13 1 1.00 0.00 0.00 0.00 0.00 0.50 98.00 0.00
12:47:13 CPU %user %nice %sys %iowait %irq %soft %idle intr/s
12:47:15 all 0.50 0.00 0.25 0.00 0.00 0.00 99.24 295.45
12:47:15 0 1.01 0.00 0.00 0.00 0.00 0.00 98.99 295.45
12:47:15 1 0.00 0.00 0.00 0.00 0.00 0.00 100.00 0.00
12:47:15 CPU %user %nice %sys %iowait %irq %soft %idle intr/s
12:47:17 all 0.51 0.00 0.76 0.25 0.00 0.25 98.23 299.49
12:47:17 0 1.01 0.00 1.01 0.00 0.00 0.51 97.47 299.49
12:47:17 1 0.00 0.00 1.01 0.00 0.00 0.00 99.49 0.00
Average: CPU %user %nice %sys %iowait %irq %soft %idle intr/s
Average: all 0.84 0.00 0.59 0.08 0.00 0.17 98.32 297.15
Average: 0 1.51 0.00 1.01 0.00 0.00 0.34 97.15 297.15
Average: 1 0.34 0.00 0.34 0.00 0.00 0.17 99.16 0.00
</code></pre>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/mpstat命令/" data-id="cj2ltcrb70008vts6c8s7e5im" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/mpstat命令/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/cpu/">cpu</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-网卡队列RSS-RPS-RFS" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/网卡队列RSS-RPS-RFS/" class="article-date">
<time datetime="2017-03-30T07:25:27.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/网卡队列RSS-RPS-RFS/">网卡队列RSS/RPS/RFS</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>网卡优化<br>RSS receive side scaling,网卡多队列,需要硬件支持。网卡接收到网络数据包后,要发送一个硬件中断,通知CPU取数据包。默认配置,都是由CPU0去做。<br>RPS receive packet steering,向某个CPU发送一个软中断,来接收数据包,并递交给应用程序。<br>RFS receive flow steering,维护两种hash表,实现将软中断分散到多颗CPU上去处理。</p>
<ul>
<li>选择支持msi-x中断方式的网卡类型<br>#lspci –v</li>
<li>网卡需要支持多队列<br>#lspci –vvv<br>如果有MSI-X && Enable+ && TabSize > 1,则该网卡是多队列网卡</li>
<li><p>2.6.35 以后的内核版本都支持google的RPS/RFS补丁,RHEL6.1以后。这个功能默认关闭需要手工开启<br>开启RPS(两颗4c的CPU)<br>#echo ff > /sys/class/net/eth0/queues/rx-0/rps_cpus<br>开启RFS(内存大的机器可以设置大于4096)<br>#echo 4096 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt<br>4096*N(N网卡队列数# cat /proc/interrupts | grep eth0)<br>#echo 32768 > /proc/sys/net/core/rps_sock_flow_entries</p>
</li>
<li><p><a href="http://blog.netzhou.net/?p=181" target="_blank" rel="external">http://blog.netzhou.net/?p=181</a></p>
</li>
<li><a href="http://blog.csdn.net/turkeyzhou/article/details/7528182" target="_blank" rel="external">http://blog.csdn.net/turkeyzhou/article/details/7528182</a></li>
<li><a href="https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Performance_Tuning_Guide/index.html" target="_blank" rel="external">https://access.redhat.com/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Performance_Tuning_Guide/index.html</a></li>
</ul>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/网卡队列RSS-RPS-RFS/" data-id="cj2ltcrbe000evts6iwa3twpr" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/网卡队列RSS-RPS-RFS/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/tcp/">tcp</a></li></ul>
</footer>
</div>
</article>
<article id="post-proc-irq-number-smp-affinity" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/proc-irq-number-smp-affinity/" class="article-date">
<time datetime="2017-03-30T05:56:55.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/proc-irq-number-smp-affinity/">/proc/irq/{number}/smp_affinity</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>在多 CPU 的环境中,还有一个中断平衡的问题,比如,网卡中断会交给哪个 CPU 处理,这个参数控制哪些 CPU 可以绑定 IRQ 中断。其中的 {number} 是对应设备的中断编号,可以用下面的命令找出:<br>cat /proc/interrupts<br>比如,一般 eth0 的 IRQ 编号是 16,所以控制 eth0 中断绑定的 /proc 文件名是 /proc/irq/16/smp_affinity。上面这个命令还可以看到某些中断对应的CPU处理的次数,缺省的时候肯定是不平衡的。<br>设置其值的方法很简单,smp_affinity 自身是一个位掩码(bitmask),特定的位对应特定的 CPU,这样,01 就意味着只有第一个 CPU 可以处理对应的中断,而 0f(二进制 1111)意味着四个 CPU 都会参与中断处理。<br>几乎所有外设都有这个参数设置,可以关注一下。<br>这个数值的推荐设置,其实在很大程度上,让专门的CPU处理专门的中断是效率最高的,比如,给磁盘IO一个CPU,给网卡一个CPU,这样是比较合理的。</p>
<p><a href="https://cs.uwaterloo.ca/~brecht/servers/apic/SMP-affinity.txt" target="_blank" rel="external">SMP IRQ Affinity</a></p>
<p>Background: </p>
<p>Whenever a piece of hardware, such as disk controller or ethernet card,<br>needs attention from the CPU, it throws an interrupt. The interrupt tells<br>the CPU that something has happened and that the CPU should drop what<br>it’s doing to handle the event. In order to prevent multiple devices from<br>sending the same interrupts, the IRQ system was established where each device<br>in a computer system is assigned its own special IRQ so that its interrupts<br>are unique.</p>
<p>Starting with the 2.4 kernel, Linux has gained the ability to assign certain<br>IRQs to specific processors (or groups of processors). This is known<br>as SMP IRQ affinity, and it allows you to control how your system will respond<br>to various hardware events. It allows you to restrict or repartition<br>the work load that your server must do so that it can more efficiently do<br>its job.</p>
<p>Obviously, in order for this to work, you will need a system that has more<br>than one processor (SMP). You will also need to be running a 2.4 or higher<br>kernel.</p>
<p>Some brief and very bare information on SMP IRQ affinity is provided in<br>the kernel source tree of the 2.4 kernel in the file:</p>
<pre><code>/usr/src/linux-2.4/Documentation/IRQ-affinity.txt
</code></pre><p>How to use it:</p>
<p>SMP affinity is controlled by manipulating files in the /proc/irq/ directory.<br>In /proc/irq/ are directories that correspond to the IRQs present on your<br>system (not all IRQs may be available). In each of these directories is<br>the “smp_affinity” file, and this is where we will work our magic.</p>
<p>The first order of business is to figure out what IRQ a device is using.<br>This information is available in the /proc/interrupts file. Here’s a sample:</p>
<p> [root@archimedes /proc]# cat /proc/interrupts<br> CPU0 CPU1 CPU2 CPU3<br> 0: 4865302 5084964 4917705 5017077 IO-APIC-edge timer<br> 1: 132 108 159 113 IO-APIC-edge keyboard<br> 2: 0 0 0 0 XT-PIC cascade<br> 8: 0 1 0 0 IO-APIC-edge rtc<br> 10: 0 0 0 0 IO-APIC-level usb-ohci<br> 14: 0 0 1 1 IO-APIC-edge ide0<br> 24: 87298 86066 86012 86626 IO-APIC-level aic7xxx<br> 31: 93707 106211 107988 93329 IO-APIC-level eth0<br> NMI: 0 0 0 0<br> LOC: 19883500 19883555 19883441 19883424<br> ERR: 0<br> MIS: 0</p>
<p>As you can see, this is a 4 processor machine. The first column (unlabelled)<br>lists the IRQs used on the system. The rows with letters (ie, “NMI”, “LOC”)<br>are parts of other drivers used on the system and aren’t really accessible<br>to us, so we’ll just ignore them.</p>
<p>The second through fifth columns (labelled CPU0-CPU3) show the number of times<br>the corresponding process has handled an interrupt from that particular IRQ.<br>For example, all of the CPUs have handled roughly the same number of interrupts<br>for IRQ 24 (around 86,000 with CPU0 handling a little over 87,000).</p>
<p>The sixth column lists whether or not the device driver associated with the<br>interrupt supports IO-APIC (see /usr/src/linux/Documentation/i386/IO-APIC.txt<br>for more information). The only reason to look at this value is that<br>SMP affinity will only work for IO-APIC enabled device drivers. For<br>example, we will not be able to change the affinity for the “cascade”<br>driver (IRQ 2) because it doesn’t support IO-APIC.</p>
<p>Finally, the seventh and last column lists the driver or device that is<br>associated with the interrupt. In the above example, our ethernet card<br>(eth0) is using IRQ 31, and our SCSI controller (aic7xxx) is using IRQ 24.</p>
<p>The first and last columns are really the only ones we’re interested in here.<br>For the rest of this example, I’m going to assume that we want to adjust<br>the SMP affinity for the SCSI controller (IRQ 24).</p>
<p>Now that we’ve got the IRQ, we can change the processor affinity. To<br>do this, we’ll go into the /proc/irq/24/ directory, and see what the<br>affinity is currently set to:</p>
<p> [root@archimedes Documentation]# cat /proc/irq/24/smp_affinity<br> ffffffff</p>
<p>This is a bitmask that represents which processors any interrupts on IRQ<br>24 should be routed to. Each field in the bit mask corresponds to a processor.<br>The number held in the “smp_affinity” file is presented in hexadecimal format,<br>so in order to manipulate it properly we will need to convert our bit patterns<br>from binary to hex before setting them in the proc file.</p>
<p>Each of the “f”s above represents a group of 4 CPUs, with the rightmost<br>group being the least significant. For the purposes of our discussion,<br>we’re going to limit ourselves to only the first 4 CPUs (although we can<br>address up to 32).</p>
<p>In short, this means you only have to worry about the rightmost “f” and you<br>can assume everything else is a “0” (ie, our bitmask is “0000000f”).</p>
<p>“f” is the hexadecimal representation for the decimal number 15 (fifteen)<br>and the binary pattern of “1111”. Each of the places in the binary pattern<br>corresponds to a CPU in the server, which means we can use the following<br>chart to represent the CPU bit patterns:</p>
<pre><code> Binary Hex
CPU 0 0001 1
CPU 1 0010 2
CPU 2 0100 4
CPU 3 1000 8
</code></pre><p>By combining these bit patterns (basically, just adding the Hex values), we<br>can address more than one processor at a time. For example, if I wanted<br>to talk to both CPU0 and CPU2 at the same time, the result is:</p>
<pre><code>          Binary    Hex
  CPU 0    0001      1
+ CPU 2    0100      4
-----------------------
  both     0101      5
</code></pre><p>If I want to address all four of the processors at once, then the result is:</p>
<pre><code>          Binary    Hex
  CPU 0    0001      1
  CPU 1    0010      2
  CPU 2    0100      4
+ CPU 3    1000      8
-----------------------
  both     1111      f
</code></pre><p>(Remember that we use the letters “a” through “f” to represent the numbers<br> “10” to “15” in hex notation).</p>
<p>Given that, we now know that if we have a four processor system, we can<br>assign any of 15 different CPU combinations to an IRQ (it would be 16, but<br>it isn’t legal to assign an IRQ affinity of “0” to any IRQ… if you try,<br>Linux will just ignore your attempt).</p>
<p>So. Now we get to the fun part. Remember in our /proc/interrupts listing<br>above that all four of our CPUs had handled close to the same amount of<br>interrupts for our SCSI card? We now have the tools needed to limit managing<br>the SCSI card to just one processor and leave the other three free to<br>concentrate on doing other tasks. Let’s assume that we want to dedicate<br>our first CPU (CPU0) to handling the SCSI controller interrupts. To do this,<br>we would simply run the following command:</p>
<p> [root@archimedes /proc]# echo 1 > /proc/irq/24/smp_affinity<br> [root@archimedes /proc]# cat /proc/irq/24/smp_affinity<br> 00000001</p>
<p>Now, let’s test it out and see what happens:</p>
<p> [root@archimedes /proc]# cd /tmp/<br> [root@archimedes /tmp]# tar -zcf test.tgz /usr/src/linux-2.4.2<br> tar: Removing leading `/‘ from member names<br> [root@archimedes /tmp]# tar -zxf test.tgz && rm -rf usr/<br> [root@archimedes /tmp]# tar -zxf test.tgz && rm -rf usr/<br> [root@archimedes /tmp]# tar -zxf test.tgz && rm -rf usr/<br> [root@archimedes /tmp]# tar -zxf test.tgz && rm -rf usr/<br> [root@archimedes /tmp]# tar -zxf test.tgz && rm -rf usr/<br> [root@archimedes /tmp]# cat /proc/interrupts | grep 24:<br> 24: 99719 86067 86012 86627 IO-APIC-level aic7xxx</p>
<p>Compare that to the previous run without having the IRQ bound to CPU0:</p>
<p> 24: 87298 86066 86012 86626 IO-APIC-level aic7xxx</p>
<p>All of the interrupts from the disk controller are now handled exclusively<br>by the first CPU (CPU0), which means that our other 3 proccessors are free<br>to do other stuff now.</p>
<p>Finally, it should be pointed out that if you decide you no longer want<br>SMP affinity and would rather have the system revert back to the old set up,<br>then you can simply do:</p>
<p> [root@archimedes /tmp]# cat /proc/irq/prof_cpu_mask >/proc/irq/24/smp_affinity</p>
<p>This will reset the “smp_affinity” file to use all “f”s, and will return to<br>the load sharing arrangement that we saw earlier.</p>
<p>What can I use it for?</p>
<ul>
<li><p>“balance” out multiple NICs in a multi-processor machine. By tying a single<br>NIC to a single CPU, you should be able to scale the amount of traffic<br>your server can handle nicely.</p>
</li>
<li><p>database servers (or servers with lots of disk storage) that also have<br>heavy network loads can dedicate a CPU to their disk controller and assign<br>another to deal with the NIC to help improve response times.</p>
</li>
</ul>
<p>Can I do this with processes?</p>
<p>At this time, no.</p>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/proc-irq-number-smp-affinity/" data-id="cj2ltcrbd000dvts6mwo53c4j" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/proc-irq-number-smp-affinity/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-top cpu含义" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/top%20cpu含义/" class="article-date">
<time datetime="2017-03-30T05:52:25.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/top%20cpu含义/">top cpu含义</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>使用系统命令top即可看到如下类似信息:</p>
<p>Cpu(s): 0.0%us, 0.5%sy, 0.0%ni, 99.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st</p>
<p>us: is meaning of “user CPU time”<br>sy: is meaning of “system CPU time”<br>ni: is meaning of” nice CPU time”<br>id: is meaning of “idle”<br>wa: is meaning of “iowait”<br>hi:is meaning of “hardware irq”<br>si : is meaning of “software irq”<br>st : is meaning of “steal time”</p>
<p>us 用户空间占用CPU百分比<br>sy 内核空间占用CPU百分比<br>ni 用户进程空间内改变过优先级的进程占用CPU百分比<br>id 空闲CPU百分比<br>wa 等待输入输出的CPU时间百分比<br>hi 硬件中断<br>si 软件中断<br>st: 实时</p>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/top%20cpu含义/" data-id="cj2ltcrba000bvts6bh9x4xil" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/top%20cpu含义/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-linux下cpu物理个数、多核、超线程判断解析" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/30/linux下cpu物理个数、多核、超线程判断解析/" class="article-date">
<time datetime="2017-03-30T03:10:29.000Z" itemprop="datePublished">2017-03-30</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/30/linux下cpu物理个数、多核、超线程判断解析/">linux下cpu物理个数、多核、超线程判断解析</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>在Linux体系中,供给了proc文件体系显示体系的软硬件信息。若是想懂得体系中CPU的供给商和相干设备信息,则可以经由过程/proc/cpuinfo文件获得。本文章针对该文件进行简单的总结。</p>
<p>基于指令集(ISA)的CPU产生的/proc/cpuinfo文件不一样,基于X86指令集CPU的/proc/cpuinfo文件包含如下内容:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div></pre></td><td class="code"><pre><div class="line">processor : 0</div><div class="line">vendor_id : GenuineIntel</div><div class="line">cpu family : 6</div><div class="line">model : 23</div><div class="line">model name : Intel(R) Xeon(R) CPU E5430 @ 2.66GHz</div><div class="line">stepping : 10</div><div class="line">cpu MHz : 2666.890</div><div class="line">cache size : 6144 KB</div><div class="line">physical id : 0</div><div class="line">siblings : 4</div><div class="line">core id : 0</div><div class="line">cpu cores : 4</div><div class="line">apicid : 0</div><div class="line">initial apicid : 0</div><div class="line">fpu : yes</div><div class="line">fpu_exception : yes</div><div class="line">cpuid level : 13</div><div class="line">wp : yes</div><div class="line">flags : fpu vme de pse tsc msr pae mce cx8 apic mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc arch_perfmon pebs bts rep_good aperfmperf pni tm2 ssse3 lahf_lm dts</div><div class="line">bogomips : 5333.78</div><div class="line">clflush size : 64</div><div class="line">cache_alignment : 64</div><div class="line">address sizes : 38 bits physical, 48 bits virtual</div><div class="line">power management:</div></pre></td></tr></table></figure>
<p>以上输出项的含义如下:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div></pre></td><td class="code"><pre><div class="line">processor :体系中逻辑处理惩罚核的编号。对于单核处理惩罚器,则课认为是其CPU编号,对于多核处理惩罚器则可所以物理核、或者应用超线程技巧虚拟的逻辑核</div><div class="line">vendor_id :CPU建造商 </div><div class="line">cpu family :CPU产品系列代号</div><div class="line">model :CPU属于其系列中的哪一代的代号</div><div class="line">model name:CPU属于的名字及其编号、标称主频</div><div class="line">stepping :CPU属于建造更新版本</div><div class="line">cpu MHz :CPU的实际应用主频</div><div class="line">cache size :CPU二级缓存大小</div><div class="line">physical id :单个CPU的标号</div><div class="line">siblings :单个CPU逻辑物理核数</div><div class="line">core id :当前物理核在其所处CPU中的编号,这个编号不必然连气儿</div><div class="line">cpu cores :该逻辑核所处CPU的物理核数</div><div class="line">apicid :用来区分不合逻辑核的编号,体系中每个逻辑核的此编号必定不合,此编号不必然连气儿</div><div class="line">fpu :是否具有浮点运算单位(Floating Point Unit)</div><div class="line">fpu_exception :是否支撑浮点策画异常</div><div class="line">cpuid level :履行cpuid指令前,eax存放器中的值,按照不合的值cpuid指令会返回不合的内容</div><div class="line">wp :注解当前CPU是否在内核态支撑对用户空间的写保护(Write Protection)</div><div class="line">flags :当前CPU支撑的功能</div><div class="line">bogomips :在体系内核启动时粗略测算的CPU速度(Million Instructions Per Second)</div><div class="line">clflush size :每次刷新缓存的大小单位</div><div class="line">cache_alignment :缓存地址对齐单位</div><div class="line">address sizes :可接见地址空间位数</div><div class="line">power management :对能源经管的支撑</div></pre></td></tr></table></figure>
<p>CPU信息中flags各项含义:</p>
<figure class="highlight plain"><table><tr><td class="gutter"><pre><div class="line">1</div><div class="line">2</div><div class="line">3</div><div class="line">4</div><div class="line">5</div><div class="line">6</div><div class="line">7</div><div class="line">8</div><div class="line">9</div><div class="line">10</div><div class="line">11</div><div class="line">12</div><div class="line">13</div><div class="line">14</div><div class="line">15</div><div class="line">16</div><div class="line">17</div><div class="line">18</div><div class="line">19</div><div class="line">20</div><div class="line">21</div><div class="line">22</div><div class="line">23</div><div class="line">24</div><div class="line">25</div><div class="line">26</div><div class="line">27</div><div class="line">28</div><div class="line">29</div><div class="line">30</div><div class="line">31</div><div class="line">32</div><div class="line">33</div><div class="line">34</div><div class="line">35</div><div class="line">36</div><div class="line">37</div></pre></td><td class="code"><pre><div class="line">fpu: Onboard (x87) Floating Point Unit</div><div class="line">vme: Virtual Mode Extension</div><div class="line">de: Debugging Extensions</div><div class="line">pse: Page Size Extensions</div><div class="line">tsc: Time Stamp Counter: support for RDTSC and WRTSC instructions</div><div class="line">msr: Model-Specific Registers</div><div class="line">pae: Physical Address Extensions: ability to access 64GB of memory; only 4GB can be accessed at a time though</div><div class="line">mce: Machine Check Architecture</div><div class="line">cx8: CMPXCHG8 instruction</div><div class="line">apic: Onboard Advanced Programmable Interrupt Controller</div><div class="line">sep: Sysenter/Sy***it Instructions; SYSENTER is used for jumps to kernel memory during system calls, and SY***IT is used for jumps: back to the user code</div><div class="line">mtrr: Memory Type Range Registers</div><div class="line">pge: Page Global 
Enable</div><div class="line">mca: Machine Check Architecture</div><div class="line">cmov: CMOV instruction</div><div class="line">pat: Page Attribute Table</div><div class="line">pse36: 36-bit Page Size Extensions: allows to map 4 MB pages into the first 64GB RAM, used with PSE.</div><div class="line">pn: Processor Serial-Number; only available on Pentium 3</div><div class="line">clflush: CLFLUSH instruction</div><div class="line">dtes: Debug Trace Store</div><div class="line">acpi: ACPI via MSR</div><div class="line">mmx: MultiMedia Extension</div><div class="line">fxsr: FXSAVE and FXSTOR instructions</div><div class="line">sse: Streaming SIMD Extensions. Single instruction multiple data. Lets you do a bunch of the same operation on different pieces of input: in a single clock tick.</div><div class="line">sse2: Streaming SIMD Extensions-2. More of the same.</div><div class="line">selfsnoop: CPU self snoop</div><div class="line">acc: Automatic Clock Control</div><div class="line">IA64: IA-64 processor Itanium.</div><div class="line">ht: HyperThreading. Introduces an imaginary second processor that doesn’t do much but lets you run threads in the same process a bit quicker.</div><div class="line">nx: No ute bit. Prevents arbitrary code running via buffer overflows.</div><div class="line">pni: Prescott New Instructions aka. SSE3</div><div class="line">vmx: Intel Vanderpool hardware virtualization technology</div><div class="line">svm: AMD “Pacifica” hardware virtualization technology</div><div class="line">lm: “Long Mode,” which means the chip supports the AMD64 instruction set</div><div class="line">tm: “Thermal Monitor” Thermal throttling with IDLE instructions. Usually hardware controlled in response to CPU temperature.</div><div class="line">tm2: “Thermal Monitor 2″ Decrease speed by reducing multipler and vcore.</div><div class="line">est: “Enhanced SpeedStep”</div></pre></td></tr></table></figure>
<ul>
<li>查看CPU信息命令<br>cat /proc/cpuinfo</li>
<li>查看内存信息命令<br>cat /proc/meminfo</li>
<li><p>查看硬盘信息命令<br>fdisk -l</p>
</li>
<li><p>查询体系CPU的物理个数<br> cat /proc/cpuinfo |grep “physical id”|sort |uniq|wc -l</p>
</li>
<li>查询体系具有几许个逻辑核<br> cat /proc/cpuinfo | grep “processor” | wc -l</li>
<li>查询体系CPU的物理核数<br> cat /proc/cpuinfo | grep “cpu cores” | uniq</li>
<li>查询体系CPU是否启用超线程<br> cat /proc/cpuinfo | grep -e “cpu cores” -e “siblings” | sort | uniq</li>
<li>查询CPU的主频<br> cat /proc/cpuinfo |grep MHz|uniq<br> 输出举例:<br> cpu cores : 6<br> siblings : 6</li>
<li>查看当前系统内核信息<br> uname -a<br> Linux localhost.localdomain 2.6.32-220.el6.x86_64 #1 SMP Tue Dec 6 19:48:22 GMT2011x86_64 x86_64 x86_64 GNU/Linux</li>
<li><p>查看当前操作系统发行版信息:<br> cat /etc/issue | grep Linux<br> Red Hat Enterprise Linux AS release 4 (Nahant Update 5)</p>
</li>
<li><p>查看逻辑CPU、CPU型号<br> cat /proc/cpuinfo | grep name | cut -f2 -d: | uniq -c<br> 8 Intel(R) Xeon(R) CPU E5410 @ 2.33GHz</p>
</li>
<li><p>查看物理核心</p>
<pre><code># cat /proc/cpuinfo | grep physical | uniq -c</code></pre><p> 4 physical id : 0<br> 4 physical id : 1<br> (说明实际上是两颗4核的CPU)</p>
</li>
<li><p>32/64位</p>
<pre><code># getconf LONG_BIT</code></pre><p> 32<br> (说明当前CPU运行在32bit模式下, 但不代表CPU不支持64bit)</p>
</li>
<li><pre><code># cat /proc/cpuinfo | grep flags | grep ' lm ' | wc -l</code></pre><p> 8<br> (结果大于0, 说明支持64bit计算. lm指long mode, 支持lm则是64bit)</p>
</li>
<li><p>若是cpu cores数量和siblings数量一致,则没有启用超线程,不然超线程被启用。</p>
</li>
</ul>
<p>查询体系CPU是否支撑某项功能,则根以上类似,输出成果进行sort, uniq和grep就可以获得成果。<br>processor 条目包括这一逻辑处理器的唯一标识符。<br>physical id 条目包括每个物理封装的唯一标识符。<br>core id 条目保存每个内核的唯一标识符。<br>siblings 条目列出了位于相同物理封装中的逻辑处理器的数量。<br>cpu cores 条目包含位于相同物理封装中的内核数量。<br>如果处理器为英特尔处理器,则 vendor id 条目中的字符串是 GenuineIntel。<br>拥有相同 physical id 的所有逻辑处理器共享同一个物理插座。每个 physical id 代表一个唯一的物理封装。Siblings 表示位于这一物理封装上的逻辑处理器的数量。它们可能支持也可能不支持超线程(HT)技术。每个 core id 均代表一个唯一的处理器内核。所有带有相同 core id 的逻辑处理器均位于同一个处理器内核上。如果有一个以上逻辑处理器拥有相同的 core id 和 physical id,则说明系统支持超线程(HT)技术。如果有两个或两个以上的逻辑处理器拥有相同的 physical id,但是 core id 不同,则说明这是一个多内核处理器。cpu cores 条目也可以表示是否支持多内核。<br>例如,如果系统包含两个物理封装,每个封装中又包含两个支持超线程(HT)技术的处理器内核,则 /proc/cpuinfo 文件将包含此数据。(注:数据并不在表格中。)</p>
<p>processor 0 1 2 3 4 5 6 7<br>physical id 0 1 0 1 0 1 0 1<br>core id 0 2 1 3 0 2 1 3<br>siblings 4 4 4 4 4 4 4 4<br>cpu cores 2 2 2 2 2 2 2 2</p>
<p>此例说明逻辑处理器 0 和 4 驻留在物理封装 0 的内核 0 上。这就表示逻辑处理器 0 和 4 支持超线程(HT)技术。相同的工作可用于封装 0 内核 1 上的逻辑处理器 2 和 6,封装 1 内核 2 上的逻辑处理器 1 和 5,以及封装 1 内核 3 上的逻辑处理器 3 和 7。此系统支持超线程(HT)技术,因为两个逻辑处理器共享同一个内核。有两种方式可以确定是否支持多内核。由于内核 0 和 1 存在于封装 0 上,而内核 2 和 3 存在于封装 1 上,所以这是一个多内核系统。此外,cpu cores 条目为 2,也说明有两个内核驻留在物理封装中。这是一个多路系统,因为有两个封装。值得注意的是 physical id 和 core id 的编号可能是也可能不是连续的。系统上有两个物理封装并不罕见,而且 physical id 等于 0 和 3</p>
<p>CPU ID<br>CPU ID是CPU生产厂家为识别不同类型的CPU,而为CPU制订的不同的单一的代码;不同厂家的CPU,其CPU ID定义也是不同的;如 “0F24”(Inter处理器)、“681H”(AMD处理器),根据这些数字代码即可判断CPU属于哪种类型,这就是一般意义上的CPU ID。 由于计算机使用的是十六进制,因此CPU ID也是以十六进制表示的。Inter处理器的CPU ID一共包含四个数字,如“0F24”,从左至右分别表示 Type(类型)、Family(系列)、Mode(型号)和Stepping(步进编号)。从CPUID为“068X”的处理器开始,Inter另外增 加了Brand ID(品种标识)用来辅助应用程序识别CPU的类型,因此根据“068X”CPUID还不能正确判别Pentium和Celerom处理 器。必须配合Brand ID来进行细分。AMD处理器一般分为三位,如“681”,从左至右分别表示为Family(系列)、Mode(型号)和 Stepping(步进编号)。</p>
<p>Type(类型)<br>类型标识用来区别INTEL微处理器是用于由最终用户安装,还是由专业个人计算机系 统集成商、服务公司或制作商安装;数字“1”标识所测试的微处理器是用于由用户安装的;数字“0”标识所测试的微处理器是用于由专业个人计算机系统集成 商、服务公司或制作商安装的。我们通常使用的INTEL处理器类型标识都是“0”,“0F24”CPUID就属于这种类型。</p>
<p>Family(系列)<br>系 列标识可用来确定处理器属于那一代产品。如6系列的INTEL处理器包括Pentium Pro、Pentium II、 Pentium II Xeon、Pentium III和Pentium III Xeon处理器。5系列(第五代)包括Pentium处理器和采用 MMX技术的Pentium处理器。AMD的6系列实际指有K7系列CPU,有DURON和ATHION两大类。最新一代的 INTEL Pentium 4系列处理器(包括相同核心的Celerom处理器)的系列值为“F”</p>
<p>Mode(型号)<br>型号标识可用来 确定处理器的制作技术以及属于该系列的第几代设计(或核心),型号与系列通常是相互配合使用的,用于确定计算机所安装的处理器是属于某系列处理器的哪种特 定类型。如可确定Celerom处理器是Coppermine还是Tualutin核心;Athlon XP处理器是Paiomino还是 Thorouhgbred核心。</p>
<p>Stepping(步进编号)<br>步进编号用来标识处理器的设计或制作版本,有助于控制和跟踪处理器的更 改,步进还可以让最终用户更具体地识别其系统安装的处理器版本,确定微处理器的内部设计或制作特性。步进编号就好比处理器的小版本号,如CPUID为 “686”和“686A”就好比WINZIP8.0和8.1的关系。步进编号和核心步进是密切联系的。如CPUID为“686”的Pentium III 处理器是cCO核心,而“686A”表示的是更新版本cD0核心。</p>
<p>Brand ID(品种标识)<br>INTEL从Coppermine核心的处理器开始引入Brand ID作为CPU的辅助识别手段。如我们通过Brand ID可以识别出处理器究竟是Celerom还是Pentium 4。</p>
<p>在LINUX系统中,一颗超线程CPU,将被识别为两颗CPU,一颗双核CPU,也被识别为两颗CPU,而一颗双核超线程CPU,会被认为是4颗CPU。那么,我们如何确定我们机器的CPU数量呢?</p>
<p>仔细查看/proc/cpuinfo我们会发现以下信息:</p>
<ul>
<li>physical id代表每颗物理CPU的ID,有几个CPU ID,就有几颗物理CPU。</li>
<li>siblings区别出了超线程CPU中的逻辑CPU核心,一颗超线程CPU,其physical id是一样的,但是siblings是不同的。</li>
<li>core id和cpu cores用来对双核(多核心)CPU进行区分的,CPU cores表示这颗CPU有几个核心,而core id用来表示CPU的各个核心的。</li>
</ul>
<p>例如:如何区分一颗双核超线程CPU?<br>cat /etc/proc/cpuinfo<br>{<br>physical id=0 (1颗物理CPU)<br> [<br> core id=0 (双核CPU中的第一个核心)<br> cpu cores=2 (双核CPU)<br> siblings=0 (此核心中的第一个逻辑CPU)<br> siblings=1 (此核心中的另一个逻辑CPU)<br> ]<br> [<br> core id=1 (双核CPU中的另一个核心)<br> cpu cores=2 (双核CPU)<br> siblings=0 (此核心中的第一个逻辑CPU)<br> siblings=1 (此核心中的另一个逻辑CPU)<br> ]<br>}</p>
</div>
<footer class="article-footer">
<a data-url="http://blog.xuguruogu.com/2017/03/30/linux下cpu物理个数、多核、超线程判断解析/" data-id="cj2ltcrb20005vts6s7fj1etr" class="article-share-link">Teilen</a>
<a href="http://blog.xuguruogu.com/2017/03/30/linux下cpu物理个数、多核、超线程判断解析/#disqus_thread" class="article-comment-link">Kommentare</a>
<ul class="article-tag-list"><li class="article-tag-list-item"><a class="article-tag-list-link" href="/tags/linux/">linux</a></li></ul>
</footer>
</div>
</article>
<article id="post-Linux系统调用列表-收藏" class="article article-type-post" itemscope itemprop="blogPost">
<div class="article-meta">
<a href="/2017/03/23/Linux系统调用列表-收藏/" class="article-date">
<time datetime="2017-03-22T16:14:08.000Z" itemprop="datePublished">2017-03-23</time>
</a>
</div>
<div class="article-inner">
<header class="article-header">
<h1 itemprop="name">
<a class="article-title" href="/2017/03/23/Linux系统调用列表-收藏/">Linux系统调用列表(收藏)</a>
</h1>
</header>
<div class="article-entry" itemprop="articleBody">
<p>以下是Linux系统调用的一个列表,包含了大部分常用系统调用和由系统调用派生出的的函数。这可能是你在互联网上所能看到的唯一一篇中文注释的Linux系统调用列表,即使是简单的字母序英文列表,能做到这么完全也是很罕见的。</p>
<p>按照惯例,这个列表以man pages第2节,即系统调用节为蓝本。按照笔者的理解,对其作了大致的分类,同时也作了一些小小的修改,删去了几个仅供内核使用,不允许用户调用的系统调用,对个别本人稍觉不妥的地方作了一些小的修改,并对所有列出的系统调用附上简要注释。 </p>
<p>其中有一些函数的作用完全相同,只是参数不同。(可能很多熟悉C++朋友马上就能联想起函数重载,但是别忘了Linux核心是用C语言写的,所以只能取成不同的函数名)。还有一些函数已经过时,被新的更好的函数所代替了(gcc在链接这些函数时会发出警告),但因为兼容的原因还保留着,这些函数我会在前面标上“*”号以示区别。</p>
<ul>
<li>进程控制:</li>
</ul>
<table>
<thead>
<tr>
<th>函数</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>fork</td>
<td>创建一个新进程</td>
</tr>
<tr>
<td>clone</td>
<td>按指定条件创建子进程</td>
</tr>
<tr>
<td>execve</td>
<td>运行可执行文件</td>
</tr>
<tr>
<td>exit</td>
<td>中止进程</td>
</tr>
<tr>
<td>_exit</td>
<td>立即中止当前进程</td>
</tr>
<tr>
<td>getdtablesize</td>
<td>进程所能打开的最大文件数</td>
</tr>
<tr>
<td>getpgid</td>
<td>获取指定进程组标识号</td>
</tr>
<tr>
<td>setpgid</td>
<td>设置指定进程组标识号</td>
</tr>
<tr>
<td>getpgrp</td>
<td>获取当前进程组标识号</td>
</tr>
<tr>
<td>setpgrp</td>
<td>设置当前进程组标识号</td>
</tr>
<tr>
<td>getpid</td>
<td>获取进程标识号</td>
</tr>
<tr>
<td>getppid</td>
<td>获取父进程标识号</td>
</tr>
<tr>
<td>getpriority</td>
<td>获取调度优先级</td>
</tr>
<tr>
<td>setpriority</td>
<td>设置调度优先级</td>
</tr>
<tr>
<td>modify_ldt</td>
<td>读写进程的本地描述表</td>
</tr>
<tr>
<td>nanosleep</td>
<td>使进程睡眠指定的时间</td>
</tr>
<tr>
<td>nice</td>
<td>改变分时进程的优先级</td>
</tr>
<tr>
<td>pause</td>
<td>挂起进程,等待信号</td>
</tr>
<tr>
<td>personality</td>
<td>设置进程运行域</td>
</tr>
<tr>
<td>prctl</td>
<td>对进程进行特定操作</td>
</tr>
<tr>
<td>ptrace</td>
<td>进程跟踪</td>
</tr>
<tr>
<td>sched_get_priority_max</td>
<td>取得静态优先级的上限</td>
</tr>
<tr>
<td>sched_get_priority_min</td>
<td>取得静态优先级的下限</td>
</tr>
<tr>
<td>sched_getparam</td>
<td>取得进程的调度参数</td>
</tr>
<tr>
<td>sched_getscheduler</td>
<td>取得指定进程的调度策略</td>
</tr>
<tr>
<td>sched_rr_get_interval</td>
<td>取得按RR算法调度的实时进程的时间片长度</td>
</tr>
<tr>
<td>sched_setparam</td>
<td>设置进程的调度参数</td>
</tr>
<tr>
<td>sched_setscheduler</td>
<td>设置指定进程的调度策略和参数</td>
</tr>
<tr>
<td>sched_yield</td>
<td>进程主动让出处理器,并将自己排到等候调度队列队尾</td>
</tr>
<tr>
<td>vfork</td>
<td>创建一个子进程,以供执行新程序,常与execve等同时使用</td>
</tr>
<tr>
<td>wait</td>
<td>等待子进程终止</td>
</tr>
<tr>
<td>wait3</td>
<td>参见wait</td>
</tr>
<tr>
<td>waitpid</td>
<td>等待指定子进程终止</td>
</tr>
<tr>
<td>wait4</td>
<td>参见waitpid</td>
</tr>
<tr>
<td>capget</td>
<td>获取进程权限</td>
</tr>
<tr>
<td>capset</td>
<td>设置进程权限</td>
</tr>
<tr>
<td>getsid</td>
<td>获取会晤标识号</td>
</tr>
<tr>
<td>setsid</td>
<td>设置会晤标识号</td>
</tr>
</tbody>
</table>
<ul>
<li>文件系统控制<ul>
<li>文件读写操作</li>
</ul>
</li>
</ul>
<table>
<thead>
<tr>
<th>函数</th>
<th>描述</th>
</tr>
</thead>
<tbody>
<tr>
<td>fcntl</td>
<td>文件控制</td>
</tr>
<tr>
<td>open</td>
<td>打开文件</td>
</tr>
<tr>
<td>creat</td>
<td>创建新文件</td>
</tr>
<tr>
<td>close</td>
<td>关闭文件描述字</td>
</tr>
<tr>
<td>read</td>
<td>读文件</td>
</tr>
<tr>
<td>write</td>
<td>写文件</td>
</tr>
<tr>
<td>readv</td>
<td>从文件读入数据到缓冲数组中</td>
</tr>
<tr>
<td>writev</td>
<td>将缓冲数组里的数据写入文件</td>
</tr>
<tr>
<td>pread</td>
<td>对文件随机读</td>
</tr>
<tr>
<td>pwrite</td>
<td>对文件随机写</td>
</tr>
<tr>