-
Notifications
You must be signed in to change notification settings - Fork 94
/
Ting_Zhao_zotero.csv
We can make this file beautiful and searchable if this error is corrected: Illegal quoting in line 2.
3173 lines (3159 loc) · 160 KB
/
Ting_Zhao_zotero.csv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:z="http://www.zotero.org/namespaces/export#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:bib="http://purl.org/net/biblio#"
xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:link="http://purl.org/rss/1.0/modules/link/"
xmlns:vcard="http://nwalsh.com/rdf/vCard#">
<bib:Article rdf:about="http://www.sciencedirect.com.ezp-prod1.hul.harvard.edu/science/article/pii/S1075293513000196">
<z:itemType>journalArticle</z:itemType>
<dcterms:isPartOf>
<bib:Journal>
<prism:volume>20</prism:volume>
<dc:title>Assessing Writing</dc:title>
</bib:Journal>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Shermis</foaf:surname>
<foaf:givenname>M.D.</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_3"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.sciencedirect.com.ezp-prod1.hul.harvard.edu/science/article/pii/S1075293513000196</rdf:value>
</dcterms:URI>
</dc:identifier>
<bib:pages>53-76</bib:pages>
<dc:date>April 2014</dc:date>
<dcterms:dateSubmitted>2014-08-20 23:48:38</dcterms:dateSubmitted>
<dc:title>State-of-the-art automated essay scoring: Competition, results, and future directions from a United States demonstration</dc:title>
</bib:Article>
<z:Attachment rdf:about="#item_3">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.sciencedirect.com.ezp-prod1.hul.harvard.edu/science/article/pii/S1075293513000196</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2014-08-20 23:48:39</dcterms:dateSubmitted>
<dc:title>State-of-the-art automated essay scoring: Competition, results, and future directions from a United States demonstration</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="http://www.wired.com/2014/08/i-liked-everything-i-saw-on-facebook-for-two-days-heres-what-it-did-to-me/">
<z:itemType>webpage</z:itemType>
<dcterms:isPartOf>
<z:Website><dc:title>WIRED</dc:title></z:Website>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Honan</foaf:surname>
<foaf:givenname>Mat</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_5"/>
<dc:subject>
<z:AutomaticTag><rdf:value>Facebook</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>like buttons</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>News Feed</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.wired.com/2014/08/i-liked-everything-i-saw-on-facebook-for-two-days-heres-what-it-did-to-me/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>8/11/2014</dc:date>
<dcterms:dateSubmitted>2014-08-12 14:01:43</dcterms:dateSubmitted>
<dcterms:abstract>I like everything. Or at least I did, for 48 hours. Literally everything Facebook sent my way, I liked---even if I hated it.</dcterms:abstract>
<dc:title>I Liked Everything I Saw on Facebook for Two Days. Here’s What It Did to Me | Gadget Lab</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_5">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.wired.com/2014/08/i-liked-everything-i-saw-on-facebook-for-two-days-heres-what-it-did-to-me/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2014-08-12 14:01:43</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Article rdf:about="http://www.lifescied.org/content/13/2/167">
<z:itemType>journalArticle</z:itemType>
<dcterms:isPartOf rdf:resource="urn:issn:,%201931-7913"/>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Grunspan</foaf:surname>
<foaf:givenname>Daniel Z.</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Wiggins</foaf:surname>
<foaf:givenname>Benjamin L.</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Goodreau</foaf:surname>
<foaf:givenname>Steven M.</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<dcterms:isReferencedBy rdf:resource="#item_102"/>
<link:link rdf:resource="#item_7"/>
<dc:subject>Week 2</dc:subject>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.lifescied.org/content/13/2/167</rdf:value>
</dcterms:URI>
</dc:identifier>
<bib:pages>167-178</bib:pages>
<dc:date>06/20/2014</dc:date>
<dcterms:dateSubmitted>2014-08-20 20:21:46</dcterms:dateSubmitted>
<z:libraryCatalog>www.lifescied.org</z:libraryCatalog>
<z:language>en</z:language>
<dcterms:abstract>Social interactions between students are a major and underexplored part of undergraduate education. Understanding how learning relationships form in undergraduate classrooms, as well as the impacts these relationships have on learning outcomes, can inform educators in unique ways and improve educational reform. Social network analysis (SNA) provides the necessary tool kit for investigating questions involving relational data. We introduce basic concepts in SNA, along with methods for data collection, data processing, and data analysis, using a previously collected example study on an undergraduate biology classroom as a tutorial. We conduct descriptive analyses of the structure of the network of costudying relationships. We explore generative processes that create observed study networks between students and also test for an association between network position and success on exams. We also cover practical issues, such as the unique aspects of human subjects review for network studies. Our aims are to convince readers that using SNA in classroom environments allows rich and informative analyses to take place and to provide some initial tools for doing so, in the process inspiring future educational studies incorporating relational data.</dcterms:abstract>
<dc:title>Understanding Classrooms through Social Network Analysis: A Primer for Social Network Analysis in Education Research</dc:title>
<z:shortTitle>Understanding Classrooms through Social Network Analysis</z:shortTitle>
</bib:Article>
<bib:Journal rdf:about="urn:issn:,%201931-7913">
<prism:volume>13</prism:volume>
<prism:number>2</prism:number>
<dc:title>CBE-Life Sciences Education</dc:title>
<dc:identifier>ISSN , 1931-7913</dc:identifier>
<dcterms:alternative>CBE Life Sci Educ</dcterms:alternative>
<dc:identifier>DOI 10.1187/cbe.13-08-0162</dc:identifier>
</bib:Journal>
<bib:Memo rdf:about="#item_102">
<rdf:value><ul>
<li><span style="font-size: 9.000000pt; font-family: 'Palatino';">Network analysis can inform our understanding of student network formation in classrooms and the types of impacts these networks have on students.</span></li>
<li class="page" title="Page 2">
<div class="layoutArea">
<div class="column">
<p><span style="font-size: 9.000000pt; font-family: 'Palatino';">SNA aims to understand the deter- minants, structure, and consequences of relationships be- tween actors.</span></p>
<p><span style="font-size: 9.000000pt; font-family: 'Palatino';">The importance of relationships and emergent structures formed by relationships makes SNA different from other re- search paradigms, which often focus solely on the attributes of actors. </span></p>
</div>
</div>
</li>
<li class="page" title="Page 2">
<div class="layoutArea">
<div class="column">
<p><span style="font-size: 9.000000pt; font-family: 'Palatino';">One way to categorize networks is by the number of types of actors they contain; Networks can also be categorized by the nature of the ties they contain. </span></p>
<p><span style="font-size: 9.000000pt; font-family: 'Palatino';">The nature of network data not only allows subjects to report information on other subjects but may allow recogniz- ability of even anonymized data (called </span><span style="font-size: 9.000000pt; font-family: 'Palatino'; font-style: italic;">deductive disclosure</span><span style="font-size: 9.000000pt; font-family: 'Palatino';">), especially in small networks. </span></p>
<div class="page" title="Page 2">
<div class="page" title="Page 10">
<div class="layoutArea">
<div class="column">
<p><span style="font-size: 9.000000pt; font-family: 'Palatino';">Data were interpreted both as a description of a single network and as a longitudinal time lapse of community change. For this project, data collection required a single field of data from the institution registrar and a single survey question asked longitudinally on just two occasions.</span></p>
<p> </p>
<p> </p>
</div>
</div>
</div>
</div>
</div>
</div>
</li>
</ul></rdf:value>
</bib:Memo>
<z:Attachment rdf:about="#item_7">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.lifescied.org/content/13/2/167</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2014-08-20 20:21:46</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="http://chronicle.com/blogs/wiredcampus/why-students-should-own-their-educational-data/54329">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog>
<dc:title>The Chronicle of Higher Education Blogs: Wired Campus</dc:title>
</z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Young</foaf:surname>
<foaf:givenname>Jeffrey R.</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_9"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://chronicle.com/blogs/wiredcampus/why-students-should-own-their-educational-data/54329</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>August 21, 2014</dc:date>
<dcterms:dateSubmitted>2014-08-23 21:32:22</dcterms:dateSubmitted>
<dc:title>Why Students Should Own Their Educational Data</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_9">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://chronicle.com/blogs/wiredcampus/why-students-should-own-their-educational-data/54329</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2014-08-23 21:32:24</dcterms:dateSubmitted>
<dc:title>Chronicle of Higher Education Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<rdf:Description rdf:about="urn:isbn:978-1-4503-1918-8">
<z:itemType>conferencePaper</z:itemType>
<dcterms:isPartOf>
<bib:Journal>
<dc:identifier>ISBN 978-1-4503-1918-8</dc:identifier>
<dc:identifier>DOI 10.1145/2485760.2485822</dc:identifier>
</bib:Journal>
</dcterms:isPartOf>
<dc:publisher>
<foaf:Organization>
<vcard:adr>
<vcard:Address>
<vcard:locality>New York, NY, USA</vcard:locality>
</vcard:Address>
</vcard:adr>
<foaf:name>ACM</foaf:name>
</foaf:Organization>
</dc:publisher>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Lee</foaf:surname>
<foaf:givenname>Victor R.</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Drake</foaf:surname>
<foaf:givenname>Joel</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<dc:subject>
<z:AutomaticTag>
<rdf:value>activity trackers</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>Collaboration</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>competition</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>elementary students</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>fitbit</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>physical activity</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>Quantified Self</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>TinkerPlots</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://doi.acm.org.ezp-prod1.hul.harvard.edu/10.1145/2485760.2485822</rdf:value>
</dcterms:URI>
</dc:identifier>
<bib:pages>273–276</bib:pages>
<dc:date>2013</dc:date>
<dcterms:dateSubmitted>2014-02-27 05:07:35</dcterms:dateSubmitted>
<z:libraryCatalog>ACM Digital Library</z:libraryCatalog>
<dcterms:abstract>Recess is often a time for children in school to engage recreationally in physically demanding and highly interactive activities with their peers. This paper describes a design effort to encourage fifth-grade students to examine sensitivities associated with different measures of center by having them analyze activities during recess using over the course of a week using Fitbit activity trackers and TinkerPlots data visualization software. We describe the activity structure some observed student behaviors during the activity. We also provide a descriptive account, based on video records and transcripts, of two students who engaged thoughtfully with their recess data and developed a more sophisticated understanding of when and how outliers affect means and medians.</dcterms:abstract>
<dc:title>Quantified Recess: Design of an Activity for Elementary Students Involving Analyses of Their Own Movement Data</dc:title>
<z:shortTitle>Quantified Recess</z:shortTitle>
</rdf:Description>
<rdf:Description rdf:about="urn:isbn:978-1-4503-1111-3">
<z:itemType>conferencePaper</z:itemType>
<dcterms:isPartOf>
<bib:Journal>
<dc:identifier>ISBN 978-1-4503-1111-3</dc:identifier>
<dc:identifier>DOI 10.1145/2330601.2330661</dc:identifier>
<dc:title>Proceedings of the 2Nd International Conference on Learning Analytics and Knowledge</dc:title>
</bib:Journal>
</dcterms:isPartOf>
<dc:publisher>
<foaf:Organization>
<vcard:adr>
<vcard:Address>
<vcard:locality>New York, NY, USA</vcard:locality>
</vcard:Address>
</vcard:adr>
<foaf:name>ACM</foaf:name>
</foaf:Organization>
</dc:publisher>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Siemens</foaf:surname>
<foaf:givenname>George</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Baker</foaf:surname>
<foaf:givenname>Ryan S. J. d.</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<dc:subject>
<z:AutomaticTag><rdf:value>Collaboration</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>educational data mining</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>learning analytics and knowledge</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://doi.acm.org/10.1145/2330601.2330661</rdf:value>
</dcterms:URI>
</dc:identifier>
<bib:pages>252–254</bib:pages>
<dc:date>2012</dc:date>
<dcterms:dateSubmitted>2015-01-16 03:15:55</dcterms:dateSubmitted>
<z:libraryCatalog>ACM Digital Library</z:libraryCatalog>
<dcterms:abstract>Growing interest in data and analytics in education, teaching, and learning raises the priority for increased, high-quality research into the models, methods, technologies, and impact of analytics. Two research communities -- Educational Data Mining (EDM) and Learning Analytics and Knowledge (LAK) have developed separately to address this need. This paper argues for increased and formal communication and collaboration between these communities in order to share research, methods, and tools for data mining and analysis in the service of developing both LAK and EDM fields.</dcterms:abstract>
<dc:title>Learning Analytics and Educational Data Mining: Towards Communication and Collaboration</dc:title>
<z:shortTitle>Learning Analytics and Educational Data Mining</z:shortTitle>
</rdf:Description>
<bib:Document rdf:about="http://www.carnegiefoundation.org/blog/the-learning-analytics-landscape-tension-between-student-privacy-and-the-process-of-data-mining/">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog>
<dc:title>Carnegie Foundation for the Advancement of Teaching</dc:title>
</z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Manai</foaf:surname>
<foaf:givenname>J</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_13"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.carnegiefoundation.org/blog/the-learning-analytics-landscape-tension-between-student-privacy-and-the-process-of-data-mining/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>6 November 2015</dc:date>
<dcterms:dateSubmitted>2016-01-06 22:44:02</dcterms:dateSubmitted>
<dcterms:abstract>Data mining is a powerful tool being used by educational institutions to support student success, but often students do not know what data are being</dcterms:abstract>
<dc:title>The Learning Analytics Landscape: Tension Between Student Privacy and the Process of Data Mining</dc:title>
<z:shortTitle>The Learning Analytics Landscape</z:shortTitle>
</bib:Document>
<z:Attachment rdf:about="#item_13">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.carnegiefoundation.org/blog/the-learning-analytics-landscape-tension-between-student-privacy-and-the-process-of-data-mining/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-06 22:44:02</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Book rdf:about="http://www.oreilly.com/data/free/evaluating-machine-learning-models.csp?intcmp=il-data-free-lp-lgen_free_reports_page">
<z:itemType>book</z:itemType>
<dc:publisher>
<foaf:Organization>
<vcard:adr>
<vcard:Address>
<vcard:locality>Sebastopol, CA</vcard:locality>
</vcard:Address>
</vcard:adr>
<foaf:name>O'Reily Media</foaf:name>
</foaf:Organization>
</dc:publisher>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Zheng</foaf:surname>
<foaf:givenname>Alice</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_15"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.oreilly.com/data/free/evaluating-machine-learning-models.csp?intcmp=il-data-free-lp-lgen_free_reports_page</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>September 2015</dc:date>
<dcterms:dateSubmitted>2015-12-15 18:26:39</dcterms:dateSubmitted>
<dcterms:abstract>Data science today is a lot like the Wild West: there’s endless opportunity and excitement, but also a lot of chaos and confusion. If you’re new to data science and applied machine learning, evaluating a machine-learning model can seem pretty overwhelming...</dcterms:abstract>
<dc:title>Evaluating Machine Learning Models</dc:title>
</bib:Book>
<z:Attachment rdf:about="#item_15">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.oreilly.com/data/free/evaluating-machine-learning-models.csp?intcmp=il-data-free-lp-lgen_free_reports_page</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2015-12-15 18:26:39</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Article rdf:about="http://pdk.sagepub.com/content/96/5/19">
<z:itemType>journalArticle</z:itemType>
<dcterms:isPartOf rdf:resource="urn:issn:0031-7217,%201940-6487"/>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Krueger</foaf:surname>
<foaf:givenname>Keith R.</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Moore</foaf:surname>
<foaf:givenname>Bob</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_17"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://pdk.sagepub.com/content/96/5/19</rdf:value>
</dcterms:URI>
</dc:identifier>
<bib:pages>19-24</bib:pages>
<dc:date>2015-02-01</dc:date>
<dcterms:dateSubmitted>2015-12-17 16:16:17</dcterms:dateSubmitted>
<z:libraryCatalog>pdk.sagepub.com</z:libraryCatalog>
<z:language>en</z:language>
<dcterms:abstract>As technology has leaped forward to provide valuable learning tools, parents and policy makers have begun raising concerns about the privacy of student data that schools and systems have. Federal laws are intended to protect students and their families but they have not and will never be able to keep up with rapidly evolving technology. School systems can help themselves and their students by following a list of guidelines, the authors say.</dcterms:abstract>
<dc:title>New technology “clouds” student data privacy</dc:title>
</bib:Article>
<bib:Journal rdf:about="urn:issn:0031-7217,%201940-6487">
<prism:volume>96</prism:volume>
<prism:number>5</prism:number>
<dc:title>Phi Delta Kappan</dc:title>
<dc:identifier>ISSN 0031-7217, 1940-6487</dc:identifier>
<dcterms:alternative>Phi Delta Kappan</dcterms:alternative>
<dc:identifier>DOI 10.1177/0031721715569464</dc:identifier>
</bib:Journal>
<z:Attachment rdf:about="#item_17">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://pdk.sagepub.com/content/96/5/19</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2015-12-17 16:16:17</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="https://www.edsurge.com/news/2015-03-16-why-opting-out-of-student-data-collection-isn-t-the-solution">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog><dc:title>EdSurge</dc:title></z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Leong</foaf:surname>
<foaf:givenname>B</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Polonetsky</foaf:surname>
<foaf:givenname>J</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_19"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://www.edsurge.com/news/2015-03-16-why-opting-out-of-student-data-collection-isn-t-the-solution</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>March 16 2015</dc:date>
<dcterms:dateSubmitted>2016-01-16 16:31:25</dcterms:dateSubmitted>
<dcterms:abstract>In every privacy debate across every industry, the same questions arise about the rights of individuals to “opt-out” of their data being collected or used. So it should come as no surprise that the “when” and “how” of parent and student opt-outs of education data collection or use has become a robust</dcterms:abstract>
<dc:title>Why Opting Out of Student Data Collection Isn’t the Solution</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_19">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://www.edsurge.com/news/2015-03-16-why-opting-out-of-student-data-collection-isn-t-the-solution</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-16 16:31:25</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Recording rdf:about="http://www.lynda.com/R-tutorials/Up-Running-R/120612-2.html?org=nyu.edu">
<z:itemType>videoRecording</z:itemType>
<dc:publisher>
<foaf:Organization><foaf:name>Lynda.com</foaf:name></foaf:Organization>
</dc:publisher>
<z:directors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Poulson</foaf:surname>
<foaf:givenname>Barton</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</z:directors>
<link:link rdf:resource="#item_21"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.lynda.com/R-tutorials/Up-Running-R/120612-2.html?org=nyu.edu</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>April 4 2013</dc:date>
<dcterms:dateSubmitted>2016-01-17 18:03:03</dcterms:dateSubmitted>
<dcterms:abstract>Introduces the R statistical processing language, including how to install R, read data from SPSS and spreadsheets, analyze data, and create charts and plots.</dcterms:abstract>
<dc:title>Up and Running with R</dc:title>
</bib:Recording>
<z:Attachment rdf:about="#item_21">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.lynda.com/R-tutorials/Up-Running-R/120612-2.html?org=nyu.edu</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-17 18:03:03</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Recording rdf:about="https://www.youtube.com/watch?v=_iv8A1pHNYA">
<z:itemType>videoRecording</z:itemType>
<z:directors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Educause</foaf:surname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</z:directors>
<dc:subject>
<z:AutomaticTag><rdf:value>Assessment</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>Education</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>educational assessment</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>EDUCAUSE</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>Higher Education</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>learners</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag><rdf:value>Learning</rdf:value></z:AutomaticTag>
</dc:subject>
<dc:subject>
<z:AutomaticTag>
<rdf:value>Teaching and learning</rdf:value>
</z:AutomaticTag>
</dc:subject>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://www.youtube.com/watch?v=_iv8A1pHNYA</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>2015-08-17</dc:date>
<dcterms:dateSubmitted>2016-01-17 18:50:57</dcterms:dateSubmitted>
<z:libraryCatalog>YouTube</z:libraryCatalog>
<z:runningTime>470 seconds</z:runningTime>
<dcterms:abstract>Several higher education learning and assessment professionals discuss the difficulties of measuring learning.</dcterms:abstract>
<dc:title>Why Is Measuring Learning So Difficult?</dc:title>
</bib:Recording>
<bib:Document rdf:about="http://www.smbc-comics.com/index.php?id=3978">
<z:itemType>webpage</z:itemType>
<dcterms:isPartOf>
<z:Website></z:Website>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Weinersmith</foaf:surname>
<foaf:givenname>Zach</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_24"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.smbc-comics.com/index.php?id=3978</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>January 5 2016</dc:date>
<dcterms:dateSubmitted>2016-01-18 18:17:09</dcterms:dateSubmitted>
<dc:title>Saturday Morning Breakfast Cereal</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_24">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.smbc-comics.com/index.php?id=3978</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 18:17:10</dcterms:dateSubmitted>
<dc:title>Saturday Morning Breakfast Cereal</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf">
<z:itemType>webpage</z:itemType>
<dcterms:isPartOf>
<z:Website></z:Website>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person><foaf:surname>RStudio</foaf:surname></foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_26"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>January 2015</dc:date>
<dcterms:dateSubmitted>2016-01-18 18:42:27</dcterms:dateSubmitted>
<dc:title>The Data Wrangling Cheatsheet</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_26">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 18:42:27</dcterms:dateSubmitted>
<dc:title>data-wrangling-cheatsheet - data-wrangling-cheatsheet.pdf</dc:title>
<z:linkMode>3</z:linkMode>
<link:type>application/pdf</link:type>
</z:Attachment>
<rdf:Description rdf:about="#item_27">
<z:itemType>conferencePaper</z:itemType>
<dcterms:isPartOf>
<bib:Journal>
<dc:title>Proceedings of the Fourth International Conference on Learning Analytics And Knowledge</dc:title>
</bib:Journal>
</dcterms:isPartOf>
<dc:publisher>
<foaf:Organization><foaf:name>ACM</foaf:name></foaf:Organization>
</dc:publisher>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Clow</foaf:surname>
<foaf:givenname>Doug</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<bib:pages>49–53</bib:pages>
<dc:date>2014</dc:date>
<dc:title>Data wranglers: human interpreters to help close the feedback loop</dc:title>
</rdf:Description>
<bib:Article rdf:about="http://theconversation.com/zuckerberg-is-ploughing-billions-into-personalised-learning-why-51940">
<z:itemType>magazineArticle</z:itemType>
<dcterms:isPartOf>
<bib:Periodical><dc:title>The Conversation</dc:title></bib:Periodical>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Kucirkova</foaf:surname>
<foaf:givenname>Natalia</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>FitzGerald</foaf:surname>
<foaf:givenname>Elizabeth</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_29"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://theconversation.com/zuckerberg-is-ploughing-billions-into-personalised-learning-why-51940</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>December 9 2015</dc:date>
<dcterms:dateSubmitted>2016-01-18 19:14:05</dcterms:dateSubmitted>
<dcterms:abstract>Zuckerberg wants to plough billions into personalised learning, but his way may not be the right way.</dcterms:abstract>
<dc:title>Zuckerberg is ploughing billions into 'personalised learning' – why?</dc:title>
</bib:Article>
<z:Attachment rdf:about="#item_29">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://theconversation.com/zuckerberg-is-ploughing-billions-into-personalised-learning-why-51940</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 19:14:05</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Recording rdf:about="https://www.youtube.com/watch?v=8CpRLplmdqE">
<z:itemType>videoRecording</z:itemType>
<dc:publisher>
<foaf:Organization><foaf:name>Youtube</foaf:name></foaf:Organization>
</dc:publisher>
<z:directors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Georgia Tech</foaf:surname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</z:directors>
<z:producers>
<rdf:Seq>
<rdf:li>
<foaf:Person><foaf:surname>Udacity</foaf:surname></foaf:Person>
</rdf:li>
</rdf:Seq>
</z:producers>
<link:link rdf:resource="#item_31"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://www.youtube.com/watch?v=8CpRLplmdqE</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>23 February 2015</dc:date>
<dcterms:dateSubmitted>2016-01-18 19:18:06</dcterms:dateSubmitted>
<z:runningTime>3:13</z:runningTime>
<dc:title>Feature Selection</dc:title>
</bib:Recording>
<z:Attachment rdf:about="#item_31">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://www.youtube.com/watch?v=8CpRLplmdqE</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 19:18:06</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="http://www.r-bloggers.com/introduction-to-feature-selection-for-bioinformaticians-using-r-correlation-matrix-filters-pca-backward-selection/">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog><dc:title>R-bloggers</dc:title></z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Perez-Riverol</foaf:surname>
<foaf:givenname>Yasset</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_33"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.r-bloggers.com/introduction-to-feature-selection-for-bioinformaticians-using-r-correlation-matrix-filters-pca-backward-selection/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>October 17 2013</dc:date>
<dcterms:dateSubmitted>2016-01-18 19:42:18</dcterms:dateSubmitted>
<dcterms:abstract>Bioinformatics is becoming more and more a Data Mining field. Every passing day, Genomics and Proteomics yield bucketloads of multivariate data (genes, proteins, DNA, identified peptides, structures), and every one of these biological data units are described by a number of features: length, physicochemical properties, scores, etc. Careful consideration of which features to select when trying to reduce the dimensionality of a specific dataset is, therefore, critical if one wishes to analyze and understand their impact on a model, or to identify what attributes produce a specific biological effect.
For instance, considering a predictive model C1A1 + C2A2 + C3A3 … CnAn = S, where Ci are constants, Ai are features or attributes and S is the predictor output (retention time, toxicity, score, etc). It is essential to identify which of those features (A1, A2 and A3…An) are most relevant to the model and to understand how they correlate with S, as working with such a subset will enable the researcher to discard a lot of irrelevant and redundant information.
There are two main approaches to this selection process:
Filter approaches: you select the features first, then you use this subset to execute classification or clustering algorithms, etc;
Embedded or Wrapper approaches a classification algorithm is applied to the raw dataset in order to identify the most relevant features.
One of the simplest and most powerful filter approaches is the use of correlation matrix filters. In regression and data mining problems, variables may be highly correlated with one another or "redundant". For example in cheminformatics, aromatic rings, bond counts and carbon atom counts can be very tightly correlated. If so, any one of these variables could be used as a proxy for all the others. It is best to choose the feature which is most likely to be the direct cause of toxicity, absorption or a specific response distribution.
Correlation Matrix :R Example: Removing features with more than 0.70 of Correlation.import java.util.List;library(corrplot)#corrplot: the library to compute correlation matrix.datMy &lt;- read.table("data.csv", header = TRUE)#read the tab file using the read table function.datMy.scale&lt;- scale(datMy[2:ncol(datMy)],center=TRUE,scale=TRUE);#scale all the features (from feature 2 bacause feature 1 is the predictor output)corMatMy &lt;- cor(datMy.scale)#compute the correlation matrixcorrplot(corMatMy, order = "hclust")#visualize the matrix, clustering features by correlation index.
Resulting Output:
Highly Correlate Matrix for 400 features.
After inspecting the matrix, we set the correlation threshold at 0.70.highlyCor &lt;- findCorrelation(corMatMy, 0.70)#Apply correlation filter at 0.70,#then we remove all the variable correlated with more 0.7.datMyFiltered.scale &lt;- datMy.scale[,-highlyCor]corMatMy &lt;- cor(datMyFiltered.scale)corrplot(corMatMy, order = "hclust")
Resulting Output:
Correlation matrix after filter.
Now it is possible to filter out “redundant” features by examining in detail the correlation matrix. Remember that the closer the correlation between two variables is to 1, the more related their behavior and the more redundant one is with respect to the other.
Using PCA
A relatively sophisticated way to do the correlation matrix analysis would be to perform a Principal Components Analysis (PCA). Feature extraction approaches transform data in high-dimensional space to a space of fewer dimensions. Principal component analysis, the most important linear technique for reducing dimensionality, performs a linear mapping of the data to a lower dimensional space in such a way that the variance of the data in the low-dimensional representation is maximized. In other words, PCA analysis builds a set of features by selecting those axes which maximize data variance.
Principal Component Analysis (PCA) is a multivariate technique that summarizes systematic patterns of variation in the data. From a data analysis standpoint, PCA is used for studying one table of observations and variables with the idea of transforming the observed variables into a set of new variables, the principal components, which are uncorrelated and explain the variation of the data. Therefore, PCA can be used to bring down a “complex” data set to a lower dimensionality, in order to reveal the structures or the dominant types of variations in both the observations and the variables.
PCA in R
In R, several functions from a number of different packages can be used to perform PCA. My suggestion is FactoMineR whose typical PCA output consists of a set of eigenvalues, a table with the scores or Principal Components (PCs), and a table of loadings (or correlations between variables and PCs).R Example: PCA function using FactoMineR for 400 features & 5 PCs
require(FactoMineR) # PCA with function PCAdatMy &lt;- read.table("data.csv", header = TRUE)#read the tab file using the read table function.pca &lt;- PCA(datMy, scale.unit=TRUE, ncp=5, graph=T)#scale all the features, ncp: number of dimensions kept in the results (by default 5)dimdesc(pca)#This line of code will sort the variables the most linked to each PC. It is very useful when you have many variables.
Here, you can find an excellent tutorial on FactoMineR and Principal Component analysis in R:
Wrapper Approaches with Backwards Selection"Wrapper" approaches can be viewed as built-in functions to optimize the number of predictors in the optimization or regression problem. Many feature selection routines use a "wrapper" approach to find appropriate variables such that an algorithm searching through feature space repeatedly fits the model with different predictor sets. The best predictor set is determined by some measure of performance (correlation R^2, root-mean-square deviation). An example of one search routine is backwards selection (a.k.a. recursive feature elimination). In many cases, using these models with built-in feature selection will be more efficient than algorithms where the search routine for the right predictors is external to the model. Built-in feature selection typically couples the predictor search algorithm with parameter estimation, and is usually optimized with a single objective function (e.g. error rates or likelihood).
Using Built-in Backward SelectionThe algorithm fits the model to all predictors. Each predictor is ranked according to relevance to the model. With each iteration of feature selection, the Ci top-ranked predictors are retained, the model is refit and performance is re-assessed. Built-in backward selection is being used for at least three purposes: predictor selection, model fitting and performance evaluation. Unless the number of samples is large, especially in relation to the number of variables, one static training set may not be able to fulfill these needs.
The "crantastic" package caret contains functions for training and plotting classification and regression models. In this case, the rfe function is used to obtain the potential selection. It has several arguments:
x, a matrix or data frame of predictor variables
y, a vector (numeric or factor) of outcomes
sizes, an integer vector for the specific subset sizes that should be tested (which must not include ncol(x))
rfeControl, a list of options that can be used to specify the model and the methods for prediction, ranking etc.
For a specific model, a set of functions must be specified in rfeControl$functions. There are a number of pre-defined sets of functions for several models, including: linear regression (in the object lmFuncs), random forests (rfFuncs), naive Bayes (nbFuncs), bagged trees (treebagFuncs) and functions that can be used with caret's train function (caretFuncs). R example: Selecting features using backward selection and the caret package
library(caret);#load caret librarydata_features&lt;-as.matrix(read.table("data-features.csv",sep="\t", header=TRUE));#load data featuresdata_class&lt;-as.matrix(read.table('data.csv', header=TRUE));#load data classesdata_features&lt;- scale(data_features, center=TRUE, scale=TRUE);#scale data featuresinTrain &lt;- createDataPartition(data_class, p = 3/4, list = FALSE); #Divide the dataset in train and test sets#Create the Training Dataset for Descriptors trainDescr &lt;- data_features[inTrain,];# Create the Testing dataset for DescriptorstestDescr &lt;- data_features[-inTrain,];trainClass &lt;- data_class[inTrain];testClass &lt;- data_class[-inTrain];descrCorr &lt;- cor(trainDescr);highCorr &lt;- findCorrelation(descrCorr, 0.70);trainDescr &lt;- trainDescr[, -highCorr];testDescr &lt;- testDescr[, -highCorr];# Here, we can included a correlation matrix analysis to remove the redundant features before the backwards selection svmProfile &lt;- rfe(x=trainDescr, y = trainClass, sizes = c(1:5), rfeControl= rfeControl(functions = caretFuncs,number = 2),method = "svmRadial",fit = FALSE);#caret function: the rfe is the backwards selection, c is the possible sizes of the features sets, and method the optimization method is a support vector machine.
Finally I would like to recommend an excellent Review about Feature Selection in Bioinformatics.</dcterms:abstract>
<dc:title>Introduction to Feature selection for bioinformaticians using R, correlation matrix filters, PCA &amp; backward selection</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_33">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://www.r-bloggers.com/introduction-to-feature-selection-for-bioinformaticians-using-r-correlation-matrix-filters-pca-backward-selection/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 19:42:18</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="https://opensource.com/education/14/10/why-open-data-matters-education">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog><dc:title>Opensource.com</dc:title></z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Sharma</foaf:surname>
<foaf:givenname>Aseem</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_35"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://opensource.com/education/14/10/why-open-data-matters-education</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>October 13 2014</dc:date>
<dcterms:dateSubmitted>2016-01-18 20:10:08</dcterms:dateSubmitted>
<dcterms:abstract>Aseem Sharma writes that improving the state of education and making children better learners is a human endeavor. It requires understanding the behavior of children, what motivates them, and what demotivates them. Technology solutions based on open data can strengthen and fuel that human endeavor on which much of our future depends.</dcterms:abstract>
<dc:title>Why open data matters in education</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_35">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>https://opensource.com/education/14/10/why-open-data-matters-education</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 20:10:08</dcterms:dateSubmitted>
<dc:title>Snapshot</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:BookSection rdf:about="http://faculty.ucr.edu/~hanneman/nettext/C1_Social_Network_Data.html">
<z:itemType>bookSection</z:itemType>
<dcterms:isPartOf>
<bib:Book>
<dc:title>Introduction to Social Network Methods</dc:title>
</bib:Book>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Hanneman</foaf:surname>
<foaf:givenname>R.A.</foaf:givenname>
</foaf:Person>
</rdf:li>
<rdf:li>
<foaf:Person>
<foaf:surname>Riddle</foaf:surname>
<foaf:givenname>M.</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_37"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://faculty.ucr.edu/~hanneman/nettext/C1_Social_Network_Data.html</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>2016-01-18 20:17:24</dc:date>
<dcterms:dateSubmitted>2016-01-18 20:17:24</dcterms:dateSubmitted>
<dc:title>Chapter 1: Social Network Data</dc:title>
</bib:BookSection>
<z:Attachment rdf:about="#item_37">
<z:itemType>attachment</z:itemType>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://faculty.ucr.edu/~hanneman/nettext/C1_Social_Network_Data.html</rdf:value>
</dcterms:URI>
</dc:identifier>
<dcterms:dateSubmitted>2016-01-18 20:17:25</dcterms:dateSubmitted>
<dc:title>Introduction to Social Network Methods: Chapter 1: Social Network Data</dc:title>
<z:linkMode>3</z:linkMode>
</z:Attachment>
<bib:Document rdf:about="http://venturebeat.com/2014/03/13/microsoft-and-knewton-partner-up-to-bring-adaptive-learning-to-publishers-schools/">
<z:itemType>blogPost</z:itemType>
<dcterms:isPartOf>
<z:Blog><dc:title>VentureBeat</dc:title></z:Blog>
</dcterms:isPartOf>
<bib:authors>
<rdf:Seq>
<rdf:li>
<foaf:Person>
<foaf:surname>Farr</foaf:surname>
<foaf:givenname>Christina</foaf:givenname>
</foaf:Person>
</rdf:li>
</rdf:Seq>
</bib:authors>
<link:link rdf:resource="#item_39"/>
<dc:identifier>
<dcterms:URI>
<rdf:value>http://venturebeat.com/2014/03/13/microsoft-and-knewton-partner-up-to-bring-adaptive-learning-to-publishers-schools/</rdf:value>
</dcterms:URI>
</dc:identifier>
<dc:date>March 13 2014</dc:date>
<dcterms:dateSubmitted>2016-01-19 14:56:06</dcterms:dateSubmitted>
<dcterms:abstract>Knewton provides an open API for "adaptive learning," an insidery term for computing that helps students learn at their own pace.</dcterms:abstract>
<dc:title>Microsoft and Knewton partner up to bring adaptive learning to publishers &amp; schools</dc:title>
</bib:Document>
<z:Attachment rdf:about="#item_39">