From 279980ea7e417698418d93a68fa856c749ce340f Mon Sep 17 00:00:00 2001 From: Xiaochang Wu Date: Tue, 22 Jun 2021 18:00:08 +0800 Subject: [PATCH] [ML-53] [CPU] Add Linear & Ridge Regression (#75) * linear regression using DAL * Coding done * linear regression trainwithDAL pass * Remove naive bayes files * fix conflicts * Fix crash due to DAL LiR return n+1 weights columns (w0...wn) * Add linear regression example * Add condition to enable oap and switch linear & ridge regressions Add ridgeParameter as input Code cleanup * nit * nit * move LinearRegression.scala to fit in new dir structure * Add 3.0.x and 3.1.1 support * turn spark.oap.mllib.enabled true by default, ignore tests not passed, will be fixed later Co-authored-by: Jiang, Bo --- .../data/sample_linear_regression_data.txt | 501 +++++++ examples/linear-regression/build.sh | 3 + examples/linear-regression/pom.xml | 94 ++ examples/linear-regression/run.sh | 28 + .../examples/ml/LinearRegressionExample.scala | 141 ++ .../apache/spark/ml/regression/LiRResult.java | 21 + .../main/native/LinearRegressionDALImpl.cpp | 250 ++++ mllib-dal/src/main/native/Makefile | 6 +- mllib-dal/src/main/native/build-jni.sh | 1 + ...rk_ml_regression_LinearRegressionDALImpl.h | 21 + .../regression/LinearRegressionDALImpl.scala | 120 ++ .../org/apache/spark/ml/util/OneDAL.scala | 16 +- .../org/apache/spark/ml/util/Utils.scala | 7 +- .../ml/regression/LinearRegression.scala | 1063 +++++++++++++ .../ml/regression/LinearRegression.scala | 1057 +++++++++++++ .../ml/regression/LinearRegression.scala | 1057 +++++++++++++ .../ml/regression/LinearRegression.scala | 1103 ++++++++++++++ .../IntelLinearRegressionSuite.scala | 1314 +++++++++++++++++ mllib-dal/test.sh | 10 +- 19 files changed, 6804 insertions(+), 9 deletions(-) create mode 100755 examples/data/sample_linear_regression_data.txt create mode 100755 examples/linear-regression/build.sh create mode 100644 examples/linear-regression/pom.xml create mode 100755 examples/linear-regression/run.sh create mode 100644 examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala create mode 100644 mllib-dal/src/main/java/org/apache/spark/ml/regression/LiRResult.java create mode 100644 mllib-dal/src/main/native/LinearRegressionDALImpl.cpp create mode 100644 mllib-dal/src/main/native/javah/org_apache_spark_ml_regression_LinearRegressionDALImpl.h create mode 100644 mllib-dal/src/main/scala/org/apache/spark/ml/regression/LinearRegressionDALImpl.scala create mode 100644 mllib-dal/src/spark-3.0.0/main/scala/org/apache/spark/ml/regression/LinearRegression.scala create mode 100644 mllib-dal/src/spark-3.0.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala create mode 100644 mllib-dal/src/spark-3.0.2/main/scala/org/apache/spark/ml/regression/LinearRegression.scala create mode 100644 mllib-dal/src/spark-3.1.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala create mode 100755 mllib-dal/src/test/scala/org/apache/spark/ml/regression/IntelLinearRegressionSuite.scala diff --git a/examples/data/sample_linear_regression_data.txt b/examples/data/sample_linear_regression_data.txt new file mode 100755 index 000000000..9aaaa4297 --- /dev/null +++ b/examples/data/sample_linear_regression_data.txt @@ -0,0 +1,501 @@ +-9.490009878824548 1:0.4551273600657362 2:0.36644694351969087 3:-0.38256108933468047 4:-0.4458430198517267 5:0.33109790358914726 6:0.8067445293443565 7:-0.2624341731773887 8:-0.44850386111659524 9:-0.07269284838169332 10:0.5658035575800715 +0.2577820163584905 1:0.8386555657374337 2:-0.1270180511534269 3:0.499812362510895 4:-0.22686625128130267 5:-0.6452430441812433 6:0.18869982177936828 7:-0.5804648622673358 8:0.651931743775642 9:-0.6555641246242951 10:0.17485476357259122 +-4.438869807456516 1:0.5025608135349202 2:0.14208069682973434 3:0.16004976900412138 4:0.505019897181302 5:-0.9371635223468384 6:-0.2841601610457427 7:0.6355938616712786 8:-0.1646249064941625 9:0.9480713629917628 10:0.42681251564645817 +-19.782762789614537 1:-0.0388509668871313 2:-0.4166870051763918 3:0.8997202693189332 4:0.6409836467726933 5:0.273289095712564 6:-0.26175701211620517 7:-0.2794902492677298 8:-0.1306778297187794 9:-0.08536581111046115 10:-0.05462315824828923 +-7.966593841555266 1:-0.06195495876886281 2:0.6546448480299902 3:-0.6979368909424835 4:0.6677324708883314 5:-0.07938725467767771 6:-0.43885601665437957 7:-0.608071585153688 8:-0.6414531182501653 9:0.7313735926547045 10:-0.026818676347611925 +-7.896274316726144 1:-0.15805658673794265 2:0.26573958270655806 3:0.3997172901343442 4:-0.3693430998846541 5:0.14324061105995334 6:-0.25797542063247825 7:0.7436291919296774 8:0.6114618853239959 9:0.2324273700703574 10:-0.25128128782199144 +-8.464803554195287 1:0.39449745853945895 2:0.817229160415142 3:-0.6077058562362969 4:0.6182496334554788 5:0.2558665508269453 6:-0.07320145794330979 7:-0.38884168866510227 8:0.07981886851873865 9:0.27022202891277614 10:-0.7474843534024693 +2.1214592666251364 1:-0.005346215048158909 2:-0.9453716674280683 3:-0.9270309666195007 4:-0.032312290091389695 5:0.31010676221964206 6:-0.20846743965751569 7:0.8803449313707621 8:-0.23077831216541722 9:0.29246395759528565 10:0.5409312755478819 +1.0720117616524107 1:0.7880855916368177 2:0.19767407429003536 3:0.9520689432368168 4:-0.845829774129496 5:0.5502413918543512 6:-0.44235539500246457 7:0.7984106594591154 8:-0.2523277127589152 9:-0.1373808897290778 10:-0.3353514432305029 +-13.772441561702871 1:-0.3697050572653644 2:-0.11452811582755928 3:-0.807098168238352 4:0.4903066124307711 5:-0.6582805242342049 6:0.6107814398427647 7:-0.7204208094262783 8:-0.8141063661170889 9:-0.9459402662357332 10:0.09666938346350307 +-5.082010756207233 1:-0.43560342773870375 2:0.9349906440170221 3:0.8090021580031235 4:-0.3121157071110545 5:-0.9718883630945336 6:0.6191882496201251 7:0.0429886073795116 8:0.670311110015402 9:0.16692329718223786 10:0.37649213869502973 +7.887786536531237 1:0.11276440263810383 2:-0.7684997525607482 3:0.1770172737885798 4:0.7902845707138706 5:0.2529503304079441 6:-0.23483801763662826 7:0.8072501895004851 8:0.6673992021927047 9:-0.4796127376677324 10:0.9244724404994455 +14.323146365332388 1:-0.2049276879687938 2:0.1470694373531216 3:-0.48366999792166787 4:0.643491115907358 5:0.3183669486383729 6:0.22821350958477082 7:-0.023605251086149304 8:-0.2770587742156372 9:0.47596326458377436 10:0.7107229819632654 +-20.057482615789212 1:-0.3205057828114841 2:0.51605972926996 3:0.45215640988181516 4:0.01712446974606241 5:0.5508198371849293 6:-0.2478254241316491 7:0.7256483175955235 8:0.39418662792516 9:-0.6797384914236382 10:0.6001217520150142 +-0.8995693247765151 1:0.4508991072414843 2:0.589749448443134 3:0.6464818311502738 4:0.7005669004769028 5:0.9699584106930381 6:-0.7417466269908464 7:0.22818964839784495 8:0.08574936236270037 9:-0.6945765138377225 10:0.06915201979238828 +-19.16829262296376 1:0.09798746565879424 2:-0.34288007110901964 3:0.440249350802451 4:-0.22440768392359534 5:-0.9695067570891225 6:-0.7942032659310758 7:-0.792286205517398 8:-0.6535487038528798 9:0.7952676470618951 10:-0.1622831617066689 +5.601801561245534 1:0.6949189734965766 2:-0.32697929564739403 3:-0.15359663581829275 4:-0.8951865090520432 5:0.2057889391931318 6:-0.6676656789571533 7:-0.03553655732400762 8:0.14550349954571096 9:0.034600542078191854 10:0.4223352065067103 +-3.2256352187273354 1:0.35278245969741096 2:0.7022211035026023 3:0.5686638754605697 4:-0.4202155290448111 5:-0.26102723928249216 6:0.010688215941416779 7:-0.4311544807877927 8:0.9500151672991208 9:0.14380635780710693 10:-0.7549354840975826 +1.5299675726687754 1:-0.13079299081883855 2:0.0983382230287082 3:0.15347083875928424 4:0.45507300685816965 5:0.1921083467305864 6:0.6361110540492223 7:0.7675261182370992 8:-0.2543488202081907 9:0.2927051050236915 10:0.680182444769418 +-0.250102447941961 1:-0.8062832278617296 2:0.8266289890474885 3:0.22684501241708888 4:0.1726291966578266 5:-0.6778773666126594 6:0.9993906921393696 7:0.1789490173139363 8:0.5584053824232391 9:0.03495894704368174 10:-0.8505720014852347 +12.792267926563595 1:-0.008461200645088818 2:-0.648273596036564 3:-0.005334477339629995 4:0.3781469006858833 5:0.30565234666790686 6:-0.2822867492866177 7:0.10175120738413801 8:0.5342432888482425 9:0.05146513075475534 10:-0.6459729964194652 +6.082192787194888 1:0.42519013450094767 2:0.09441503345243984 3:-0.07898439043103522 4:-0.32207498048636474 5:-0.9180071861219266 6:0.5951317320731633 7:0.41000814588717693 8:-0.3926260640533046 9:0.2789036768568971 10:0.13163692286014528 +-7.481405271455238 1:0.03324842612749346 2:0.07055844751995122 3:-0.47199515597021113 4:-0.682690342465275 5:0.3983414713797069 6:-0.2136729393256811 7:-0.09066563475481249 8:-0.4640338194317184 9:-0.03513782089224482 10:-0.1711809802758364 +6.739533816100517 1:0.1774546460228057 2:-0.6783644553523549 3:-0.47871398278230504 4:0.02272121490463097 5:-0.5047649289302389 6:0.26479596144873896 7:-0.32045436544054096 8:0.3113047940487379 9:0.6269418147567556 10:0.9710114516962312 +3.780807062175497 1:0.01715676997104909 2:0.8975962429865936 3:-0.46594560920034134 4:0.2873623499953055 5:0.8894362304584083 6:0.17973981232418468 7:0.49105791400707743 8:-0.7359842740294882 9:0.38941133808001127 10:-0.7151884777228046 +4.564039393483412 1:0.07478785545033317 2:-0.8672651994084235 3:0.450599300176334 4:0.35104802298560056 5:0.6797318185095045 6:-0.03891997518827006 7:-0.33208695871398675 8:0.6166574577055226 9:0.5730212324012205 10:-0.4194925751047054 +-0.3195679646035633 1:0.054527683864544096 2:-0.15591931640565093 3:0.9266742559542833 4:0.888522581905147 5:0.6576203900699167 6:0.6417770212400336 7:0.7509788029052338 8:-0.3104974571382815 9:0.7234744267051683 10:-0.15869049651427103 +11.290452658023497 1:0.20173310976772196 2:0.8657502566551409 3:0.9325160601080682 4:0.24570884032596263 5:-0.6546108813337841 6:-0.14020032028377583 7:-0.8825687891702743 8:-0.21420166926412865 9:-0.8600275184792756 10:-0.7990574622230739 +-4.003499192090455 1:0.8325875503351796 2:-0.5956350140619129 3:0.12598048009007923 4:0.12340188733473134 5:-0.839435659309717 6:-0.16623481818728414 7:0.12028795301041662 8:-0.7994713170657952 9:0.2216721974907896 10:0.8407561415075087 +-19.872991038068406 1:-0.9325810772922609 2:-0.6411471147334535 3:0.9949216290375054 4:0.483048267470493 5:-0.8736297429070232 6:-0.36222771685582544 7:0.26397860162786957 8:0.45527588775737704 9:-0.9424989711186325 10:0.6251162293059616 +10.502762149373098 1:-0.2307778924009991 2:0.6977871128979924 3:0.022830408261390822 4:0.6257738824362347 5:0.9770979848265122 6:0.09985730624684575 7:-0.9755858424230182 8:-0.689969833240031 9:-0.7294587311376761 10:0.3496326193951331 +-14.328978509075442 1:0.37929821892417404 2:0.8402056881660709 3:-0.1806835799958202 4:0.766314307210441 5:0.865876513623024 6:-0.7113501219432434 7:-0.0932956557986735 8:-0.7042025810921411 9:0.47530696925672267 10:-0.4629102077669889 +-16.26143027545273 1:-0.9309578475799722 2:0.7591795880911123 3:0.06296957473213705 4:0.786790093290086 5:-0.9527998391625465 6:-0.08573982501921895 7:-0.3812232026687308 8:-0.6890669703685022 9:0.25415911467755015 10:-0.07664746267502509 +11.772544195529013 1:0.3614756404325046 2:0.14508027508253818 3:0.23042774014795753 4:0.4164348685332022 5:0.4109091750657461 6:0.03853098236933272 7:0.38911994885223145 8:-0.5031309357181766 9:-0.596467768575587 10:0.17884522225228028 +14.697703557439503 1:0.24508864174863 2:0.7576193329655578 3:0.09030511120334461 4:0.9537528991778741 5:-0.7224092160621338 6:-0.34089385162121943 7:0.6924170720838818 8:0.32912306214891784 9:-0.4064624712125904 10:-0.5344662061201593 +-13.976130931152703 1:0.5891192531479754 2:0.29862103742464274 3:-0.36153976712796343 4:-0.6552669564323226 5:-0.22672513691161766 6:0.3001336202535376 7:0.34490251346382617 8:0.2072633053920192 9:-0.5659371284058774 10:0.49599636156628835 +-14.762758252931127 1:0.31302496164254223 2:-0.6062773982342133 3:-0.9874007658402217 4:-0.6214904627601421 5:-0.11421073677207683 6:-0.5850843421161205 7:0.1250679146774638 8:-0.7108170726393621 9:-0.6888351241194393 10:0.6077343683084389 +-3.300641320608255 1:-0.1407178879203672 2:0.12960233233004925 3:-0.4236196478321872 4:0.7903078296084356 5:-0.8755754953628643 6:-0.2062360260394529 7:-0.045680124889026175 8:0.783182093429277 9:-0.02995737262668463 10:-0.33382351650328435 +-15.72351561304857 1:-0.1802575775708093 2:-0.991006951265341 3:-0.9107951763247621 4:0.9069820084047908 5:-0.12691921206803047 6:-0.7087012119383593 7:-0.9179510577925369 8:0.18480349982718325 9:-0.4478459144114004 10:-0.5560585660624608 +-22.949825936196074 1:0.4797855980916854 2:0.01997502546020402 3:-0.8827928315487465 4:0.2755107907750989 5:0.015544482147298977 6:0.9652687138748801 7:0.6622667860970648 8:-0.7708138539912186 9:0.17728148663006627 10:0.47818190728952925 +12.092431628826905 1:0.1358843437335564 2:0.03643446587894239 3:-0.31070823939673287 4:0.5283033206569152 5:0.3469111543845367 6:-0.5162518174930761 7:0.24270234207184016 8:0.7352292800096338 9:0.8860322286740037 10:0.6748068653962045 +-23.51088409032297 1:-0.4683538422180036 2:0.1469540185936138 3:0.9113612952591796 4:-0.9838482669789823 5:0.4506466371133697 6:0.6456121712599778 7:0.8264783725578371 8:0.562664168655115 9:-0.8299281852090683 10:0.40690300256653256 +5.998186124881712 1:-0.9781302074883151 2:0.32984303335155785 3:0.7303430847899663 4:0.841481297188956 5:0.05580773881989276 6:0.7130788298702062 7:-0.218087116119847 8:-0.9889494995220598 9:0.9182854134226501 10:-0.7501751701020942 +9.852316338642547 1:0.146854160091757 2:-0.3611508707370965 3:0.3517016971654914 4:0.6187697988029395 5:-0.010768583697787548 6:0.5236725885871243 7:0.5945666964145524 8:-0.009180562740628506 9:-0.44474762415618274 10:0.41852743519493685 +-5.313930756588526 1:-0.6304209277071555 2:-0.37010359785263813 3:-0.3194739026510125 4:-0.750533359080716 5:0.45500303301733114 6:-0.012727544364283805 7:-0.43941651856862274 8:0.927108876532093 9:-0.24164903158058149 10:0.44134972919002124 +-4.2775224863223915 1:-0.35785764991284363 2:0.942797043714243 3:0.4539569191274251 4:-0.6944903010994341 5:-0.08357221983075225 6:0.4433049548665855 7:-0.5488972050023557 8:-0.24014623658145773 9:-0.6178118485382511 10:-0.4575463952834564 +-10.57769830424322 1:0.22693864400257335 2:-0.041639691095668674 3:0.9948726461115123 4:-0.7450471554938383 5:-0.1114847126717804 6:-0.27881184842402673 7:0.029766812446276214 8:-0.3727649352432578 9:-0.7791732805568077 10:0.9425576681069683 +-0.8430338600258201 1:0.4607090007225536 2:-0.6079961642969514 3:-0.5671626932935381 4:0.12784576080614185 5:-0.30766031989910236 6:-0.21232963505711555 7:0.3310463755850872 8:-0.6807682731528943 9:0.7826634145951483 10:0.0608057623636995 +13.450586257053727 1:-0.2697769964284986 2:0.07743737732312428 3:-0.8459687499864881 4:0.6091901514177853 5:-0.9464815428211699 6:0.15780407422581533 7:-0.28552052619478996 8:-0.27500859181806403 9:-0.7207541548282903 10:0.05215593729084533 +20.358241877831016 1:0.29768927445620164 2:-0.5379390525163252 3:0.6591913001003027 4:0.6635992348010928 5:0.3786594651413009 6:-0.7217135278882543 7:0.9634013908615768 8:0.03961253903778861 9:0.1335121312144949 10:0.7933944303463509 +9.800993960518852 1:0.39896823489212285 2:0.30948413101894023 3:0.08568060094378493 4:-0.7454513450113371 5:0.8054125831421357 6:-0.24464240413169347 7:-0.18294406588625112 8:-0.883455504399858 9:0.2468431033653562 10:-0.708151566382103 +-21.432387764165806 1:-0.4785033857256795 2:0.520350718059089 3:-0.2988515012130126 4:-0.46260150057299754 5:0.5394344995663083 6:0.39320468081626836 7:0.1890560923345248 8:0.13123799325264507 9:0.43613839380760355 10:0.39541998419731494 +-4.090570760187878 1:0.3909705814857716 2:0.9830271975811611 3:0.672523651785939 4:0.0035177223850744177 5:0.567082732451311 6:-0.2620454326881394 7:0.46622578556708105 8:0.646246879249865 9:0.4263175536668733 10:0.8982696975276223 +3.7459201216906926 1:-0.9480167656870653 2:-4.888270196095057E-4 3:0.48226844071577646 4:-0.23706663537631645 5:0.22420266627462127 6:0.2981747607694978 7:0.3893425967975348 8:0.6302701381298614 9:-0.21909113816064196 10:0.8371697958140494 +9.767952084958061 1:-0.2300790371078303 2:-0.4457883630748676 3:0.28710853302295325 4:0.7112839743052013 5:-0.8765858382640623 6:-0.6470779468607217 7:0.4369262584371727 8:-0.7175412028407337 9:0.5506733477278882 10:0.5393007189573547 +6.9802839308913365 1:0.21769855012808215 2:0.8653818331675485 3:0.2322943113578111 4:0.3760591265797468 5:0.06554014167292377 6:0.6866096712933549 7:0.866929973115441 8:-0.6462263417217329 9:0.2507247465275353 10:-0.7005877782050307 +16.014720800069103 1:0.6058055248984549 2:0.048517868234337014 3:-0.15744912875924877 4:0.32598079708869365 5:-0.587791997223768 6:-0.4636187312118474 7:0.7771908559246068 8:-0.349403853888719 9:0.229800030145503 10:-0.674614818934488 +8.417571532985823 1:-0.21164946152466801 2:-0.9981936663594053 3:0.8611869575187896 4:0.11100891297254312 5:-0.7406067304729631 6:-0.7613837395522254 7:-0.9617573325708704 8:0.5697426971647488 9:-0.5830879716990833 10:0.5951448538064159 +-12.491442077546413 1:-0.19172117564625735 2:-0.12421304883392126 3:0.7095605786791346 4:0.6401582292398038 5:-0.9347790209840108 6:0.6592209285686903 7:0.702282297844389 8:-0.22765902007749528 9:-0.17746922342943816 10:0.7196663432778121 +-8.605713514762092 1:0.36490454976480846 2:0.6991204480538957 3:0.6546945560337121 4:-0.032324845758738174 5:0.2453935969836043 6:0.5363119225093116 7:0.6266741350524205 8:-0.2132266305382322 9:-0.308105870487996 10:-0.08219413867616465 +-10.35591860037468 1:-0.014204168485027147 2:-0.7077035677144325 3:0.024004217785642767 4:0.818971992516166 5:0.9081305263471056 6:0.808854493237229 7:-0.6474336785461867 8:-0.32559288177031465 9:-0.32850453072496055 10:-0.7035310416695784 +3.605002621628445 1:0.6085817977516599 2:0.8101072412357928 3:0.7697891508923966 4:-0.5738750389864677 5:-0.734314989863889 6:-0.7879014492215499 7:0.6884442838920775 8:-0.46131231930402383 9:-0.7730585954271005 10:-0.7819874019145132 +12.30435312415091 1:0.3283668768730639 2:-0.18316686990068187 3:0.3955614099142126 4:0.8450470350842108 5:0.3958042901611589 6:0.6578475571960676 7:-0.4395488558075096 8:0.15720430113495376 9:-0.5318362828977672 10:0.45585285255232044 +9.020048819638827 1:-0.5986521145193395 2:0.3266542215286443 3:-0.09911773729611917 4:-0.21478254478908676 5:0.6546175049764293 6:-0.1414796368932345 7:0.25802631337510085 8:-0.6773828562539816 9:-0.22038193899258718 10:-0.17465737306657902 +14.854262978981406 1:0.5293763924477841 2:-0.24658868331583683 3:0.8268631648872109 4:0.8969207203400265 5:0.03933229861213983 6:-0.6212951181360529 7:-0.36695460282178205 8:-0.5468014636386027 9:-0.3419492829414976 10:-0.8273314086998671 +5.658665647926016 1:0.9543096383762801 2:0.13230023957687176 3:-0.3071929861496465 4:-0.3646067841449696 5:0.6979929890816723 6:-0.20721664168809228 7:0.6676482547655365 8:0.944757051233543 9:0.024377296173674567 10:-0.9413728609667691 +-6.930603551528371 1:0.09198647857985232 2:-0.3685113649452161 3:-0.2361728930325453 4:0.3674268130607439 5:0.27385598384498344 6:-0.7151900241735676 7:0.3310154476154119 8:-0.24328111897361682 9:0.2511378679668912 10:-0.35825141175578934 +13.361196783041926 1:0.11676665169094824 2:-0.49968608916548307 3:0.9941342810313298 4:-0.17858967215374988 5:0.1993744673440312 6:0.14596837574280297 7:-0.8245495433125194 8:-0.5637934691545672 9:-0.8589185806222286 10:-0.4923216901915597 +-3.280508467210429 1:-0.9917770074538397 2:-0.1547651813493751 3:0.621733177563484 4:0.7303326279246298 5:-0.0786900332560696 6:0.9107127797641994 7:0.7104513024299466 8:-0.32858522942354407 9:0.17013652749847386 10:0.27656984316288824 +11.13509519160867 1:0.6874932143640391 2:-0.46610293161038907 3:0.8744681017967024 4:0.40900365224695956 5:-0.49770054448432055 6:-0.0635770754462921 7:-0.5705387648707747 8:-0.577988250149829 9:-0.8099463063934682 10:0.42132700180827354 +-11.857350365429426 1:-0.24607974991258308 2:-0.943388538022258 3:0.8679112109377674 4:0.7779951176637694 5:-0.5802336023276593 6:-0.9093352471884992 7:0.29337797938742316 8:0.498519874589175 9:0.3493034812120912 10:-0.07307210651399076 +11.421632138263703 1:0.3911519359353859 2:-0.8154393787235621 3:0.47194271125243237 4:0.14014792298759593 5:-0.3589345913619957 6:0.7887695409762479 7:0.49962792312858895 8:-0.6402670146359797 9:-0.2314041601683119 10:-0.798901341175887 +5.194792012146463 1:0.810279303469398 2:-0.9772756877199589 3:-0.20925958437085557 4:0.8797562461102444 5:0.3211532423260066 6:0.25250279470783754 7:0.14387831263435813 8:-0.021466789385169882 9:0.18909293657271564 10:-0.5981349964027893 +12.242677118499806 1:0.3565715672082048 2:0.7366743237221687 3:0.1922233582434527 4:-0.3551925780624561 5:0.5290849503909634 6:0.7744214641246749 7:0.7277215028580597 8:-0.590440215391044 9:0.7427328184290733 10:-0.6231904162251609 +3.496172341296411 1:0.5028717258135624 2:-0.5838871888624848 3:-0.5540116561110324 4:0.8502487679795261 5:-0.7983061034328727 6:-0.3853123296389005 7:-0.1493800684643869 8:0.6008798629354264 9:-0.32299062155495406 10:-0.5827019502242026 +-15.437384793431217 1:0.41994681418237345 2:0.7106426870657483 3:-0.45211033467567696 4:-0.7272406549392239 5:-0.35736594496490737 6:0.4764507578985955 7:-0.5249912641281373 8:0.8562010912051132 9:0.45927621623833637 10:-0.3701817429794385 +5.490036861541498 1:0.8414999442459015 2:0.9273442862476728 3:-0.054654787893199774 4:-0.23126134156257327 5:-0.9155048245317694 6:0.25750538376376975 7:-0.8470916763665326 8:0.9105674676753848 9:0.5026028522378054 10:-0.06650501561108468 +-1.074065343287859 1:0.37484830603001607 2:-0.9858854245832975 3:0.007159356555897611 4:0.8172796295244154 5:0.519147377529164 6:0.8211049991970722 7:0.9901658817979146 8:-0.026519560032641998 9:-0.2328762488733862 10:0.43161994187258035 +2.0482082496444622 1:0.24940246021565793 2:0.47248358864259177 3:0.23833814894291105 4:-0.3449172512379757 5:0.7412869866239866 6:0.1351422898741914 7:-0.003784141556894216 8:-0.6321917152754075 9:0.8246267827865776 10:0.5057520480449009 +16.709794859608397 1:-0.5977424405191092 2:-0.13991362149785713 3:0.613487896720806 4:-0.37681525320218157 5:-0.4369592282569783 6:0.4702242879506955 7:0.07498463532645339 8:-0.9942304127133292 9:0.41304209196175257 10:0.6799250665519481 +4.598881854940949 1:-0.41212838137243835 2:0.6737124633791323 3:0.8376369191216593 4:0.2848328781926128 5:-0.17960265353296 6:0.0035488712665472377 7:-0.8355355482928055 8:-0.7439716673142398 9:-0.009043467128117433 10:0.7423272515054122 +9.566038608555402 1:-0.662329643040616 2:0.4727113884417973 3:-0.15734218732411365 4:-0.3950754785173889 5:0.13837083076070011 6:0.633261314089351 7:0.9929998062307679 8:-0.4639028424346423 9:-0.073992579817449 10:0.3413166410117088 +1.629198477883475 1:-0.2875719791707101 2:0.9395753700232541 3:-0.45090801750966314 4:-0.384528069378699 5:-0.35937736478702753 6:0.9597102694501136 7:-0.6898325123180971 8:-0.11436012866371303 9:-0.5330550575952768 10:0.24688769932037258 +-7.374620970147229 1:0.16864051681940984 2:-0.08391828256018252 3:-0.8184503043836224 4:0.5461252511055263 5:0.7264676659099087 6:-0.9654384426822686 7:-0.8537533138667612 8:0.9189716013058653 9:-0.03449322582531389 10:0.5490329745887035 +-0.5741704240890674 1:0.9392753294760656 2:-0.5579682000156501 3:-0.8083270703362093 4:-0.7022804026958895 5:-0.30426803430649896 6:0.8211432527140852 7:-0.8101343265051797 8:-0.0945946325760949 9:0.49546915718101814 10:0.5184327698839013 +12.583032451116004 1:0.20496323995364651 2:0.5082017540304999 3:0.2428646053751764 4:0.7101854338863274 5:-0.9619925264660094 6:0.4610134502825909 7:-0.5620669052678122 8:0.6766614078376236 9:-0.7169693435782278 10:-0.14362322382035164 +-10.489157123372898 1:-0.7441633083637054 2:0.07069898351187809 3:-0.47119552972566336 4:-0.43970155900871344 5:0.43192289605353973 6:-0.0798550143899397 7:0.2111188135787776 8:0.9101748615761336 9:-0.4079984876629721 10:-0.8101424982394589 +-3.811365493249739 1:0.7250263461647963 2:0.22182621035333838 3:-0.12735342714215725 4:0.26222861719040624 5:0.3928174057935714 6:0.817131411734006 7:-0.056109765698795 8:0.7908779197353637 9:-0.06768319505245768 10:0.4107045608924882 +-7.604636483513961 1:0.876751634787073 2:0.04037085575852295 3:0.18142385658771398 4:0.38350565074271903 5:-0.30937664332011905 6:-0.9544807672006823 7:0.008643477632712449 8:-0.27676843472226276 9:-0.12938540988602476 10:-0.2929762262661819 +-1.9889499615051784 1:-0.4243149295090465 2:0.22578711943818686 3:0.662530786460152 4:0.28592235843136105 5:0.4170345231441832 6:0.9290881132120887 7:0.5332443368002588 8:-0.33248958421809927 9:0.16273139830495942 10:0.6899022585936985 +-1.99891354174786 1:-0.1732078452611825 2:0.2361029542296429 3:-0.8455867017505336 4:0.31638672033240867 5:-0.648387667144986 6:-0.7647886103837449 7:0.6910155501192978 8:-0.2665663102538198 9:-0.5980899570876459 10:-0.9165896495676276 +9.74348630903265 1:0.18934450539532244 2:-0.715110505416745 3:-0.453777527810155 4:0.2743741252197758 5:-0.8439310405443103 6:-0.533835190276116 7:-0.5911710854054728 8:0.21026462628920695 9:-0.45849607678093585 10:0.1327074179200407 +20.221961806051706 1:0.624731930687735 2:-0.39914395421723015 3:0.781887900750925 4:0.5442619051596436 5:0.16651193067479153 6:0.9064846121246533 7:-0.3643159594276202 8:-0.5182065337246469 9:-0.6785628247191553 10:0.7111152852903913 +20.456947955410897 1:-0.21923785332346513 2:0.11340668617783778 3:0.7397883986253251 4:-0.11748081084695605 5:0.06314872700777197 6:-0.7124574845946587 7:0.18043581960897104 8:-0.09023925260092103 9:-0.7256417560118238 10:-0.5038088673851804 +12.241006086129564 1:-0.15271598143132215 2:0.9038942665552285 3:-0.6168514099878155 4:-0.12219038322317011 5:0.5402785935596728 6:0.4059744401803913 7:0.258870596734184 8:0.3190881033039108 9:0.2372469007313076 10:0.367188299614863 +3.980473021620311 1:-0.9025895351376971 2:-0.03333947011476446 3:-0.8220776066161464 4:0.449117985679933 5:0.9970519437779266 6:0.27430911004640457 7:0.039081352882204046 8:-0.8621514950929796 9:-0.569587565933642 10:-0.9118346349929578 +-13.420594775890757 1:0.3697979495309094 2:0.07383664120111888 3:0.7199366131785143 4:0.2118625428869032 5:-0.9015976323216077 6:-0.5298395275757712 7:-0.9517419542156635 8:0.39554920787574743 9:-0.3721957439110324 10:-0.4750272836396878 +-1.052659359353786 1:0.02106845330888185 2:0.7571245678782959 3:0.8034228830223251 4:0.32968340513846917 5:-0.6510386482911554 6:0.2710115488605187 7:-0.1319580272290235 8:0.932600992666184 9:0.8260461527035414 10:-0.8507648952138052 +9.813440129324034 1:0.41048687946340134 2:0.9384639988086239 3:0.4569555844323441 4:-0.3084729082645552 5:-0.7299010284877061 6:-0.6925012997779212 7:-0.6798013915257548 8:-0.504368104320321 9:-0.6234398059664716 10:0.8633407902005543 +-2.8942782378157714 1:0.5546381825677706 2:0.7959405841824887 3:0.584699836289184 4:-0.5726371777829862 5:-0.2827976152663936 6:0.138034013875719 7:-0.2935080791661324 8:-0.5323479091625714 9:0.6837641044797451 10:0.5986680812032501 +8.562937733537664 1:0.14753220510180776 2:-0.31591341855048327 3:-0.748545617199091 4:0.3251888821665734 5:0.8228589483149358 6:0.046659706976506676 7:-0.35049927996132624 8:0.2953170004605874 9:-0.6429374177050204 10:0.4624083116836044 +13.413187970975178 1:-0.7229883396779724 2:0.8876940454894067 3:-0.033794226589695775 4:0.46700071356381523 5:0.1599557295166274 6:-0.8944619785248653 7:-0.1258464584151997 8:-0.8797551785991506 9:-0.14408879184669354 10:0.11020655997336015 +-5.491389764900794 1:-0.366507395597937 2:0.630480481240723 3:-0.16600801981741609 4:0.09842042773854076 5:0.30129535029579047 6:0.14102166298628882 7:-0.28131788612036623 8:0.49635295715686234 9:0.0625636989631968 10:-0.41748132718912 +-10.29566593602992 1:-0.7898597726154271 2:-0.05425577320946573 3:0.5992645759265662 4:-0.4716868549309716 5:-0.020137302700854676 6:0.6216515277233232 7:-0.7295510954484412 8:-0.41443875567123967 9:-0.610576632050404 10:-0.9515988311377204 +7.084732852050431 1:0.9990215581592679 2:-0.9868954542412269 3:0.49133473382040704 4:0.7697599878561228 5:-0.34668939907967267 6:0.9777705993519483 7:0.4449043102759509 8:0.9812971199646168 9:0.6666598587737487 10:0.14398842572598514 +0.23715467505851734 1:0.21628799185444336 2:-0.4526390568867018 3:0.6558486691929235 4:0.13730688681492142 5:0.23076986155942736 6:0.7020484017619715 7:-0.12077999528458938 8:0.8306084972447003 9:-0.49337323198621563 10:-0.8270028152572872 +1.1552619549601455 1:-0.48202394020369277 2:-0.6274878708695264 3:-0.27623674153600697 4:-0.5312153415813432 5:-0.030820182786174044 6:-0.5893370965577813 7:0.6666315120904487 8:-0.36482991729570036 9:0.6065771813692735 10:0.05831057330788525 +-0.20433879835596253 1:-0.4702220250018212 2:0.9123705796362889 3:-0.2045657170490376 4:-0.18922063450309534 5:-0.31431213362503163 6:0.4150130060120387 7:0.34016193625941127 8:0.8391374136299805 9:0.6884250315764333 10:-0.7916408854251566 +-9.751622607785082 1:-0.0014232315621649505 2:-0.1284246813729939 3:0.5228953023175369 4:0.9688522449007109 5:-0.7857721219549156 6:-0.7812922263391038 7:-0.5916136652814756 8:0.793988610184206 9:0.7982949061274296 10:-0.592785473963741 +-22.837460416919342 1:-0.17363144173810174 2:-0.3340314573781735 3:0.9351424971322297 4:-0.6430601902397572 5:-0.13363305808148818 6:-0.42446359566938585 7:-0.4093070316761178 8:-0.9302259781839204 9:0.47004365892170585 10:-0.6231289889808045 +-3.6318714209289436 1:-0.8296410705737971 2:-0.6056572341069668 3:-0.2975417404042737 4:0.07134138175064741 5:-0.8966463747179154 6:-0.4112675899658855 7:0.7908013478009401 8:0.407396254566472 9:0.9227769302156879 10:0.12418427404473764 +-3.8909712376010583 1:-0.6552751548581366 2:-0.5641921108932855 3:-0.6340486345063014 4:-0.5441069121131075 5:0.908720622198947 6:-0.026054643814348077 7:0.03706191653058433 8:-0.6672524338819317 9:0.7958274915288801 10:-0.19029619970124023 +-10.600130341909033 1:-0.7457695999520562 2:-0.3739453132549577 3:0.01327423342620393 4:-0.08482897201178563 5:0.84573456086082 6:0.6279927575103963 7:0.014494803555804125 8:0.9420647557771027 9:-0.13484113287285893 10:0.3037405853352888 +-12.094351278535258 1:0.9982796018306028 2:0.8354271779265348 3:0.46284321795736116 4:0.07693347919601745 5:-0.4753440408996932 6:-0.47098252868073787 7:0.4810729184846003 8:-0.6136990339205741 9:-0.6715833036640317 10:-0.6247058955319091 +9.936399360181602 1:0.7330323083522969 2:0.47204204993669197 3:0.3850471475752122 4:0.21483460195167958 5:0.3806220122265147 6:0.6336993433402796 7:-0.47987416364572 8:-0.195509010865196 9:-0.6561820282562041 10:-0.45300480439842894 +-4.706701061062994 1:-0.847895844561626 2:-0.29946646506145114 3:0.25432868082106497 4:0.1352958872054535 5:-0.8803017974303002 6:-0.3675110562764785 7:0.10888496324899721 8:0.9620737605396772 9:-0.031046632561323895 10:-0.09466883461500908 +5.101614991255809 1:-0.5174248135588373 2:0.14394061894828014 3:0.5613709266711013 4:-0.5678634944326011 5:0.930216209978763 6:-0.6204727890080077 7:0.4133141749872311 8:0.6262685035917408 9:0.03382924477926896 10:-0.15231139191832854 +-8.772667465932606 1:-0.2117605577769197 2:-0.4283897136887762 3:0.44686767473401035 4:-0.5507826261358746 5:0.237124956028401 6:0.6413157520982717 7:0.2409214827604571 8:-0.8505503638033114 9:-0.9811997368468401 10:-0.9499963936664035 +-11.615775265015627 1:0.8782018665273386 2:-0.9751473570197167 3:0.6307050068521085 4:0.7012721336851997 5:0.21318736263512283 6:0.024885128053773853 7:-0.4580644243558505 8:0.1318650007251434 9:-0.9306090092992167 10:-0.5688746770986652 +19.64829023536192 1:0.14426537998360645 2:0.3557716894181753 3:-0.8577143134654568 4:0.5288643233801469 5:0.9231529738221469 6:0.975999712077738 7:0.24700404691888678 8:0.10206517527052283 9:-0.10041951294847062 10:-0.9412918491876225 +2.7409415438025486 1:-0.7404936009304737 2:-0.9792071376296605 3:-0.49510748520932113 4:0.9538460112904268 5:-0.5075114153141447 6:-0.5890791308058669 7:-0.775366087491284 8:0.4983912525892249 9:-0.2976197956132913 10:0.6791258030468514 +-4.394658158733604 1:-0.41628618754613345 2:-0.1472602552309057 3:0.44136102233464025 4:0.011882653940414434 5:-0.6559502840386595 6:-0.4022529016339016 7:0.048402312931387526 8:0.8753776623326166 9:-0.8528247288266961 10:0.6593783978826002 +1.1915739133607073 1:-0.7840827624854878 2:-0.4860418508208426 3:-0.7418773161179972 4:0.129874781837924 5:-0.22631682294184796 6:0.47794208013755024 7:0.5532183426143056 8:0.11879859459306741 9:0.09927630694484524 10:-0.19268618891399636 +2.156192215438919 1:0.44325986644475646 2:-0.6057278708888592 3:0.3943381582091723 4:0.6560336238050575 5:-0.9651308100517204 6:-0.2358219003943678 7:-0.04143043460232465 8:0.8623951169233035 9:-0.4933545255502605 10:0.8990427200454263 +-1.1009750789589774 1:-0.4515707618788496 2:-0.745936099912899 3:0.41307003181926794 4:0.6127760492402428 5:0.9250878169732681 6:-0.6778628527469126 7:0.42794190420905753 8:0.4943969797578971 9:0.7762709104958854 10:-0.6932349268610041 +10.04434496594037 1:-0.0995467494040092 2:-0.7766769414838959 3:-0.6608009972582911 4:0.7075788021090594 5:0.5208396359138381 6:-0.09724033794207299 7:-0.743087245352148 8:0.765372791789753 9:0.3788699859744704 10:-0.6898257995055466 +8.038039859115667 1:-0.5321510657026671 2:0.5571925538006008 3:0.747268102801854 4:0.09079641165917596 5:0.25861122989509266 6:-0.9948187479498878 7:-0.9665136866462685 8:-0.3904629432867681 9:-0.9975425877998279 10:0.32024289816988416 +5.14371929922303 1:-0.4829199170694627 2:-0.5713285263827719 3:-0.43889652467111184 4:0.18478247261988967 5:-0.27374063120041225 6:-0.8069125377696931 7:-0.15497746743367058 8:0.32448521325998714 9:-0.39397735035206227 10:0.08184957956614292 +-1.6848276484379352 1:-0.39250489761445895 2:0.02730338852529557 3:0.9916055514435305 4:-0.07571433435055064 5:0.19024527726403728 6:0.6385182319185971 7:0.32480605537471297 8:0.5807543325220577 9:-0.35642510103381153 10:-0.9060482769392468 +-11.640549677888826 1:0.03707410390488852 2:0.2527049166981137 3:0.4114872952854447 4:-0.8508977901757795 5:-0.42791544663481895 6:-0.9864047295390463 7:0.6023685964407528 8:0.12018443688097036 9:-0.36816249877130414 10:-0.9583147535652901 +11.672104494601319 1:-0.2416258355340175 2:0.6737553249072334 3:0.9041602191361382 4:-0.2123232797997281 5:-0.008255188002961988 6:-0.5151894064136904 7:-0.7341877977528246 8:0.624625272218277 9:-0.6261434804192929 10:-0.5710586715741532 +-2.2960192492344627 1:-0.7457768645184579 2:-0.5954998103421847 3:0.5428846769211537 4:-0.6176587961491775 5:0.46222150678166574 6:0.7852238239427731 7:-0.3614580530629148 8:-0.325840253127059 9:-0.5660596710348922 10:-0.8060263366626401 +5.428302298615722 1:0.8774286357993033 2:-0.23110126319781088 3:0.6264134914476072 4:-0.143015582616014 5:0.350109539755298 6:-0.147747167834422 7:0.05020570422182824 8:-0.5469605849960337 9:0.951112567977048 10:-0.34800121380288185 +-17.32672073267595 1:0.31374599099683476 2:-0.36270498808879115 3:0.7456203273799138 4:0.046239858938568856 5:-0.030136501929084014 6:-0.06596637210739509 7:-0.46829487815816484 8:-0.2054839116368734 9:-0.7006480295111763 10:-0.6886047709544985 +7.314490512652487 1:0.8745354279105222 2:-0.9270067504840309 3:0.965218170323435 4:0.12808957052353698 5:-0.5309399625085234 6:-0.5968520990090951 7:-0.667403236513185 8:0.08154410986660832 9:0.33025488397543934 10:0.03406708067839537 +4.687373993408297 1:0.6731426721418288 2:-0.7111023070261273 3:-0.9849054116048603 4:-0.12831346258317322 5:-0.04095946352836921 6:0.6967001556166801 7:0.8479895229743999 8:-0.35600791972899404 9:0.5005979045264868 10:0.6421341979636503 +-6.82923852156868 1:-0.04849233571020073 2:-0.8505855619911602 3:0.2927180954190314 4:0.5780268040086791 5:-0.22322207765417268 6:-0.8436513934568071 7:-0.3906240514635124 8:0.7258714963093444 9:-0.21695043530813085 10:0.8049335285918169 +-8.24622879369294 1:0.12154833675098842 2:-0.26446415445316673 3:-0.06653791221669247 4:-0.7920694887292259 5:0.6128791496627621 6:-0.6927179137980173 7:-0.24584418172709932 8:0.3557416365779935 9:0.22868636757755234 10:-0.8288196322549064 +-5.090863544403131 1:-0.1535668648046895 2:-0.59868738365189 3:-0.8822518703008675 4:-0.1790505106198006 5:0.9467581256591948 6:-0.0661313762905984 7:0.31263046332923694 8:-0.03628894224569357 9:0.8969599435828515 10:-0.05386674051170348 +-15.780685032623301 1:-0.2568492063716883 2:0.7740976197426315 3:-0.7829158104387535 4:0.8578846037465748 5:-0.6111039318672586 6:-0.26939268282639306 7:0.3659136640533909 8:-0.8205938562638555 9:-0.24945505706767923 10:-0.935948184861368 +-3.9916779937384743 1:0.22925954469403154 2:0.18159238246979537 3:0.05607027262862396 4:-0.3376037702047998 5:-0.10630000583678934 6:-0.7148277241201622 7:-0.08327294541727137 8:0.6532439360618307 9:0.34352364313237294 10:-0.21028242388807916 +8.798748248458631 1:0.509058184822212 2:-0.17532831457577935 3:-0.6387880909085213 4:-0.966194650702529 5:0.7829797328120436 6:0.5470735549914605 7:-0.38312745239682333 8:-0.8890923931840893 9:0.6823342859396513 10:0.9231260597729121 +14.341273640964873 1:0.6996156678090684 2:0.8612833977834464 3:0.9131301694042417 4:0.5199385192744859 5:-0.32605907950755086 6:-0.9816465962348846 7:-0.5939885763232406 8:-0.7730924566676425 9:0.6367821449954114 10:0.10873812383881054 +9.75855501262469 1:0.2933324921347933 2:-0.4652534314332506 3:-0.2940640558090537 4:0.9883453215038367 5:-0.042460731786114314 6:-0.15438550895912062 7:-0.11182397625560592 8:0.7425954283250873 9:0.5063859049644963 10:0.3012211854180429 +7.695200921242407 1:0.3554353390157281 2:0.08707592690448718 3:-0.10815435665633877 4:0.05524046679762784 5:0.8000157491787581 6:0.3756193347272323 7:-0.18659830666742527 8:-0.08168623764933125 9:-0.2551379303720174 10:0.8560030587463281 +26.903524792043335 1:-0.4672678144441864 2:0.868381965588082 3:-0.04748335609643428 4:-0.0908285508827269 5:-0.22436865911994275 6:-0.953965287326564 7:0.40644848732968164 8:-0.33391575325981115 9:0.008337907338700212 10:-0.45597904754961416 +9.87318781117539 1:0.7310287890171705 2:-0.38300115058116324 3:0.5492682498036086 4:0.552016070316655 5:0.3715022458396897 6:-0.3888040017277252 7:0.21348231125683648 8:0.23219558685722874 9:-0.6271161253492459 10:-0.009137052604519136 +7.6930514050666625 1:0.48603550488592284 2:-0.9218820771919889 3:0.17678612698428053 4:0.5110501870908806 5:0.5817010201164554 6:0.4488707800038747 7:0.4977618637956498 8:0.1683214570038094 9:0.17237242672259323 10:-0.5276084644007359 +3.155413914311745 1:0.04582517188512947 2:-0.9060800653779759 3:0.049786270132956556 4:-0.4236784487542993 5:0.6293910028372613 6:-0.7370237043436467 7:-0.4599678991281728 8:0.5317111095323057 9:0.0029525239228334055 10:0.9294876800738165 +-10.18815737519111 1:-0.9023553189306839 2:0.08434165073970856 3:0.7163931103395633 4:0.41749986495957914 5:-0.8190972970472759 6:-0.9996126872234177 7:0.1779075727741255 8:0.18212754689351862 9:0.24628508239298963 10:0.667589863190412 +18.585731475373457 1:-0.8399129036462931 2:-0.10024819268489127 3:-0.5011350892733817 4:-0.7299256348863585 5:-0.9412022985072928 6:-0.245064895931544 7:-0.1032512650854267 8:0.9943863256441088 9:-0.6429371028855466 10:0.062299742931960056 +8.998359297106072 1:-0.16850226855111905 2:0.7262839202089402 3:-0.04876255055071854 4:0.8948164957242868 5:-0.10720585418953132 6:0.2622719447841948 7:0.26433837506661373 8:-0.5143449147399106 9:0.17444585508955002 10:-0.813182163328944 +13.032424230011074 1:0.4014766166181287 2:-0.1710502754125871 3:-0.309850483152607 4:0.255642456909988 5:0.9949117714165621 6:0.12553772251510864 7:0.6412602805648968 8:-0.6225679446416825 9:-0.15867011477056936 10:-0.4970695349036196 +-6.931030745953174 1:0.5151452174260762 2:0.10077064818539072 3:0.9515221270405545 4:-0.21551878535257907 5:0.29152528087481366 6:-0.10995497026133605 7:-0.7872786530801681 8:0.9909149980139627 9:-0.6044617953251021 10:0.4135285912642448 +15.538062451207367 1:-0.493569696351595 2:0.7280914440594639 3:-0.5399160539735497 4:0.5688018985826291 5:0.8296550361854862 6:-0.3519274619833537 7:-0.5536583684230114 8:-0.9648774930921231 9:-0.2649670832738824 10:-0.2337289004188019 +9.499729032920945 1:0.22017490770298553 2:0.7693082799289328 3:-0.7645745307823122 4:-0.4243400515554365 5:-0.7065281515163817 6:-0.9488470141298047 7:-0.7888781431404843 8:-0.38027758953310964 9:0.11329243985448345 10:-0.5636550498916204 +-0.6039115764951412 1:0.3128791250125589 2:0.4690308315665288 3:-0.9819748103687955 4:0.28931283693913223 5:-0.6283983933456656 6:-0.10795935596621975 7:0.7785831799196448 8:0.4453768248295542 9:0.4055410615499917 10:-0.581108383985806 +9.682301463907875 1:0.5039970331368235 2:-0.008965105921562966 3:-0.5415225380115054 4:0.4677111860370293 5:-0.3854089758945243 6:-0.8468317339287676 7:-0.29258253017713587 8:0.7361173598968789 9:0.5722561668394952 10:0.8524030171340933 +-2.8752191903928064 1:-0.45407356732165205 2:0.6563221064539377 3:-0.8938366926767671 4:0.6028173420234533 5:0.6792881349943096 6:-0.6295604812779405 7:-0.21641416912497213 8:-0.8703620515028858 9:-0.3397362922228042 10:-0.0966947467107604 +-28.046018037776633 1:0.9493308195854675 2:0.3285214661535252 3:0.749300278016316 4:-0.006681618268088219 5:0.2936055273341429 6:0.0044706790416966236 7:0.5006172205470896 8:0.38751814960349473 9:0.6069735922707928 10:-0.794612882855285 +2.8752582614589373 1:-0.9443232811926943 2:0.3153126492983107 3:0.6423843271417344 4:-0.09528333043829118 5:-0.2318773828230698 6:0.32597909562645766 7:0.42808555740416065 8:0.2895959316734451 9:-0.5106491076955746 10:-0.2917418155655722 +-2.203945173593806 1:-0.13844025039418084 2:-0.024638102806725293 3:0.6114514176076162 4:-0.6939316676972749 5:-0.036549673716341324 6:0.0942395290460385 7:0.7943411369475493 8:0.7025693796408046 9:-0.21822635487138853 10:-0.6045250179827362 +-5.070655299509993 1:-0.8035156105848074 2:-0.5344928236067734 3:0.6105404604447127 4:-0.7538635525543969 5:0.9836765037886612 6:-0.5700253195942724 7:0.9232380985458313 8:-0.26374377078100464 9:0.9079431466301682 10:0.8404281771949533 +-2.540181413836895 1:0.220453181647285 2:-0.03105792440486077 3:-0.17131282366411926 4:-0.41800060634660485 5:-0.1477564564540963 6:0.055537469469941536 7:-0.8092076926316594 8:-0.29815112444525727 9:-0.20030580647762464 10:0.337865838755971 +19.341342586351033 1:-0.32052868280788616 2:0.954507993011956 3:0.38642226954792824 4:0.9240442034609888 5:-0.46077559741256824 6:-0.7991393493824104 7:0.9396232321156679 8:-0.2486930151964184 9:-0.6256485833035617 10:0.14861843824730103 +0.31398559122529757 1:-0.4684215762946897 2:0.07873308388585198 3:-0.3589594894052015 4:0.14284662079329458 5:-0.8936272055527841 6:0.5647217242826741 7:0.49613233215723507 8:-0.501698787526992 9:-0.46710107378968724 10:0.898517179577361 +12.243117462926584 1:-0.8147610562690222 2:0.21104006948075482 3:0.42405323019132957 4:-0.667965573810795 5:-0.267026607469405 6:0.7949752815579358 7:-0.07664414977654532 8:-0.6023087644686556 9:-0.659375887511856 10:0.459735946423397 +-4.623091296763939 1:0.08405646515942733 2:-0.40009448092691446 3:-0.39831245310544094 4:0.8794137836499942 5:-0.04788565812369017 6:-0.06763019434549333 7:0.41324877265674065 8:0.39746868847324146 9:-0.986729367280818 10:0.7001677710291752 +-5.782162271139417 1:0.29127970805530157 2:0.6712715787317827 3:0.27575757044478477 4:0.31525054647682804 5:0.6905016168465983 6:-0.5195319089267731 7:-0.06598129860341295 8:-0.5142554034519407 9:-0.11546331150946942 10:-0.2934524891698944 +-9.892155927826222 1:-0.7048583334456604 2:-0.46598491327111247 3:-0.02034722477413209 4:-0.663294196316965 5:0.4485329128582778 6:0.4553619594861118 7:0.7154814909138205 8:0.7532937661147989 9:0.020693077287389894 10:-0.23131986644633207 +0.5422764698408844 1:-0.1513298744027669 2:-0.4591544337339648 3:-0.7192219559850723 4:0.21236658135317632 5:0.12050445497328166 6:-0.42411528242712127 7:-0.15103925528861595 8:0.9032115729799512 9:-0.9228817525021624 10:0.2604090001033641 +4.187800872274017 1:0.3084355607627949 2:0.7029638272178733 3:0.34098344122299573 4:-0.5818421369891376 5:0.1332356708082485 6:0.22671316744441716 7:-0.6750469752494854 8:-0.4065302428716193 9:-0.48213803977370073 10:0.17918596677210186 +4.487701812297124 1:0.8352061350259052 2:0.2757393215770836 3:0.9310504392364667 4:0.519503546762708 5:0.5270245209143005 6:-0.08181154800488488 7:0.5148324302455536 8:-0.6680946101511949 9:0.7574060703813035 10:-0.4721334895419935 +-5.150140984417682 1:0.8113709439821006 2:0.21939305063309278 3:0.02109986546311826 4:0.07450107676582762 5:0.723883853128624 6:0.5392035186380486 7:-0.1382740221237464 8:0.9990201540159807 9:0.10429329766137108 10:-0.1365266408862309 +-6.544633229269576 1:-0.08278037549320039 2:0.6982730989138761 3:0.9090685953368327 4:0.6754092061339365 5:0.5889199822482736 6:0.020678619551471433 7:0.47605785660672084 8:-0.49926771127869873 9:-0.28380077002944093 10:0.5282319276258469 +7.216836352055753 1:-0.8510680074642156 2:0.42611818262128476 3:0.593607821624947 4:0.5635067468583634 5:0.2121930523769171 6:0.2708063180622071 7:-0.31491113345871735 8:0.005990053407278095 9:0.8985259402559085 10:-0.44549339042232794 +20.874246167942125 1:-0.53010692413621 2:-0.9897084749945524 3:-0.9083978261828305 4:-0.15581655583739495 5:0.9974035542095165 6:0.9894717992956665 7:-0.7287287537245402 8:0.06425127137526943 9:-0.06684164745938337 10:-0.3600621883071937 +-6.556192430758147 1:-0.7655958349167471 2:-0.08083170734199419 3:-0.8540636958251198 4:-0.09994429443696973 5:0.1734809016500265 6:-0.29563180244063325 7:0.2158497607364409 8:-0.6071644305523003 9:0.8063426715403785 10:0.47092299197899345 +7.252748885335252 1:-0.36403312429467216 2:0.1237451136826817 3:-0.5756427605741237 4:0.7612833636750866 5:0.9350628314096134 6:-0.012087843264624754 7:-0.03742573515965031 8:-0.05210460803183037 9:-0.5333214800203341 10:-0.013320030179712505 +-9.2679651250406 1:-0.5057250557539077 2:-0.41655319851679495 3:0.1897431234740683 4:-0.038318717640150046 5:0.9136495575471062 6:-0.8890525036858237 7:0.40859501498633377 8:-0.8746985847539293 9:-0.005836984002720369 10:0.7838036026237987 +-15.732088272239245 1:-0.8546867577633044 2:-0.3003980324850013 3:0.49649883896876834 4:0.710496747220617 5:0.5848510480601048 6:0.5714826756665468 7:0.5487975165953451 8:0.5654333402837335 9:0.863539315599626 10:-0.9699410102494574 +-0.20412431312519014 1:0.13323548063028934 2:-0.3030177580658542 3:-0.6358920925969869 4:0.3729380701923921 5:-0.8131818118430312 6:0.11567152703716288 7:-0.3645508535812394 8:-0.5487213252460876 9:0.5605886387366952 10:-0.8400308993051686 +10.445759684895373 1:-0.92707280355555 2:-0.9307772570299944 3:-0.11971873660640964 4:0.5140245291069254 5:0.5751145648836897 6:-0.43850910073502347 7:-0.7872208869913662 8:-0.3087975452145404 9:-0.4645849758749403 10:-0.1563641826381328 +3.349708377102383 1:-0.6334394121009499 2:-0.9008086683014112 3:-0.2678892493467009 4:0.7626514243443427 5:0.6406493676995701 6:0.3669245573649391 7:-0.052050629941784665 8:0.6713394117904852 9:-0.11458974566378233 10:-0.25949626043219576 +-23.487440120936512 1:-0.5195354431261132 2:0.8080357948412571 3:0.8498613208566037 4:0.044766977500795946 5:-0.9031972948753286 6:0.284006053218262 7:0.9640004956647206 8:-0.04090127960289358 9:0.44190479952918427 10:-0.7359820144913463 +-11.827072996392571 1:0.9409739656166973 2:0.17053032210347996 3:-0.5735271206214345 4:0.2713064952443933 5:-0.11725988807909005 6:0.34413389399753047 7:-0.2987734110474076 8:-0.5436538528015331 9:-0.06578668798680076 10:0.7901644743575837 +-3.650649176738987 1:0.9665344025238449 2:0.1395514751689353 3:0.954697162791015 4:0.2093601878355722 5:-0.42841737775246336 6:-0.02877209657213764 7:-0.8382526163632971 8:-0.03773878779258388 9:-0.3751775119106411 10:0.6477987464528951 +0.21915863046310957 1:0.25143109618049353 2:-0.06463696557011112 3:-0.3324862332340037 4:-0.7184623449423757 5:-0.8897217937178385 6:-0.7336278194091297 7:0.8547631637534296 8:-0.7582613025929346 9:0.9080481791309838 10:0.9427850135311773 +4.813247597584681 1:-0.4564689661727537 2:-0.4315414033069003 3:0.09676404446694242 4:0.6024645727173434 5:0.20466090997530606 6:-0.09432916868838737 7:0.6402934161890248 8:0.741842551426011 9:-0.343937669190693 10:0.308871619426873 +-3.0700825038127206 1:0.660084046469162 2:-0.02317305725931229 3:0.7567569356692221 4:0.2528834502236612 5:-0.3935091635208898 6:-0.9965507922509653 7:0.9065754202428946 8:0.6988037588300844 9:0.05145737657924321 10:0.4697377584426863 +9.762542323725354 1:-0.036129448543738896 2:-0.8252508992030534 3:-0.752854859129851 4:-0.9027424488033049 5:-0.4763092428375775 6:0.4832492121777574 7:-0.2935697977919014 8:-0.9197908986231211 9:0.8914359296658816 10:0.8688484670974876 +6.690913813146277 1:-0.7649833946109403 2:0.0419327356721928 3:0.5420954694310764 4:-0.7373259510045522 5:-0.9187577877864708 6:0.6431180783847401 7:-0.6272529754533058 8:-0.43356486537110106 9:0.16848266440424364 10:0.3129700315745716 +21.325049167466855 1:-0.36392795201361383 2:0.846518905511275 3:-0.26361421923150097 4:0.5140384860444887 5:-0.9147771624497878 6:-0.22044646197773576 7:0.14099760779666948 8:-0.546631395802236 9:-0.4345465263406878 10:-0.2759894364167672 +0.41237529640734055 1:0.05016964684797287 2:0.21708512805176072 3:-0.9444942733586354 4:-0.6118772896807114 5:-0.18053631846913665 6:-0.06752556529755416 7:-0.0081819952134361 8:-0.7774039956687315 9:-0.5548994336153177 10:0.7510833121912588 +-15.056482974542433 1:0.6012054064354875 2:-0.6127014811673221 3:-0.8356741843949218 4:0.19830469052767397 5:-0.07726493085289698 6:-0.5756891943805014 7:-0.49010583357941884 8:0.7493759119974515 9:-0.7828994218436376 10:0.6154265137741459 +-2.109441044710089 1:-0.5757976103755722 2:0.3686657403505862 3:0.5418762444017706 4:-0.5896052565388463 5:-0.1000712585735879 6:-0.8114188394866342 7:-0.5863884932327266 8:0.28289838755838015 9:0.5378646921099333 10:0.5063780890366179 +-5.249715067336168 1:0.6828022788286754 2:0.9044668986252975 3:-0.6010464361571437 4:0.8416122052398811 5:-0.9846446498408039 6:-0.3777762313579811 7:0.5763775880953983 8:-0.07608009385213488 9:-0.29576023599575474 10:0.8845728751981716 +6.907770824878343 1:-0.9751352215365647 2:-0.8297271715190588 3:-0.07240311280415779 4:0.4796310183582191 5:0.358213469979769 6:0.4628020211207058 7:-0.9753405605972942 8:-0.765583403709019 9:0.5623611232648877 10:-0.14725965272406616 +-9.299021854126096 1:0.8784076266914045 2:-0.8314918563417382 3:0.8701529449600536 4:-0.8070129727442199 5:0.07396877198841345 6:0.0040889707225901795 7:0.40529205456687145 8:0.6412485325027342 9:0.1443450351498905 10:0.404997568726581 +10.95643670126225 1:-0.37321642594676097 2:0.47766490569544473 3:0.9201313123144423 4:-0.649393433578801 5:-0.9084894063674787 6:-0.2547160991750408 7:0.7674649994523459 8:0.646056370118979 9:0.6014100713287893 10:-0.15130291862509182 +-2.6397202393123336 1:0.3285252466844373 2:-0.2714281159811125 3:-0.5869561846815805 4:-0.5643935541712441 5:-0.7285201267315389 6:0.6502951074428092 7:0.8611880383193904 8:0.6380425291162128 9:0.5118538704085516 10:0.4012684110865874 +12.521131042032012 1:0.4843931319727355 2:0.06440853455169626 3:-0.6151259240105509 4:-0.4180928328467284 5:-0.4607061773323424 6:0.8104775289268906 7:0.3284199695768064 8:0.8425028998495565 9:-0.34822319854822825 10:0.1969239149176112 +-16.151349351277112 1:0.7787909191620395 2:-0.5219981442072688 3:-0.7230569918898555 4:-0.05707801168212101 5:-0.8134225824740247 6:0.09950684183685454 7:0.6261274830059296 8:-0.9502006765164366 9:-0.6724983095526844 10:-0.600347212281825 +-5.039628433467326 1:0.7680701397575322 2:0.7956844224408437 3:0.4131717201035916 4:-0.3127895385265915 5:0.7226571953995224 6:-0.06845863083031967 7:-0.1007291660029832 8:-0.9130249132342207 9:-0.4605180615474036 10:0.42093879298156 +9.007596502870785 1:-0.6562175566238462 2:0.08420074013217049 3:0.589801949672486 4:-0.11964901133703987 5:-0.8145711913860048 6:0.43854302140351065 7:0.5992967124729605 8:0.253745043289755 9:-0.10742030998120033 10:-0.5993228348160153 +-12.41094640284016 1:0.31035917086763765 2:-0.8053417167237813 3:0.5754655536186164 4:-0.3645388095106201 5:-0.9135176753316416 6:-0.8690739610562535 7:-0.14039224825138197 8:-0.7112835675593987 9:0.25762942117230825 10:-0.9483300117501923 +-12.130353212287929 1:-0.41404309625298485 2:-0.7695984204591535 3:-0.44569447239245275 4:-0.3168863099965644 5:-0.26669244730409036 6:-0.33484042698895755 7:-0.41062396946367685 8:-0.09075804785640385 9:0.8511367190902208 10:0.021918606255194595 +-15.375857723312297 1:-0.9794952880997945 2:-0.9547237660069134 3:0.2460912345929791 4:0.3174335823329406 5:-0.23758562926743054 6:-0.113610303129287 7:0.18292675847568063 8:-0.9656446754474337 9:-0.58300134324846 10:-0.6689602908128025 +-6.397510534969392 1:0.440780662587545 2:-0.03737991637410243 3:0.9506435891605849 4:0.8177486462589998 5:-0.2917628929963241 6:0.42365289098031034 7:-0.4280555544979745 8:-0.18388426864865903 9:0.5057230088452542 10:-0.1699163749308643 +-9.789294452221961 1:-0.25066699970459694 2:0.1772977344415987 3:0.5913498268900952 4:0.6293756431864681 5:-0.6430441015863757 6:-0.7238519180293621 7:0.13639541626580498 8:-0.6620281401715837 9:-0.9515237061912034 10:-0.4333426289849791 +-13.15333560636553 1:0.3491978525665129 2:-0.4067353159374012 3:-0.8677040612253524 4:-0.5757086910974862 5:-0.3186886816681207 6:-0.06609938943414573 7:-0.5419747642754873 8:0.9632759660044383 9:0.2673520823110991 10:0.36463236596724546 +2.2307697392937795 1:0.12285527276472785 2:0.8938323722714365 3:-0.16995870341610209 4:-0.3298643049714254 5:0.16781582791954253 6:0.42381594687105895 7:0.9245288214717629 8:-0.08709025093361689 9:-0.14227085487682722 10:-0.2888302862659746 +5.892885365508635 1:0.10116053019915738 2:-0.41641547074900154 3:-0.3750004290914961 4:-0.5619470211369917 5:0.33343039544460384 6:0.46766042657994733 7:-0.6937940929321615 8:0.7044604392055189 9:0.8879353764416567 10:-0.5490902425042639 +-16.692207021311106 1:0.9117919458569854 2:0.628599902089868 3:-0.29426892743208954 4:-0.7936280881977256 5:0.8429787263741186 6:0.7932494418330283 7:0.31956207523432667 8:0.9890773145202636 9:-0.7936494627564858 10:0.9917688731048739 +10.454641756541454 1:0.3490213088098768 2:0.6103387992494194 3:0.6753935651135747 4:-0.39560763769937934 5:-0.3042308221531884 6:-0.9283481899557042 7:-0.7904038212853011 8:0.37488335848537346 9:-0.296477977723397 10:0.30894819444660304 +0.08978797103855778 1:-0.13445409764877803 2:-0.6404150831493631 3:-0.24740260669490133 4:0.031151119464385646 5:0.9207882173498612 6:-0.6146471129497393 7:-0.9736175690408087 8:-0.2673180325645341 9:0.5800384183301572 10:0.479811220263183 +1.7362099941626894 1:0.5171681395917551 2:0.6177735922313075 3:0.6446678302226738 4:-0.5731769722311459 5:-0.2686270617709168 6:-0.6048534221658814 7:0.7002124303669326 8:-0.1479765297345712 9:0.009254061109394307 10:-0.31519081920853287 +-1.0349488340235453 1:0.612980711993536 2:0.05771318707554962 3:-0.10821368362160744 4:-0.8755559420458141 5:0.42566546089913326 6:-0.7966341558699277 7:-0.45253617234374466 8:-0.8289517557653971 9:-0.8968075137250837 10:-0.6325457096866376 +0.10157453780074743 1:0.9143592240573388 2:0.06419631741815457 3:-0.9961326744227916 4:-0.47174548800139715 5:-0.0821464027819967 6:-0.5495006555498168 7:-0.5627911401420294 8:-0.43426056724099005 9:0.892026786364895 10:-0.23546485121284055 +-12.92222310337042 1:0.218687524173371 2:0.013626751799176162 3:-0.8372219908323961 4:0.6197296846266354 5:0.7429130827811232 6:0.48009972886541896 7:-0.35667717521227904 8:0.18337067878780533 9:-0.22935396092245197 10:0.4076715024284059 +22.923352376063196 1:-0.7522075505725567 2:-0.20686029838909326 3:-0.1386664769095396 4:0.157117595808127 5:0.9462377653889174 6:0.9182504509330662 7:0.18170057747293833 8:0.27735387813088863 9:-0.6355799944714868 10:0.9764849106195284 +-6.132450015997121 1:0.2822534275343054 2:0.2625905791399692 3:-0.02565260641304068 4:0.4891221076432757 5:-0.6426178913585772 6:-0.8999539149461033 7:0.12659507663825287 8:0.5889572439755832 9:0.49107548332672857 10:0.47595749470452 +-9.173693798406978 1:0.4430245286298278 2:0.9923116639471541 3:-0.5609082824097824 4:-0.36542266258313916 5:-0.5814039716882617 6:0.20413852042674874 7:0.6097541611931963 8:0.5743002479324253 9:0.4735459963431561 10:-0.053969823043886755 +-5.814408490931223 1:-0.9206287328000513 2:-0.48298486023273157 3:-0.8477202041890262 4:0.5801385102362351 5:0.7146074564553095 6:-0.5987672678579339 7:0.6829077928212723 8:-0.3522788540815065 9:0.7729595638821951 10:0.5264904880591215 +6.474329501040298 1:0.6914309300550991 2:-0.4507700505202725 3:0.713821440501512 4:0.41599059910235847 5:0.507160951750409 6:0.8635615811574222 7:-0.6235518270244333 8:-0.5336201820384283 9:-0.7989630679361768 10:0.837293162455248 +6.984517471584806 1:0.16745919469723392 2:0.018033079961716103 3:-0.7339201095541323 4:0.17042828693740697 5:0.4493471632580528 6:-0.8938445962323078 7:-0.3206968104792325 8:-0.616617071238893 9:0.9327878222034172 10:-0.6575294247048245 +-12.479280211451497 1:0.9769767754725367 2:0.7706430863248943 3:-0.4506244622476816 4:0.12921761745628713 5:-0.0697804449658812 6:-0.7702703569987461 7:0.017734558413919688 8:0.7216294158911261 9:0.42547357862241886 10:-0.9001915116155741 +2.8363866587728186 1:0.11478724114928918 2:-0.4679790550082039 3:0.2344912687736711 4:0.5524878060045462 5:0.5252859884051309 6:0.5080674087215156 7:0.5010449021825665 8:0.048046765816400105 9:0.06654581719548891 10:-0.5801934713347348 +4.186809777233374 1:-0.02335342201396018 2:0.9035437912091193 3:-0.9283585631882163 4:0.454351316397237 5:-0.6948564428085262 6:0.11495485234890368 7:-0.23683956078769963 8:0.6442534752881419 9:-0.013866407845647188 10:0.23369602940650736 +2.8235031660626415 1:0.5609344938188046 2:0.3449103464885612 3:0.03972169049525687 4:0.31858762565827137 5:0.4409953589124853 6:0.22836189275697016 7:-0.1497811991899889 8:-0.23248048920679265 9:-0.30066618281100177 10:-0.9247232456911632 +6.96223432848425 1:-0.8160398553437558 2:-0.8212180893749699 3:0.7728655115832999 4:0.02387973088796369 5:-0.043499804905828166 6:-0.6997726250046865 7:-0.8686633773265577 8:-0.12597318402253976 9:0.967018116368416 10:0.5951339624149812 +4.669684795838683 1:-0.32226903644852833 2:0.5465858078942492 3:0.5228467793266189 4:-0.013157722224545143 5:0.5810668818928995 6:-0.1372653090293532 7:0.6446157527288279 8:-0.06005754873230629 9:0.014302180040152379 10:0.43474245441042636 +16.112744845653285 1:0.37257742858083365 2:0.19398954512844124 3:-0.11860882189887478 4:0.6492510749703395 5:-0.41273736981203313 6:0.18643017041815835 7:0.29136917186214384 8:0.47602883023389 9:0.7126916980867937 10:0.48462508659691483 +-9.196003366226202 1:-0.7263358951920722 2:-0.8503799288093836 3:-0.3120563620589105 4:0.3925562655164563 5:0.027666662972283484 6:-0.35173134138805406 7:-0.32703527910354757 8:0.3060102722285065 9:0.8609161725740202 10:0.33394557004432923 +1.242972458167591 1:-0.9029238804456814 2:-0.6392681059531908 3:0.8940879647942577 4:-0.8807357173896475 5:-0.13628130467470512 6:-0.5487534785116224 7:-0.40270307148061346 8:0.09152108686997096 9:-0.20745066734844642 10:-0.20624830574384978 +3.453659210660726 1:0.2710596844435682 2:0.6510497900145247 3:-0.2899158136103117 4:-0.13531811694554707 5:0.6965847786422426 6:0.9105343028780231 7:-0.007340232468413754 8:0.7672537187738411 9:0.3538906829188173 10:0.35387524540947646 +-0.48115211266405217 1:-0.17943755364759517 2:-0.1384979591151625 3:0.8425773648797268 4:-0.43234064993405097 5:0.919754442523921 6:0.8390197802990036 7:0.43890653121452683 8:-0.7647648217789051 9:0.14770258954363835 10:-0.6681813635676657 +6.965069440749298 1:-0.9158261471030473 2:0.5228494114644282 3:-0.07760531122743153 4:0.6154296244963067 5:0.5231830145381096 6:0.4892535590799165 7:0.1987053183082137 8:0.9995670294711712 9:-0.2020375688074112 10:-0.7853579334836087 +-1.6896486293598596 1:0.4638529147853421 2:0.0953805943546191 3:0.8506904243225251 4:-0.028262644692445438 5:-0.9462342015500664 6:-0.6934738957112123 7:0.601125018257533 8:-0.04871041957758315 9:-0.015245062056267411 10:0.6119856200040805 +-1.763729644326212 1:0.5376618752928528 2:0.8062119856717131 3:0.44996834959923593 4:0.9917728248530817 5:0.5974717482179492 6:-0.406972851600659 7:-0.8523198502065281 8:-0.3076377139692321 9:0.9099974915864462 10:-0.43374966692373484 +9.012829566937228 1:0.6885456531832366 2:-0.0631164354373237 3:0.8394182300770314 4:0.7207913383891218 5:0.4715324450375691 6:-0.34417503908167757 7:-0.31448279255342126 8:-0.020591617987411936 9:-0.37668573574418107 10:-0.6528048324896532 +-15.951512565794573 1:-0.6112828771933607 2:0.4867007149846869 3:0.863494046941478 4:-0.7292072742454481 5:0.6338749652624007 6:0.5980798993978542 7:-0.5119002889878654 8:0.8394383182101366 9:-0.1412423080445726 10:-0.15838730884968655 +-0.29622788243318465 1:-0.9436253326661384 2:0.2907259958032098 3:-0.1530538226933904 4:-0.6174176535420375 5:0.8209632215649141 6:0.5060548803172731 7:0.8212448453211292 8:0.33506684706740386 9:-0.5408309869188785 10:-0.8105966349150977 +-7.683213587039055 1:0.2525015766703558 2:0.6417869320191234 3:-0.7569571597336913 4:0.5265130776924394 5:-0.03992944660560949 6:0.18292946303778823 7:0.4286344960738724 8:0.9158523573288766 9:0.5039796366711773 10:0.27660486075533797 +3.9061298856792797 1:-0.6501789225392032 2:-0.6040685518173872 3:-0.6448094322678659 4:-0.2019498832769746 5:-0.5302977370883424 6:-0.010754341856880067 7:0.8791702222974846 8:-0.2283571791337704 9:0.4726320486679656 10:0.3413255179758332 +12.928385148211825 1:0.7793178379505685 2:-0.5207562047491976 3:0.37253320760898934 4:0.7540757518052998 5:-0.679378421540417 6:-0.11966022036636881 7:-0.4317798870297489 8:-0.004211291952602059 9:0.39024653887361693 10:0.45391057946097146 +5.787566514603203 1:-0.20596730554338039 2:-0.8840796727164746 3:-0.749416279057892 4:-0.5511023306046077 5:0.9941631901218697 6:-0.09907966722992234 7:0.701617914811792 8:0.9696055014561289 9:-0.7083648075748707 10:0.5781111533720358 +5.701262468657861 1:-0.7066995012593675 2:-0.6756815056791965 3:-0.5720277255842998 4:-0.09218662060241067 5:0.21494136076896653 6:-0.37012884573008153 7:-0.6828277646796448 8:-0.10038134655965236 9:-0.46253754509583356 10:-0.20813933595648115 +0.9473494330088033 1:0.6876806675510589 2:-0.9530860102792402 3:-0.4043172626863887 4:0.6696455505098386 5:0.17863581804857254 6:0.1944646561635497 7:-0.5283662172535679 8:0.4872263841818012 9:-0.2882651789318431 10:-0.06293411605141874 +-2.6834375589185675 1:-0.22376759986120187 2:0.36555755546798885 3:-0.5223502955721961 4:-0.20702347869224624 5:-0.7745351063999764 6:0.22879328233099971 7:-0.5440007473902635 8:-0.6959483071829207 9:-0.131433881760733 10:0.2764225554693165 +-3.2766108642276146 1:0.0304613976530983 2:-0.3148062986719251 3:0.24950420590071953 4:0.7152023826801459 5:0.9656885739650887 6:-0.3210562623763835 7:-0.7305896664502614 8:-0.49074917893875836 9:0.7802670253347352 10:0.8667409958355992 +-1.1838791995691869 1:0.06642047806096318 2:0.5336148776806793 3:-0.6199614859883396 4:-0.15342280723497237 5:0.8407250402808968 6:0.7060811811107444 7:-0.2913182140909305 8:-0.5925203360011633 9:0.22644925021629692 10:0.42395071889002467 +-1.5856680515554806 1:-0.8724712788102853 2:0.11445744032031424 3:0.5483166457680566 4:0.9469521544884028 5:0.2541682828467746 6:-0.436750733871873 7:-0.9001249399695319 8:-0.7555793441458385 9:0.06946992897983018 10:0.9724148045760346 +-13.039928064104615 1:-0.558607026518148 2:-0.7356765018678253 3:-0.7547644426290201 4:-0.24898664843938745 5:-0.3606374046883567 6:0.5836652368902306 7:0.8497678666873467 8:0.21331875915717635 9:0.3558733809635668 10:0.9642603628738968 +-17.428674570939506 1:0.8562209225926345 2:0.7077202100653552 3:0.7449487615498371 4:0.4648122665228682 5:0.20867633509077188 6:0.08516406450475422 7:0.22426604902631664 8:-0.5503074163123833 9:-0.40653248591627533 10:-0.34680731694527833 +13.886853032969585 1:-0.6354915752033683 2:-0.9132338112681755 3:-0.4816479770266455 4:0.5448417181244594 5:-0.6250746297187781 6:0.7410618768880199 7:-0.18029029550083675 8:0.777358236920447 9:0.9625064189449102 10:0.048040935468046 +15.61684729251139 1:0.2980237970192188 2:-0.8160931971814265 3:-0.29649852157138445 4:0.3896688599904572 5:-0.17552110506337826 6:0.8721328328445139 7:0.48984799668438916 8:0.9984496052876473 9:0.9665885195526289 10:0.8966559812150274 +10.33625540376971 1:0.09939495068155724 2:0.9790332181038015 3:0.9483428886275702 4:-0.5717299810793317 5:0.4876405069057712 6:0.163962913892302 7:-0.4095537988924203 8:0.8608269751255508 9:0.010028680058212114 10:0.9095786494455713 +9.706032970113723 1:0.7687898546315146 2:-0.9825109379412285 3:-0.5423211794439926 4:-0.3099509487314134 5:-0.11561305536236333 6:0.9012327035409926 7:0.5257495475790148 8:-0.33804422025989433 9:-0.144428735681567 10:0.28019332199039604 +6.189043888072968 1:0.13246655756059478 2:-0.751192382628302 3:0.2233421456265161 4:-0.1933575076984373 5:0.8681727702736863 6:-0.7656847407654899 7:0.1033145549916572 8:0.33909210370257403 9:-0.22241363302770267 10:-0.14479004187830435 +-8.680225911784335 1:-0.07718769939880432 2:0.6702228057326558 3:0.6647810334933819 4:-0.05115658747070784 5:-0.850780588302118 6:-0.040961453376221924 7:-0.8407690297644956 8:0.33775829053563156 9:-0.45421556034898547 10:0.8238500771967823 +-9.42898793151394 1:0.8925906426831107 2:-0.6771269725125597 3:-0.11635105688280678 4:-0.7266044201050157 5:-0.6902918845825077 6:-0.5911234800910024 7:0.49395074569300657 8:0.43660804414878274 9:0.8736983081269782 10:-0.8001177058312081 +8.486245765579415 1:0.5614295382716652 2:0.3972427851719582 3:-0.276268504977494 4:0.7803448249454739 5:-0.358957923558495 6:0.3477822689529795 7:-0.7944805581842691 8:0.8356932134547437 9:-0.4783293647580624 10:-0.2522633417723845 +-1.8722161156986976 1:0.11831037290857482 2:-0.7309091607574014 3:-0.7339122716951587 4:0.2046641765436359 5:-0.9914679283125301 6:0.13518339528098555 7:-0.9760821540963867 8:-0.6080636193563043 9:0.3890502262427238 10:0.33864957953815145 +0.5122357093733743 1:-0.9555852441641726 2:0.4754771858792488 3:0.3743376249200432 4:-0.2651772997462427 5:-0.7915484529586028 6:-0.7575915279708862 7:-0.10432268807273859 8:0.021604934223709238 9:-0.6458011732912265 10:0.40773716196391674 +-18.845922472898582 1:-0.6031480148285926 2:-0.8736524730197766 3:-0.311456616524979 4:0.420921703897325 5:-0.2904011177124777 6:0.6683252350591937 7:-0.3436202976676894 8:0.5023604359385605 9:-0.33056149241985633 10:0.5168854058825227 +6.492106438811399 1:0.7824832256885428 2:0.6105456307389117 3:-0.0436873997963223 4:0.46730493583332855 5:0.2057529813440686 6:0.5738310686722767 7:0.6307964411259019 8:0.6208424783086652 9:0.8931894299284251 10:0.7164648197763028 +-1.6472226859532182 1:0.8854767145642171 2:-0.8175744681485637 3:-0.14894858038610903 4:0.9667400540136402 5:-0.3575837217508149 6:-0.9211342680517054 7:-0.956785876301889 8:0.6558217028031554 9:0.8014538160668165 10:-0.9475520920917395 +0.185861229793925 1:-0.8181719548530746 2:0.9990094335332504 3:-0.8195848911987829 4:0.6991933015233858 5:0.07295718417836583 6:0.5968996100546737 7:0.4871410306452193 8:0.2980483098540927 9:0.779953293728507 10:-0.7978867112395516 +-5.973450525185694 1:-0.975435413991927 2:-0.7832951303253313 3:0.5098999023442101 4:0.46795978867990007 5:0.2538986807863044 6:-0.8182887550010198 7:0.8335391734637112 8:0.4286082996234335 9:-0.1726765956719154 10:0.7649845978453362 +-12.773226999251197 1:-0.383327656965585 2:-0.9439560491389036 3:0.25039001869622446 4:-0.9342091044843222 5:0.8711023711291135 6:-0.6027135241543655 7:0.9456874780319795 8:-0.243290468946338 9:0.625765915285031 10:0.5160550067618355 +24.290551295953957 1:-0.8368553572749229 2:-0.5859456648150321 3:0.873779532007048 4:0.7462623178738954 5:-0.08133011570245352 6:0.36767541461776676 7:-0.33129619282275047 8:0.6104289727615573 9:0.9416581563055089 10:0.18201841676606856 +14.490247980976621 1:-0.4765937762114507 2:0.16430711839945555 3:-0.526776940706293 4:-0.6802269991653915 5:0.40748236413299344 6:-0.500290139207977 7:-0.31915972151663885 8:-0.4586068416002418 9:-0.15572660263944127 10:-0.32925702602833073 +8.377230871265601 1:0.44141613060964846 2:0.1582267687752743 3:0.8760950367284166 4:0.40434058393690364 5:-0.7063758409891474 6:-0.616055773516162 7:0.996372393127579 8:0.6142084876085476 9:-0.528320587432094 10:-0.2815909691094802 +-3.2987560995836653 1:-0.4600479783378091 2:-0.04201794336103326 3:-0.8934505203905587 4:-0.44991326751905536 5:-0.5220579476363783 6:0.46060949186328703 7:0.9169289030735643 8:-0.022458426893944283 9:0.08100795210565637 10:0.5726732415540354 +0.3422568955736137 1:-0.9888686059817204 2:0.22752298580182706 3:-0.5048696915520232 4:-0.059433420464226616 5:0.7823831512651716 6:0.9865977573980389 7:0.9164100011124972 8:-0.3638554550863984 9:0.3038282907667611 10:0.4652367033461571 +-8.24116881862084 1:0.7565819250331731 2:-0.3733277500524168 3:-0.8841150081071696 4:-0.922282989989148 5:-0.041520813551309876 6:0.8615967014876558 7:0.8474207144091339 8:-0.7518437864641427 9:0.45076605239968837 10:-0.48912984167595375 +-4.367083147104942 1:-0.276459380002813 2:-0.957555271384241 3:-0.3761632810202544 4:-0.3897414804149022 5:-0.3133861519856074 6:0.0777990809172171 7:0.6638552243422928 8:-0.3477312155364247 9:0.5934885465182675 10:-0.5238903641193555 +1.9280240152322783 1:-0.40051093785549696 2:0.5070348672240661 3:0.7506759969575532 4:0.5042104954516786 5:0.9959688260926507 6:0.4657024999761399 7:0.910611131925299 8:0.9836517468598804 9:-0.6263172749113686 10:0.16955852322929155 +8.918138317441574 1:-0.22407391224687023 2:0.5545084933214972 3:0.6335932367683528 4:-0.2786481116648991 5:-0.9549992830441785 6:-0.5577873948545062 7:-0.960657200286197 8:0.3709573488946196 9:-0.9191180485753339 10:0.5033478020271929 +-5.657796797481157 1:0.6359910361030725 2:-0.1742637774815281 3:0.39699327107265137 4:-0.9841991491194473 5:-0.622093571871533 6:-0.5433497301426455 7:-0.6731178481686009 8:0.930615153085582 9:-0.3065877908950827 10:-0.5456093749639228 +8.697079562319692 1:0.4815820396629933 2:0.1173457441514223 3:0.7313645402039386 4:0.3354835387237334 5:-0.10300554535074702 6:0.5116687640761355 7:-0.8850803659104614 8:0.10654026377571157 9:-0.864976708975602 10:0.01345035085413615 +0.033954684723234596 1:0.6703241653088159 2:-0.13447915740201166 3:0.026022550037831937 4:-0.5145659862194116 5:-0.6963587636078901 6:0.652083884947352 7:0.22644722530715278 8:0.2671580129293405 9:0.9659035105360283 10:0.9547989197693989 +7.359108382166921 1:-0.6855762478384229 2:-0.7543318537260015 3:0.4772611975128618 4:-0.5588002332845741 5:-0.24271386844336496 6:-0.28595644325868896 7:0.8732728098501104 8:-0.8026384804471058 9:0.7589508830210041 10:-0.9992933613402135 +4.953597303754355 1:0.8915633023548608 2:0.04688596266450751 3:-0.26866754730613374 4:0.16694236975718102 5:0.23465297255622608 6:0.36488427850844407 7:-0.06717041145276781 8:0.9470029805221898 9:0.32483835237272674 10:-0.7892521260150298 +0.683536559775105 1:-0.32176084249781556 2:0.5446298870866526 3:0.4095848716057642 4:-0.42579711490120187 5:0.4482850543749355 6:-0.0982243826242506 7:-0.9190317048427039 8:0.06234509402976718 9:0.21327512416175054 10:-0.38023673796734525 +-28.571478869743427 1:-0.4597184465402242 2:-0.5489429386926741 3:0.33422914572951634 4:-0.15992695377395516 5:-0.7310003311728188 6:0.18241063863467488 7:-0.48385214010599453 8:0.08139879039334552 9:-0.8401239538877046 10:-0.8896372220209929 +-19.884560774273424 1:0.4619217451285318 2:0.28157115824800005 3:-0.3829811521605375 4:0.5802544015450464 5:0.1117061271473403 6:-0.8926034502584623 7:-0.34862293810401956 8:0.2733254857260612 9:0.6514176550598809 10:-0.02758604919357066 +-17.494200356883344 1:-0.4218585945316018 2:0.15566399304488754 3:-0.164665303422032 4:-0.8579743106885072 5:0.5651453461779163 6:-0.6582935645654426 7:-0.40838717556437576 8:-0.19258926475033356 9:0.9864284520934183 10:0.7156150246487265 +-15.86200932757056 1:-0.6341453831788726 2:-0.9259180639727085 3:0.302702923864538 4:0.749555004323947 5:-0.7932989575334761 6:-0.5620972938631934 7:0.020542041027870717 8:0.11610338700447698 9:-0.7912600154897766 10:0.5108307672038874 +9.027804254487519 1:0.1746878011084212 2:-0.5872807344913673 3:0.6018547246457264 4:0.5106104933121229 5:0.7329523371170135 6:-0.40058771577765895 7:-0.48753463550174025 8:0.34308791976318 9:0.3407668956765344 10:0.5964472848798394 +15.949172086880687 1:-0.7790584545657173 2:-0.017224094786103317 3:-0.0974907790179953 4:-0.10287391996036166 5:0.6007953354774878 6:-0.7032497754397848 7:-0.36068070856329437 8:0.021391994204512432 9:-0.6509100388083549 10:-0.5410899936281377 +-6.151586699415245 1:-0.5318094974022525 2:-0.830796057445983 3:0.603828597318087 4:0.6660892552257192 5:-0.18529748408390523 6:-0.47166833767648986 7:0.592915541856605 8:0.9944601563352204 9:-0.6981606574244703 10:0.34942553665003584 +2.010398523297265 1:-0.9293899922307269 2:-0.07588009904844029 3:-0.8500855420709359 4:0.12191867923536615 5:-0.528778681165414 6:0.3117086447237414 7:-0.4222963938187163 8:-0.03247894950300623 9:-0.05387792412717962 10:0.4053568741659812 +-6.749023248121471 1:-0.9875370165216966 2:0.7137693455001415 3:-0.2510160963160164 4:0.8732150877079123 5:0.49658934612905314 6:-0.9817012857861731 7:-0.2045309437850289 8:0.7562713668333418 9:-0.6787434327188155 10:-0.6147932888026117 +4.452639829999693 1:-0.35256148944834176 2:0.7581152951164591 3:-0.37755890552299265 4:0.9480813371197343 5:-0.3419340388717347 6:0.3487602851799074 7:-0.5576726724270562 8:0.4899696188087421 9:0.563074979676983 10:0.7865891460062227 +-4.938733988900586 1:-0.4108386466193119 2:0.3287655432069885 3:-0.5853553038038923 4:-0.6480591422742821 5:-0.4787998161299789 6:-0.5828003484675421 7:0.42835744317623003 8:0.8378098987706633 9:-0.5645180498703375 10:0.28981512694646705 +-3.373242544176224 1:0.04989033652617936 2:0.6575826440927308 3:-0.24028051935833128 4:-0.6649808138961095 5:-0.6530198970442704 6:-0.19331254127919362 7:-0.6743004878881749 8:-0.7214986105015062 9:-0.30648035516261385 10:-0.6455097687924254 +-3.2843694575334834 1:-0.3548536057581908 2:0.7350125943559394 3:-0.3635282827378974 4:-0.8552820154885781 5:0.9140879208466111 6:0.21870365067770892 7:-0.17738543429561382 8:-0.052851966578491005 9:-0.36066059517759097 10:-0.9020765799355679 +-3.277146077677404 1:0.910961221014513 2:0.4302525202590246 3:0.11079959840001119 4:-0.3614188274820125 5:0.5080231397310961 6:0.013940825892631237 7:0.33583012240022403 8:0.5008797094229163 9:-0.663083147090173 10:-0.0865028013627418 +-0.202246147968096 1:-0.4929308143227653 2:0.8374300027105082 3:0.08763999085193186 4:-0.499738438136623 5:0.5926071511295365 6:-0.5135396038023627 7:0.6946715869746543 8:-0.5184428793490325 9:0.21753085495829239 10:-0.33796308746585235 +-7.1237150573506955 1:-0.8506203499039495 2:-0.6581804183622855 3:0.6484205342724825 4:0.013914696389758285 5:-0.6214530117645831 6:-0.011163110491807293 7:-0.6025372583334574 8:-0.0371573886520411 9:-0.7933455929226487 10:-0.38653838674273455 +6.298226129171093 1:0.7304191211928768 2:0.8128475475660479 3:-0.03161148630216015 4:-0.6018899317958344 5:0.19277055729934367 6:0.3002272616310928 7:0.949169758830406 8:-0.1011823256970481 9:0.16093341376629966 10:0.9596833606094763 +14.906594657519511 1:0.5053240355803015 2:0.6775698974866082 3:-0.6194771000646291 4:-0.02876927004033525 5:-0.5481504206112477 6:-0.9239150546263386 7:0.471216755072994 8:-0.0027794620943384363 9:-0.8954411386878227 10:0.8991742143686698 +2.1710965297686267 1:0.4578509053930304 2:0.9270194505165124 3:0.22470373699901236 4:0.21526179917432753 5:0.5299563895862103 6:-0.5824108997775908 7:0.03801922095671095 8:-0.5164033454609385 9:0.4370246809487237 10:0.6514133050988229 +15.05806598279517 1:0.48645077410559057 2:0.7821442063987365 3:0.1943681666933883 4:0.8289246958621577 5:-0.08034311437806041 6:0.03709694472527203 7:-0.895481297246602 8:-0.42921579749551664 9:0.5447075872378688 10:0.844397849728866 +-0.4683784136986876 1:-0.5083135683360327 2:0.626070365769088 3:-0.8737725909401557 4:0.725622293853621 5:0.0018794384199978253 6:-0.9343604622552886 7:0.6655593328822609 8:0.47501755618845753 9:0.8388618477210947 10:-0.5143806767304449 +5.823027255871114 1:0.08635467091841886 2:0.6314532702073175 3:0.8862069437865836 4:0.6542025864928516 5:-0.6846784290231471 6:0.048487096050569445 7:0.30828004933669395 8:-0.49438881988995687 9:0.5706936923061823 10:0.037705651885639346 +7.03779380408974 1:-0.07193682621291098 2:-0.5816975957307158 3:-0.8426927090342973 4:-0.37504851992255306 5:0.4473129018316815 6:0.3101938194888525 7:0.6160050428837607 8:-0.913998555949695 9:0.40461966540531313 10:-0.7581141330823786 +-9.770500546345563 1:-0.31358873581579894 2:0.11771478839130278 3:-0.3404842110585631 4:-0.0604362797252429 5:0.2159524972176814 6:-0.24737863017398087 7:-0.8541428610709716 8:-0.06753562283135062 9:-0.11567537916769255 10:-0.5606246203677223 +20.000154367451547 1:-0.344717847914646 2:0.8454969480099985 3:-0.58856299370874 4:0.5884510299634649 5:0.49162879631128553 6:0.7958075013181658 7:0.7781911267315837 8:-0.6780885011989877 9:0.9797694629597928 10:-0.1872163682079866 +-6.239848349456753 1:0.9132793720646253 2:0.1680340663118458 3:0.01740115925682284 4:-0.26580395408599133 5:0.28551914590761074 6:-0.9939706142381568 7:-0.8740927279520219 8:-0.8731218126652498 9:-0.10993630739903892 10:-0.3069565039708746 +-4.173072569004537 1:0.7864835254860851 2:-0.5614522227484218 3:-0.7718396381376464 4:0.49508673889127985 5:0.24030155936964714 6:0.8080778221819038 7:0.05395496402881128 8:-0.3045148076729973 9:-0.6134406357458853 10:0.7447268183581948 +-11.328415936777782 1:-0.10183127796258096 2:0.5689039487721601 3:-0.07015335898840225 4:0.23254189629731292 5:-0.3226974656715038 6:0.2859450214054784 7:-0.4916677058012495 8:-0.27564895614732055 9:-0.9416483232894219 10:-0.7472248333434015 +8.719164753818454 1:-0.8231424386390782 2:-0.03953537069863633 3:-0.3271580541537027 4:0.892192314973022 5:-0.6759017192358232 6:-0.419591686354591 7:-0.23967385135363606 8:0.936992531568956 9:-0.12946409158671512 10:-0.9082863469271643 +22.31738046492344 1:0.37030851555335365 2:-0.06654751559177563 3:-0.5759425437665169 4:0.9179952251152963 5:0.8628921839116359 6:0.8421952184405965 7:0.9625804174561126 8:-0.03075332253237728 9:0.12227386374957994 10:-0.6243390357793757 +-1.189108450798179 1:0.5681776913545951 2:0.46049028271139436 3:-0.366463711956754 4:0.025856437432560275 5:0.7547565372954261 6:0.5506193192167212 7:-0.6279807084274867 8:-0.38698884324386107 9:-0.9885778854008227 10:0.7814740172261654 +2.8767042393531965 1:-0.6841229745503388 2:0.6252203895646273 3:-0.6737644654353572 4:-0.7321040107741059 5:0.3162570540986238 6:0.6211089085315002 7:-0.33984617437403464 8:0.1227089818682312 9:0.04586594421613177 10:-0.4679977358965799 +2.783332151730615 1:-0.39148258540779013 2:-0.3037233649803406 3:0.7955133548911926 4:-0.1729544208044842 5:-0.18247049275020033 6:-0.1315085429729259 7:-4.447133918370483E-4 8:-0.805837119503338 9:0.11574866650006688 10:0.8517519041042676 +-8.99205564094827 1:-0.45501536967706535 2:-0.35829694693457914 3:0.775695048377375 4:-0.25331195582275745 5:0.15524612858817055 6:0.7400717904631442 7:0.8382485596668376 8:-0.5619009369436814 9:0.4386801597659249 10:0.09960232210246622 +-9.808386702564658 1:-0.987404834666963 2:-0.6732308850750186 3:0.5528285725528492 4:-0.8796302275267409 5:0.30705569958232193 6:0.8635312232105203 7:-0.14033675947074187 8:0.5516086773506235 9:-0.7487899106678442 10:0.8851518933134919 +4.948281656077033 1:0.4331269064492329 2:0.4628446087354616 3:0.33730748244242537 4:0.3473124014683382 5:-0.1707966473106064 6:0.8558057784524846 7:0.1390312032172829 8:-0.7918343112673001 9:-0.85993782695915 10:0.33563174747577107 +10.791261476321019 1:-0.5417345768902055 2:-0.06334901799780424 3:0.027652223245870466 4:-0.9881487640651161 5:-0.19441123027957707 6:0.40295156581142355 7:-0.8315553696517317 8:0.11405283165483926 9:0.5377980570161418 10:-0.24581620554740824 +-0.7287230169119936 1:0.33985587202063283 2:0.6841261099887705 3:-0.9441564997438197 4:0.28660913255058906 5:-0.7597915572726905 6:-0.8535957517473378 7:0.609134673753593 8:0.29636368731717977 9:0.05791523580926916 10:0.5589907965230858 +-26.805483428483072 1:0.4572552704218824 2:-0.576096954000229 3:-0.20809839485012915 4:0.9140086345619809 5:-0.5922981637492224 6:-0.8969369345510854 7:0.3741080343476908 8:-0.01854004246308416 9:0.07834089512221243 10:0.3838413057880994 +-16.71909683360509 1:-0.24375714099465773 2:-0.11915875769929496 3:-0.3741442802364221 4:-0.3812947578178094 5:-0.7032156297055756 6:-0.18339122712542388 7:-0.8634662520461855 8:-0.714561692659166 9:0.020558676493369177 10:0.22804428969949986 +-8.822357870425154 1:0.39332200105884363 2:0.5652370435795515 3:0.6220479966351453 4:-0.018976695481651484 5:-0.6868425195058918 6:0.2029750380170401 7:-0.5550873767310935 8:0.16864133648532342 9:-0.008843355054633628 10:0.6472547984399621 +0.36392761004065594 1:-0.9059630492963144 2:-0.41039282402227384 3:-0.006673269562094131 4:-0.4989314017618798 5:-0.17726034513032318 6:0.037764439388023874 7:0.30703957185016595 8:-0.09040426404909185 9:0.38661451965066274 10:0.1630571642147851 +7.415902871490132 1:0.188586850708651 2:-0.33013604761672566 3:0.6667976416858177 4:0.8537064956198137 5:0.03971370422819254 6:-0.43229195778759966 7:-0.9607154505216515 8:0.8413204878098277 9:0.40010565279599897 10:0.7306602852367441 +-4.129456164370826 1:-0.7967510984807558 2:0.545111159425699 3:0.16038228447433012 4:0.6311115528116698 5:-0.01985759480036542 6:-0.9516543115476572 7:0.18022912194075458 8:-0.2177157123823752 9:-0.5433158910016767 10:-0.4603867691069983 +-9.211066571082247 1:-0.3611235296125135 2:0.1402619601475985 3:-0.23132525512647795 4:0.5534401725834837 5:-0.34978585787763206 6:-0.24147682088922773 7:0.8089009287617064 8:-0.09075864922490862 9:-0.05759391404550773 10:0.3371306765964468 +6.52392916461972 1:0.19122050285976044 2:-0.625453376800498 3:-0.26804961781489856 4:0.9669297468261109 5:0.9142504122291741 6:0.7678963028488108 7:-0.6852943621882759 8:0.5898129788981794 9:-0.6580947533327339 10:0.46875109532259396 +-12.46765638103286 1:0.35148385951742633 2:-0.5206883134357769 3:0.35436280451876345 4:-0.8837833467474128 5:0.3433887284719144 6:0.3914771858025621 7:-0.17813796710416252 8:0.6553344538056296 9:0.3721548243590813 10:0.9442185832979726 +-4.937258492902948 1:0.9150659354384785 2:-0.17085510578573548 3:0.8233227233543232 4:0.2539669132090434 5:0.18955049451212935 6:-0.2833188558310358 7:-0.48483747414616496 8:0.8917378487725669 9:-0.13169122011498646 10:0.9815059855284158 +-0.5233425797210233 1:0.4238363705720569 2:-0.18363058784066522 3:0.2949874786744968 4:0.12235592695567354 5:-0.9746310186182559 6:-0.8990867637441311 7:-0.8580982328464586 8:-0.7930887027205957 9:0.16757307988090275 10:0.988861929608575 +-11.904986902675114 1:-0.3692990475534952 2:0.32166293883244323 3:0.3401547722249436 4:0.10009747375878408 5:0.7598877208920192 6:0.2853003389082669 7:0.22880221701675074 8:0.4521491122351502 9:0.33222018268933895 10:-0.9500018867461919 +8.324969054805921 1:-0.48086111720736513 2:0.3705524122401185 3:0.43635448766342133 4:0.6544321903349255 5:0.059000747296945155 6:0.3328036763371236 7:0.9609146376298034 8:0.5943082361322021 9:-0.3074246170581105 10:-0.6763916655761453 +0.21701641918233017 1:-0.29449708766806304 2:0.040640346437143426 3:-0.6524819533513639 4:0.37482287233702394 5:-0.29800608396043216 6:-0.537030944860492 7:0.2862394027536084 8:-0.3783043133672048 9:-0.5292179323972728 10:-0.09583783955916791 +-6.84977373580439 1:0.825136109631339 2:-0.5722868691442817 3:0.11048134523744757 4:-0.5946054293068455 5:0.28061485657354823 6:0.9135611623885838 7:0.35590421873954603 8:0.8943562249941011 9:0.4183378981109729 10:0.5714160298247304 +-11.039347808253828 1:-0.9620263418414967 2:0.22669065740934724 3:-0.7378036492234086 4:-0.4460191511609126 5:-0.2594476006347024 6:-0.989879976130936 7:0.762096015449097 8:0.6983868222083149 9:0.8729993459982626 10:0.3426647417451305 +-5.882860061103163 1:0.5247178959769465 2:-0.6217169944869176 3:-0.13640714414758315 4:0.6608201052790283 5:0.5789945243704264 6:-0.12686057623612612 7:0.7277882307863026 8:-0.47949544949858236 9:0.9781208432412936 10:-0.8980068284379361 +23.52945433069272 1:-0.12339549394875426 2:-0.6769524283089239 3:0.9324962870874394 4:0.28956947294105206 5:-0.2957355479338608 6:0.7504385350771912 7:-0.8769262306643106 8:0.41591311300668155 9:-0.7694611231426498 10:0.9885110924181837 +19.043184423383824 1:-0.13783178628851878 2:-0.853631844645959 3:-0.12792415583066052 4:0.6936898387576049 5:0.8488563282318959 6:-0.6530521292304581 7:0.27832187660440666 8:0.09838048719062442 9:-0.5913230087557231 10:0.260839433107553 +6.83105883806984 1:-0.9085282656519695 2:0.65203708247844 3:-0.687580071985604 4:-0.045008726377529173 5:0.4762107922777967 6:0.15939259525248506 7:-0.46363191848939334 8:-0.25856682230410266 9:0.313842004143269 10:0.5042938214484851 +-9.409197719620593 1:-0.34356198962701945 2:-0.06381545064099514 3:-0.9332814619122063 4:-0.2629675367181199 5:-0.03876014002851913 6:-0.4606936151803749 7:0.49272969757318563 8:0.5550196351479111 9:-0.1758425343811718 10:0.20285868144226837 +-1.3101852978323116 1:-0.3740821549570985 2:-0.9788976137554464 3:-0.6078739734947245 4:-0.8007745980271539 5:0.7381298546055934 6:0.7407750458109124 7:-0.7711351008178868 8:-0.9895256155202141 9:0.35793767138197174 10:0.6589909255086295 +0.5180809608973377 1:0.19289850282287446 2:0.6301214514538145 3:-0.15311307199521518 4:-0.8607670552113709 5:-0.46422067276745316 6:-0.29812862604449464 7:0.519464836430044 8:-0.9480450997338103 9:0.973503038633444 10:-0.7843880226794626 +1.9947872601406775 1:-0.15799682110486057 2:0.22645891561571352 3:0.3141842574216682 4:-0.36086019480721676 5:-0.1429373936064291 6:0.8097261636650581 7:0.11764088861630029 8:-0.9151998265501957 9:0.6536711690904891 10:-0.17232697113157425 +12.352290000973428 1:0.8176113135335772 2:0.39342616792621987 3:0.44011948797971234 4:-0.4412435869837865 5:-0.24509203724837314 6:0.8636655043434542 7:-0.4251583124505798 8:0.2068056615503988 9:-0.3501114760443049 10:-0.23701353324739483 +-2.891643319177732 1:0.7722403010820704 2:0.7994121584045861 3:0.18520464815273208 4:0.7273575609391227 5:-0.3758589216283552 6:-0.7598404862373955 7:0.5748649410179301 8:0.6897988099260968 9:0.5638920860629713 10:-0.992567809902162 +4.803737144054077 1:-0.7367711178556622 2:0.07370548192399351 3:-0.5510509754264419 4:0.11949095653894504 5:-0.7723751845800411 6:0.6450480728551136 7:-0.9508825019800493 8:-0.3250395411575804 9:-0.24913562167143777 10:-0.3617439870343031 +5.051689886526102 1:-0.09854955786627007 2:0.5298224514703289 3:-0.014996634675966236 4:-0.4462048687049027 5:0.22912790083984547 6:-0.513533454471272 7:0.1452771069237353 8:0.371152210841464 9:0.9204732090987018 10:0.7472990716905279 +3.8591142298280476 1:0.7532169023970261 2:0.8291433156934658 3:0.9255891263525324 4:0.3248663809949248 5:0.9905320652281553 6:-0.10383453745167626 7:0.8519246838852608 8:0.6024015353989258 9:-0.06958036249881938 10:0.5862142389541998 +11.30005914221598 1:0.026411858067972194 2:-0.6968445330429607 3:-0.8194566946165238 4:-0.12780659247925996 5:0.8406393783194903 6:-0.24617182945415128 7:0.30199973460219853 8:0.6062457235841974 9:-0.19314055910416927 10:-0.48313233883372964 +-10.288657252388708 1:-0.7388306404020344 2:0.07753617971873439 3:-0.5735498713988352 4:0.2183581175474576 5:-0.873572721679176 6:-0.8788755575751708 7:0.7087858362905568 8:0.7126712562404713 9:-0.7607334319316799 10:-0.4627367552114916 +4.895250842405817 1:0.9772954128558484 2:0.6020087399988574 3:0.16946626176056134 4:-0.011334492807484997 5:-0.5391845039589362 6:-0.4315843612118535 7:0.9065130011032458 8:-0.4860160207844919 9:0.0921755607946162 10:-0.022200673265013515 +1.0479421939727227 1:-0.055436367433274514 2:-0.6710483362647659 3:0.9222786043047919 4:-0.22005981623386184 5:-0.8141845044113469 6:-0.31766631447334226 7:0.6067696845798944 8:-0.1445661385071555 9:0.9172271611227454 10:-0.8079554780561127 +-9.754451457291598 1:0.533713237587885 2:0.6499588942067549 3:-0.49188790503368285 4:-0.6925119436487435 5:0.3345265979579788 6:-0.8117849521672496 7:0.9312055115656304 8:0.3273803451149724 9:0.7567478475677727 10:-0.6256676928549367 +5.869027126482974 1:0.7273823383600513 2:-0.2519813990388706 3:-0.8239584025397881 4:-0.13749750031735974 5:0.6142824732416132 6:0.6251630800232315 7:-0.6138240706157267 8:0.7210396245391326 9:-0.41832155201953714 10:-0.8965988320689853 +9.14234252751227 1:0.7295320896113133 2:0.6150271212503227 3:-0.9785024737101733 4:0.30006672036705506 5:0.11703528191771406 6:0.2971639460196238 7:-0.7920108995168815 8:0.32649036066184567 9:0.03522428067355543 10:-0.1766251898148803 +-5.643698771141404 1:0.27360638280623983 2:-0.6124401810442446 3:0.24950528730210886 4:0.09920211684887548 5:0.7187490549286091 6:0.6212724115415782 7:0.5864634211269566 8:0.114951165007104 9:0.44859258949094283 10:-0.3768352371578665 +12.781643819428492 1:0.9144335582094396 2:-0.4579872615218674 3:-0.6521934534632468 4:0.4462086111316512 5:0.240360283350179 6:0.23974046479581124 7:0.4840439971437822 8:-0.7250363120037027 9:-0.29769496257362094 10:-0.3382859512018359 +8.393556738722923 1:-0.8263387132502396 2:0.9434824094966923 3:0.1607861709872136 4:0.15217100448798782 5:-0.6517945935711484 6:-3.354731073326178E-4 7:0.07846631386981562 8:0.687844846942889 9:0.9277854407325892 10:-0.8855380268588307 +-15.348871155379253 1:-0.5734707274250155 2:-0.2526008551945753 3:0.23752094195309925 4:-0.7074613963298721 5:0.4674168537545218 6:-0.3198997855552628 7:-0.10415974108745596 8:0.5616912699671224 9:0.43742425558560694 10:0.19732530755184596 +13.138260063721448 1:-0.9415220143797984 2:0.6015431361268124 3:0.38898046240229545 4:-0.5750448371021175 5:-0.5803995196333898 6:0.11772198725731342 7:0.7512685244060366 8:-0.6683465740662857 9:0.9515652825318053 10:-0.32405935964523547 +-26.736207182601724 1:-0.47083104147202404 2:0.28748860067800597 3:0.007399318769021113 4:-0.8189013750589702 5:-0.5156633937248272 6:-0.9906928746525896 7:-0.8848419810272337 8:0.2197280161306785 9:0.12855082514870197 10:-0.7862803985146845 +-20.212077258958672 1:0.5609065808412279 2:-0.9201904391147984 3:0.908305865183735 4:0.9255146658282842 5:0.6871419344095282 6:0.4201876217923466 7:-0.42906289792612684 8:0.5787691868233418 9:0.7260522064761288 10:0.28251641556690554 +-0.44652227528840105 1:0.37640618494870504 2:-0.20012451052963542 3:0.9420894309510319 4:0.4218728633972739 5:0.5551974480349577 6:0.07615991810462619 7:-0.12409220462011294 8:-0.22212591926375946 9:0.21160498862483723 10:-0.6092792830633924 +-1.9481059746438067 1:-0.43820030250217457 2:-0.6836588417639442 3:0.733018205278934 4:-0.6564348753121718 5:0.7333385435136448 6:-0.5577457688360317 7:-0.31035811050608975 8:-0.7189201447768139 9:-0.7629842028723994 10:0.7179459779331092 +1.1951162998609508 1:0.19541555859727744 2:-0.4796785506546435 3:0.14123852670749248 4:0.7161847585887089 5:-0.2502765085719578 6:0.8815667909545981 7:-0.6418691905513725 8:0.49600147195728783 9:-0.3091837674381053 10:0.4320162841463153 +-8.99125390483227 1:-0.01183888602092864 2:-0.5901829024081027 3:-0.4343074406380647 4:-0.40450313056290166 5:0.05269590196351448 6:0.733631212862198 7:0.9575176715505025 8:0.5974628692830348 9:-0.20284241796038271 10:0.9577348510907686 +-7.955533026930219 1:0.6104830760481679 2:0.5915483572646505 3:0.3275427350991458 4:0.48361434056132424 5:-0.9466590639056058 6:-0.24662428438925743 7:0.9856361456534972 8:0.9434155212648045 9:0.3466736921968707 10:0.12927980558284102 +-12.500773785355054 1:0.5733321361720694 2:0.39154119830075085 3:-0.9347116355607772 4:0.0920586614926524 5:-0.6959457183810456 6:0.2136579936466858 7:0.17595268059814395 8:0.8828168055200465 9:0.18934277314853398 10:0.7565908584660754 +-11.43180236554046 1:0.082018621904135 2:0.9074181204118958 3:0.46125595008850273 4:0.40328845936169966 5:0.7803064691948824 6:0.20802011482729377 7:-0.41368899649077284 8:-0.8997565495498339 9:-0.1880483213318005 10:-0.15538597634233264 +-5.055293333055445 1:0.4442675297698402 2:0.19045719972922193 3:0.4877438951288897 4:0.7984474402420494 5:0.3251350777349489 6:-0.18676050499673869 7:-0.2701840041572374 8:0.4486609996458524 9:0.5403637876036615 10:-0.8971614841211264 +1.0276485382241776 1:0.7953696703382547 2:-0.3245779681908927 3:-0.3507435626548021 4:0.9510986059491036 5:-0.8655491074076527 6:0.20729233888498677 7:-0.43078300089533594 8:0.19504657032168216 9:-0.3173814102187291 10:-0.042479969052890754 +9.690201571311908 1:0.16852987139559206 2:-0.2514893273405625 3:-0.9993240281686275 4:-0.2166013247997891 5:0.33294165754921234 6:-0.5824203831560628 7:-0.15253642946648616 8:0.3547892367555441 9:-0.047604356104869794 10:0.9229112136183077 +2.2591036039970347 1:-0.9919593184325572 2:0.6323551392201245 3:-0.20815293136790447 4:-0.002395046469600759 5:-0.5015903362190326 6:-0.16698803749234048 7:0.7901657583805675 8:0.33755402936964973 9:-0.3707337678548108 10:0.6995480653730146 +1.5130881908855742 1:0.973710432688613 2:0.6518972988019702 3:-0.16491318496856833 4:-0.6066757853095415 5:0.8762371591845273 6:-0.9056066630820714 7:-0.3388079327070965 8:0.3934146060660142 9:-0.8756168865642253 10:0.9522427911640303 +4.023618949132531 1:-0.14974626191548301 2:-0.5874962377709136 3:0.6780439909311404 4:-0.37291203746764356 5:0.08104034602232169 6:-0.4706923395029945 7:-0.8924577368048239 8:-0.3363784341297067 9:-0.4139746050396018 10:-0.5107600309932907 +-2.8674162893420965 1:-0.7554383289076523 2:-0.1355597928418868 3:-0.3891904246986413 4:0.43949832438341785 5:-0.43859957095446833 6:0.37548094528561093 7:-0.5228633291549518 8:0.24169710795100352 9:0.7131753590746546 10:0.03458176767001042 +4.661164232198611 1:-0.12738868751385546 2:0.9446285809821182 3:-0.17981416859193433 4:-0.7535879975625193 5:-0.08594548726529161 6:-0.9983154486609989 7:-0.7272748852665216 8:-0.8197811039616518 9:0.5177610923333253 10:-0.6180731281817853 +-0.12347625601866746 1:0.10820547757674692 2:0.1825421454873002 3:-0.3412486258429426 4:-0.14925445930975534 5:-0.6594599831395103 6:0.9552502376248448 7:-0.7875626067291472 8:0.3854984181307912 9:0.014303876202374832 10:-0.7300443667550689 +14.546296184422973 1:0.2459523985646046 2:0.9434777073825811 3:0.2112745925235362 4:0.7730688005214974 5:-0.13727994893203732 6:0.6140037510172511 7:0.7545298281668846 8:0.7814551909982614 9:0.0026683642139069264 10:0.5633973602849358 +-19.66731861537172 1:0.9353590082406811 2:0.8768609458072838 3:0.9618210554140587 4:0.12103715737151921 5:-0.7691766106953688 6:-0.4220229608873225 7:-0.18117247651928658 8:-0.14333978019692784 9:-0.31512358142857066 10:0.4022153556528465 +18.84119697288412 1:0.4423204637505467 2:-0.4364821709544735 3:0.3935363893778452 4:-0.7750286735195999 5:-0.6981814766625978 6:0.6889512553826111 7:0.3646791168217727 8:0.0023536025493677837 9:-0.08378048150085249 10:-0.05659381771155503 +17.40329212914592 1:0.9155980216177384 2:-0.35593866074295355 3:0.44775710780914824 4:-0.42914421567532357 5:-0.2734430718503955 6:-0.8937042912745483 7:-0.3143761936611371 8:0.07805814979426184 9:-0.31386151509289784 10:0.6202932236456253 +-19.402336030214553 1:0.462288625222409 2:-0.902975525942725 3:0.7442695642729447 4:0.3802724233363486 5:0.4068685903786069 6:-0.5054707879424198 7:-0.8686166000900748 8:-0.014710838968344575 9:-0.1362606460134499 10:0.8444452252816472 +-3.855123203007599 1:0.5072557393175969 2:0.4626973233672753 3:-0.20910077161652119 4:0.9431415515135266 5:-0.1293690767585638 6:-0.2033835058111637 7:0.501429131658198 8:0.175133281735671 9:-0.6091682952201736 10:0.543010689352589 +1.493768355655548 1:-0.7772812666041105 2:-0.7743738591348672 3:-0.2848754060915175 4:0.3336846848765145 5:0.6219572132443736 6:-0.11144657683793624 7:0.7606913325884337 8:0.8547085151723017 9:-0.31728444617771134 10:-0.4668474022688931 +-17.803626188664516 1:0.5176340000264179 2:0.23048377874011128 3:0.6162746928601832 4:0.16908590014785418 5:0.9695207469685181 6:-0.34713218673384705 7:0.8526833760069625 8:0.9895592279649763 9:0.8805561957342884 10:-0.43452438291417894 +1.4060200157931342 1:-0.41964471941333525 2:0.7738486114171979 3:-0.0964606192284374 4:-0.25351781452566025 5:-0.21065389913054244 6:-0.40490416354122916 7:-0.7696501777959646 8:-0.7710488116813146 9:-0.6777228721053572 10:-0.09381158095961428 +-17.026492264209548 1:0.8367805314799452 2:0.1559190443625338 3:0.048200110551483544 4:-0.7340083467235765 5:0.2661150265782781 6:0.3881661781792165 7:0.9485287302765621 8:0.7201540574376382 9:0.8509234862656003 10:0.9658114866648093 +8.729450606651499 1:0.6404862166906327 2:0.16516090922657822 3:0.29013117743588057 4:0.37056732180613317 5:-0.3376494575302882 6:0.9012625630650577 7:-0.42150978319487 8:-0.05630249989686087 9:0.706104255632954 10:0.01935884085365225 +-5.516822117602276 1:-0.5718348423045241 2:-0.2145777722920088 3:-0.09307467998835195 4:-0.7311274103678378 5:0.5272184003067053 6:-0.00528176138162495 7:0.2852826178935919 8:0.6180999884045897 9:-0.7526372151008776 10:0.20416472532830543 +13.001541259752251 1:-0.5137703877272299 2:-0.15452359837207896 3:-0.25657600903152744 4:-0.9773110735601165 5:0.0718147980090178 6:0.18965211809311744 7:0.7795354990363292 8:0.21976898743223638 9:-0.20364089221752524 10:0.33822332985943304 +18.443388694564348 1:-0.9278344397401963 2:0.2678538727090136 3:-0.46932389854374734 4:0.8494176173177825 5:0.45765527018197694 6:0.20546395745879287 7:-0.199860294349123 8:0.47798730134403256 9:-0.2279771893187592 10:-0.30836118564314274 +8.952089112152663 1:-0.7371671220953286 2:0.8160149639986789 3:-0.026630089188139028 4:0.5931015267817183 5:-0.12216243475451294 6:0.161290795125286 7:0.7423016751095652 8:-0.5212872902985852 9:5.606147011660845E-5 10:-0.409626733921443 +-3.7062463981908027 1:0.5633514321449928 2:0.9914900963311462 3:0.1867799930236702 4:-0.15960235736142847 5:0.1204791067384241 6:-0.7733281422620872 7:-0.887447048141158 8:0.7931515335800692 9:0.732289882696125 10:-0.034992898370363124 +-10.58331129986813 1:0.6627003739767989 2:0.10688718810947728 3:-0.49230090744757216 4:0.8936580036513948 5:0.012227929286241057 6:-0.1442038886014838 7:0.9203452040795139 8:-0.20719832624131262 9:0.29561869366253335 10:-0.08597725084864649 +9.818996211259908 1:0.580133516885796 2:0.07422424429848573 3:0.33438634998226924 4:0.26054797992533696 5:-0.8771304726537796 6:-0.9710990591964794 7:-0.1869287393875041 8:-0.6167738073093247 9:0.34401921428837245 10:0.6737600514607418 +-11.87816749996684 1:-0.7193071334885193 2:0.5247127705364141 3:-0.02978727198197606 4:0.18353223007701058 5:0.40350110058596944 6:-0.36002841871228686 7:-0.20781535546501528 8:0.5517883176456557 9:-0.9938027872744732 10:0.6245061418135955 +-12.198096564661412 1:0.27542314155961156 2:0.3459734388741733 3:-0.38737776987446937 4:0.6244101669171684 5:-0.7801218302490938 6:0.20444733666197523 7:-0.5667599464182904 8:-0.9462131580071358 9:0.5576565405741785 10:-0.9307557040059242 +-3.6610413123521357 1:0.045569951437504086 2:0.32203961277046145 3:-0.04228927426053675 4:-0.9435304938416831 5:0.3750509710699601 6:0.21298970117620142 7:0.5491054691791977 8:0.33695088608872203 9:-0.9923500858828505 10:-0.6402707119893463 +3.782742149409224 1:0.7795250611996376 2:0.43296979846218275 3:-0.6481485005937841 4:0.3235717281667645 5:-0.8067382770768907 6:-0.06740397503468509 7:-0.2835017205434338 8:-0.5875853498478532 9:-0.25699561837680585 10:0.7813561594373908 +-17.065399625876015 1:-0.01772446594568744 2:0.563282914714494 3:0.14232420381013955 4:0.031667902604941345 5:-0.7815348482900619 6:0.3657733497576803 7:0.7208326162626688 8:-0.7863253120180662 9:0.3329194167867533 10:0.6175752945608013 +16.23248797654815 1:0.2615647748812251 2:-0.6631801348538622 3:0.6420349382574477 4:-0.31980528388089846 5:0.38021930887251365 6:-0.060298437830818896 7:-0.8911652782989568 8:0.3424617259589986 9:-0.8515350749364614 10:-0.42354709676980207 +-5.015963911416578 1:-0.07890564237014686 2:-0.09864377281008885 3:-0.13139943914680408 4:0.6610949669857866 5:0.06777579108221987 6:-0.26586245727222835 7:0.17443498956808612 8:-0.3129854922817781 9:-0.37913757211269505 10:0.7627186373372121 +22.647750304177556 1:-0.03666997412165163 2:0.49691867674483814 3:-0.45898559472166967 4:-0.09932248891016404 5:0.05692910907689508 6:-0.5006743461081364 7:0.9992936758550379 8:0.8252525466172065 9:0.9431711015127009 10:-0.4891497061921315 +-3.731112242951253 1:0.44353490207818513 2:0.23112032838224117 3:0.4697682541445527 4:-0.7507514828346664 5:-0.06323257550543837 6:0.0997091431243109 7:0.9394036761509628 8:0.4103869738859962 9:0.6564209227640914 10:-0.5427466755921158 +0.6761872737225261 1:-0.30051626190360503 2:-0.26699232020158803 3:0.8668758741279379 4:-0.40325291744583347 5:-0.9756425738484267 6:-0.5116398654634617 7:0.16424789009043073 8:0.8034099442414044 9:0.8554935001446193 10:0.42747702930667497 +8.449247195197387 1:-0.6588765973399024 2:0.2502285196526799 3:-0.20481547024283087 4:0.3770725284683252 5:-0.169707887761277 6:-0.0804075502584003 7:-0.3580757176408007 8:-0.6042549664471129 9:0.360349278976142 10:0.15899650901110962 +27.111027963108548 1:0.7106841652047162 2:0.6853699382312817 3:-0.8076297545289823 4:0.7932321056591545 5:-0.8011085095234463 6:-0.7017292726737878 7:0.10568649778064154 8:-0.40755358264969255 9:-0.061008981132773865 10:0.08895972651409556 +27.78383192005107 1:-0.8378790218922778 2:-0.6651002504721837 3:0.021049638665430415 4:0.32994334871293196 5:-0.7981304887988308 6:-0.2947962117284566 7:0.9739408711845776 8:0.9442893181893954 9:0.010541491359981059 10:0.8332791453382604 +15.700710963871254 1:-0.538773982400854 2:-0.5966426806845984 3:0.14570292467314627 4:-0.5937791901212952 5:0.7779251136963325 6:0.9962962075803357 7:-0.4774083823748394 8:-0.02528476957876369 9:-0.17305036341254398 10:-0.6013841506503688 +-12.558575788856189 1:0.03250364930617211 2:-0.6723950859659307 3:0.7090474884514901 4:0.25034305882632735 5:0.7036774024093582 6:0.622650236684523 7:0.5776881238206741 8:0.7999754726258337 9:0.21332972563833508 10:0.33849062947231645 +6.2776776518215955 1:-0.009605588630256623 2:0.5786496865369053 3:0.9208276908400748 4:-0.9477397424337148 5:0.6306053656362194 6:0.5396434662389846 7:-0.9841930450269964 8:0.5492682920407823 9:-0.020767248025529206 10:-0.8684655435686472 +6.424586997399564 1:0.861374923392324 2:0.8356037964367176 3:-0.7173479824827564 4:-0.6309584820438245 5:0.16136758138471285 6:-0.7485184163431866 7:-0.006053583829132236 8:-0.8762221084691306 9:0.19195377669247726 10:0.07259634302552964 +-9.64772485466405 1:0.7568015336230662 2:-0.4221524485756756 3:0.011711847664269248 4:0.7387065048724242 5:-0.04347512566745104 6:0.06642100869974654 7:-0.6993705848315939 8:0.16312217088045422 9:-0.11975577990989916 10:-0.6188717473788392 +3.8183706502283647 1:-0.7226937936463145 2:-0.5462756960199258 3:-0.39158419906610664 4:0.014310440945434433 5:-0.9950315917350652 6:-0.1844037449550875 7:0.9023517651879036 8:0.7948752060508435 9:-0.6792702010973877 10:0.40730074403235617 +1.1585019476700562 1:0.5575546848694 2:0.8997032130006739 3:0.6088643323129037 4:0.4872893656051758 5:-0.03977520372748922 6:0.3202565433572042 7:-0.31231768645537206 8:-0.6861153669592381 9:-0.08561643820383291 10:0.522243657731251 +-8.18651039877047 1:-0.809069379967462 2:-0.04827229852445103 3:0.19963602092982624 4:0.2568971171641006 5:-0.0015346733366310428 6:-0.6104625526166494 7:0.7746715041233412 8:-0.7343750018341593 9:-0.49272635466510106 10:-0.8115191199688623 +-3.377690136019927 1:-0.9408187510685164 2:0.9654993263332854 3:-0.16725010447984268 4:0.2574069587853294 5:-0.6930506968932861 6:0.11124762075550176 7:0.39145805505914866 8:0.2906495128462767 9:-0.27454907309824916 10:0.9001175309434777 +12.692571815413245 1:0.7404426710258791 2:0.9060576634778448 3:0.7023712021897308 4:-0.9808126157768493 5:0.03447666475715194 6:-0.4146339211599541 7:-0.7329651749553896 8:-0.2696019807317358 9:-0.9885367164723897 10:-0.8540304023043486 +2.5111054050889354 1:0.7448154454968356 2:-0.7532143233138027 3:-0.9724617436335079 4:0.662620399592766 5:0.45517204589358307 6:0.37409736074838684 7:0.337245076577648 8:0.50951903847353 9:0.2590369923587328 10:-0.3248257475117191 +-8.300340493749207 1:0.5504850435404609 2:0.5077232940244447 3:0.778859307357816 4:0.2601916883813373 5:-0.0032275666062382413 6:0.039752927221862855 7:0.19468432568826755 8:-0.2859531554546477 9:-0.4113477962970582 10:0.43272011953041667 +5.904938653193952 1:0.6622293273002955 2:0.6428891633785236 3:0.6999663090423285 4:0.9132698742913088 5:-0.3960072336866507 6:-0.14500922264286054 7:-0.4390171033743564 8:0.002067106212897185 9:-0.6079874251539117 10:-0.7131416109696531 +5.004048239623824 1:0.7212309895357449 2:0.3425199843383353 3:-0.7290323633040705 4:-0.5563097960397918 5:-0.7577898297822001 6:0.647883070472203 7:-0.23710559062843073 8:0.34398507133293954 9:-0.5440251617348038 10:-0.2971638032112218 +6.21255598077158 1:0.2498685983586959 2:-0.2586857335205359 3:-0.6380810501916263 4:0.17008841621855852 5:0.9485802018202867 6:-0.2580306792121272 7:0.032916516140567786 8:0.32950951532163675 9:-0.9291915084526683 10:0.8454021164786922 +-3.741044592262687 1:0.763300390779396 2:-0.1832552896771813 3:-0.39361907876758573 4:0.9050768615040607 5:-0.8850093869496836 6:0.9302208653737598 7:-0.12972094056755412 8:-0.459442486378308 9:0.5044112394875107 10:0.1399067554681861 +7.378402183384303 1:-0.27686808475610114 2:0.12735524561214606 3:0.5216635958678004 4:-0.9418584785460469 5:0.20441570818728771 6:-0.35073421178920583 7:0.7847501694079704 8:0.3222999552829353 9:0.21025696511089764 10:-0.5813710201294744 +-7.1500991588127265 1:-0.1945259148773102 2:-0.4089845159829022 3:-0.1971859124232922 4:0.9531447983295496 5:0.07996455700202221 6:0.17013529724757648 7:-0.2442095218739362 8:-0.8564146371721229 9:-0.5843910532907555 10:-0.33846471424918767 +-4.288417758202577 1:0.020710986120182184 2:-0.7450564238727908 3:0.3674992023059285 4:0.46737461414601555 5:0.9411702705113052 6:-0.7257365059912877 7:0.5813280037560231 8:-0.01567531846894843 9:0.24734195293533467 10:0.6516001002566887 +5.916426037500391 1:0.8260000862135342 2:-0.11324162495165968 3:0.13061304369435334 4:0.5762591624576425 5:0.548049763999644 6:-0.9751599851764361 7:0.02828821483057764 8:-0.4113286027346803 9:0.8912856976307486 10:-0.8470910204808244 +2.431004294471012 1:0.14088576701299083 2:-0.45104190898994734 3:0.29891134031619115 4:0.955503074037666 5:0.15962522624750242 6:0.7664481093046553 7:0.051697815479792686 8:-0.3471787155014081 9:-0.8007151537631465 10:-0.5598899500902301 +-16.08565904102149 1:0.3946137229565083 2:0.8443779319638349 3:0.5116855547320893 4:-0.5319339991982652 5:0.26564506849312797 6:0.18905397829944448 7:0.1976357098053687 8:0.15505612242632538 9:-0.935633748308776 10:-0.9782957013204887 +18.058440348477184 1:0.8402487524597533 2:-0.6200725197687718 3:-0.6158487677192792 4:0.0709328308135515 5:0.7501256905495493 6:0.38092209802839583 7:-0.8192579128383128 8:-0.9304002828581583 9:-0.6570300818845025 10:-0.5252554781538985 +-1.0026720160736349 1:0.46122079684901474 2:-0.7609201036934166 3:-0.9372178059537293 4:-0.25391036498391006 5:-0.7487429157699828 6:0.38024314675291637 7:0.21886059803198576 8:0.027516853267765207 9:0.33483464322377765 10:0.618580130027746 +-2.6688695419207162 1:-0.8775911623423445 2:-0.6647410420697879 3:0.05948516302547313 4:0.7278526664475804 5:-0.011366224409705028 6:0.33475665968289436 7:-0.6386120399761575 8:0.39609772177595115 9:-0.7872076290319412 10:-0.6195857302948329 +-13.867087895158768 1:-0.9114780602695882 2:0.7997695296649912 3:0.8337252417804881 4:-0.7927267913881113 5:0.6863829853181673 6:0.4162562153517635 7:0.2659922421074139 8:-0.551994669040742 9:-0.6403900338772157 10:-0.8680387717518072 +7.826011095515239 1:-0.2881951904396949 2:-0.19317071325391022 3:-0.06581062483451183 4:-0.6074074436315555 5:-0.9434740067975405 6:0.9426572655575483 7:-0.1812629432036228 8:0.39425575292939863 9:0.5065890539615039 10:0.8969825696966649 +1.4213836206303339 1:0.6996840540120932 2:0.1283999569152492 3:-0.2537375462472613 4:0.24772110606788456 5:0.9040210381745799 6:0.47062010977660207 7:0.9697678931927365 8:-0.9215764371674713 9:-0.27541598110075793 10:0.44277003247067803 +-0.973650798730175 1:-0.2121645467631068 2:-0.6770222508071349 3:-0.5733067523949165 4:0.27979529516037105 5:0.7128588235545461 6:-0.9208763636184307 7:0.14128337151047532 8:-0.002851660400375433 9:0.6943908711123281 10:-0.9201922993121072 +-0.17500848560451965 1:-0.6015070903427717 2:0.7815998200409671 3:-0.9932006200204946 4:-0.3303953411379028 5:-0.3329917860768894 6:-0.2822852019877604 7:0.6834785385197197 8:-0.6458607648553825 9:-0.06171476054995373 10:0.11421513352405444 +-15.310980589416289 1:-0.35290763483001486 2:-0.7263565311032778 3:-0.8688987069582226 4:-0.991098319894185 5:0.7029028082332363 6:-0.20251284356518684 7:-0.10928416773360117 8:0.307764663956116 9:0.6423143148384418 10:-0.15527637175127107 +3.260298266762908 1:-0.7817510582064782 2:0.45336200757318257 3:-0.15365670773321338 4:0.5063951567230205 5:-0.7102867196895872 6:-0.48050036620725955 7:0.9838016675169072 8:0.07854601230194436 9:-0.18953694857147863 10:0.19370072527454107 +3.846123583197846 1:0.6665586449040093 2:-0.2894063530813835 3:0.29965348483445386 4:0.23590344101670313 5:-0.7456743720187828 6:-0.4680876353446175 7:0.8106301610699425 8:0.691280702194663 9:-0.6060141408622055 10:0.34018639920235194 +-10.945919657782932 1:0.7669971723591666 2:0.38702771863552776 3:-0.6664311930513411 4:-0.2817072090916286 5:-0.16955916900934387 6:-0.9425831315444453 7:0.5685476711649924 8:-0.20782258743798265 9:0.015213591474494637 10:0.8183723865760859 +9.820049725467145 1:0.9582163993327679 2:0.7503905881505508 3:0.6255110430336392 4:0.6522701954798096 5:0.09248037700932144 6:-0.2833482854986902 7:-0.9841968940607242 8:-0.9343780716625845 9:-0.605526104070818 10:0.6000165028195326 +11.398715935456183 1:0.6605086903456443 2:0.14675454515266395 3:-0.7880053589830274 4:-0.8570785944515658 5:-0.4317693974151271 6:-0.12244918233307645 7:0.9808241653220866 8:0.5455853515046201 9:0.6870972425676756 10:0.7427686762232875 +-7.846310147695936 1:0.4355817642106965 2:0.7659504362110916 3:-0.3784171977305315 4:-0.5675896574776877 5:-0.20116390539973938 6:0.8775467546326667 7:-0.2824903364469842 8:0.7470660314619617 9:0.8967783051712528 10:0.7133700339519966 +-1.3847391232663768 1:0.3707613476850027 2:0.6931092598460797 3:-0.7701621508103305 4:-0.5679366502518555 5:-0.7234356749703683 6:-0.8059255104944509 7:-0.8307993875388229 8:0.6133975694770035 9:-0.7399749904168824 10:-0.1534990394513953 +16.93981662267873 1:0.6552665678625891 2:0.023248457840923775 3:-0.6850641408327465 4:0.7129790774369389 5:0.04166304042825364 6:-0.7160289667702797 7:-0.4733073680976494 8:0.2720897719417634 9:0.05850741911975099 10:0.34427554125371174 +2.8497179990245116 1:0.6664937514484015 2:0.3343796939204209 3:0.2611910348746209 4:-0.13658810351647 5:-0.5821801257591224 6:0.9854683468621908 7:-0.21396555404689188 8:-0.5923272173716836 9:-0.5674796199927252 10:-0.5681633547764235 +4.981807952389501 1:0.7517426071091595 2:0.7029291090701855 3:0.7126619831046563 4:-0.9982007415355478 5:-0.7743343367502893 6:-0.9048858749551119 7:-0.8243783842398396 8:0.4936163270697016 9:-0.6835495591484724 10:0.8412758607464845 +8.508637575729951 1:0.6837354268578517 2:-0.435346907350056 3:0.6597448795477736 4:0.8870204157376871 5:-0.6938576101541436 6:0.9199495715292882 7:0.33119640706964293 8:-0.6181273221979411 9:0.12929034268333317 10:0.6855150395247027 +14.369378079132883 1:-0.9489372180887643 2:-0.6577177233364067 3:0.543899463531252 4:0.5411152154119976 5:0.43733244485250733 6:0.5927084968109424 7:0.6100068837998656 8:0.9392735722529637 9:-0.9806701698603073 10:0.3984176141500082 +-6.456944198081549 1:0.8380442392342373 2:0.05166133486184443 3:-0.25864153418691704 4:-0.9506672344106888 5:0.5227275493542325 6:-0.03899736644563956 7:0.7660133053649136 8:-0.9375236703284806 9:-0.37213210747743175 10:0.0560768367274771 +-10.041353112580456 1:0.5293717914660876 2:-0.35874932480194044 3:0.14403824250820763 4:-0.4106496629336782 5:-0.794648717231762 6:-0.4369956159772408 7:0.8273613210141495 8:0.9212255384858874 9:0.00409867676727993 10:-0.23796544184855795 +-6.606325361718908 1:0.2765102732490652 2:0.10184669160432525 3:-0.9406443798496789 4:-0.46661976112717896 5:-0.5836573778289609 6:0.1308554421925976 7:0.05232199712543473 8:-0.4965370542771641 9:-0.3695836654343949 10:0.4874427445939513 +-15.359544879832677 1:-0.8253830145927283 2:0.29683545543963885 3:-0.9790356574071053 4:0.33749594518426473 5:-0.449483349548623 6:0.1740013774913005 7:0.5737323257916764 8:0.20159372721320645 9:-0.1812760896634873 10:-0.17652712339895738 +2.1801769966756845 1:0.3664130766917151 2:-0.1929450967547921 3:-0.7834945448457515 4:-0.03806442314852432 5:-0.6167622313628849 6:0.34919852301325394 7:-0.785891329691004 8:-0.5704062599527768 9:0.9846140894872721 10:-0.548571249100203 +-2.7006646885251415 1:-0.48505178676353067 2:0.06347121974094883 3:-0.3704723119141229 4:0.7407080276548548 5:0.06713252857406937 6:-0.2103524488773294 7:-0.9402467715192988 8:-0.8555624501612784 9:0.6244760190429901 10:-0.9038885681517279 +0.2105613019270259 1:-0.17125223509187282 2:-0.23328463772140529 3:-0.6497773470047024 4:0.33111604806115524 5:0.7944287248398398 6:0.5163977380074081 7:-0.025715995643062595 8:0.11762566041047462 9:0.9938658554834845 10:0.5363394203614278 +-0.6433952980357234 1:-0.905126800719938 2:0.5826442985002787 3:-0.8207546276288018 4:-0.0773547002692121 5:-0.6420058913410687 6:-0.9290787206193325 7:0.21829202840889095 8:-0.7752845890678082 9:0.4533233304372326 10:0.5457315861825041 +5.622874731146287 1:0.5486636398086722 2:-0.21867854114956642 3:0.13260110994566032 4:-0.024868470628895967 5:0.9246597814546305 6:0.07490395250443149 7:-0.21327567620097132 8:-0.33970581204395867 9:-0.19408398882121713 10:0.9757334811378136 +-18.27521356600463 1:-0.489685764918109 2:0.6832314342743568 3:0.9115808714640257 4:-4.680515344936964E-4 5:0.03760860984717218 6:0.4344127744883004 7:-0.30019645809377127 8:-0.48339658188341783 9:-0.5488933834939806 10:-0.4735052851773165 +5.518650144654079 1:-0.16881374315243192 2:0.22747702179774354 3:-0.8555270909193926 4:-0.6914231522703247 5:0.03618437407657238 6:-0.8404831131806643 7:0.16378525699004887 8:-0.333895928854854 9:0.23026574917978326 10:0.9409087845740918 +2.5599738684677646 1:-0.24371170373626905 2:-0.1752613047793694 3:-0.7930324885557696 4:0.17288443448968627 5:0.7233942014077801 6:0.47222694561171963 7:0.7878187692414558 8:-0.6520011755878357 9:-0.9952507460157223 10:-0.32951026378415094 +-8.508663400554862 1:0.9194236423060742 2:0.9517284917259223 3:-0.18723709334016392 4:-0.24913001260985546 5:0.8818286401027424 6:0.13661210218384512 7:-0.40792517201812983 8:-0.33132907984544957 9:-0.49137388288628703 10:-0.3273925353006979 +-10.233439586953153 1:0.0960128812383454 2:-0.8611756848964027 3:0.11807312551418647 4:-0.24570750746947145 5:-0.047826307143366886 6:-0.717269426008625 7:-0.2841658181308486 8:-0.31500935950449516 9:0.23183474949267713 10:-0.512986169560546 +-6.3459370724834265 1:0.9537835418930307 2:0.4598546399405288 3:-0.257013655072986 4:-0.29185820894937575 5:-0.6843688281544562 6:0.8343952028925479 7:-0.9656517094615942 8:-0.447440560943553 9:-0.9510349521362857 10:0.5918946980259567 +1.114406550703455 1:-0.5721838436595965 2:0.1201917297381252 3:-0.5253701290141362 4:-0.5874011312890843 5:0.7893580092022578 6:-0.18012813622584134 7:0.4781905737504004 8:-4.6732390143988667E-4 9:-0.7965374182885014 10:-0.8515444146742359 +8.688243146888663 1:0.2245581140502393 2:-0.0697600364101425 3:-0.7661833153629154 4:-0.2289151515902894 5:-0.5643191391300282 6:0.08069861795512168 7:-0.9670317635091523 8:0.14826752863715287 9:0.9325364047311011 10:0.4071178661803092 +14.896035572185347 1:0.20630949870309911 2:-0.5738578325975092 3:0.5664829389128903 4:0.3732752326637825 5:0.04079303403038881 6:-0.6604984910400766 7:0.15136076091734352 8:-0.6244939282579305 9:-0.5236288549540624 10:0.47284992666739023 +4.396558596072123 1:0.5565602414172521 2:0.1444095747909111 3:0.028227502879770272 4:0.38297378287943773 5:-0.26739745457451725 6:-0.708209627997985 7:0.7604483272526881 8:0.8072075261139096 9:0.11460574885028274 10:-0.07669406807610635 +1.7457141275341528 1:0.3668576517164046 2:-0.5352200081463954 3:0.5853385976871426 4:-0.4482551060006992 5:-0.5676795208498786 6:0.8043295590331514 7:-0.02160829797068753 8:0.42281303847010454 9:0.027894531623162466 10:-0.541120112980032 +-15.334767479922341 1:-0.036676500783341615 2:0.804758241454594 3:-0.0642091078911513 4:0.1402705435750966 5:-0.9215322030628859 6:0.7951173116514345 7:-0.994819896842561 8:0.2382406912119326 9:0.6634166177958731 10:0.7623222578718651 +5.017247792012723 1:-0.5925393497160352 2:0.48506599831456443 3:-0.5079795649118319 4:0.6668553329827696 5:-0.1103174867779837 6:0.7048535526809607 7:-0.9819230894106692 8:0.19609620625274982 9:0.5173985272313828 10:-0.11269849619148875 +6.201510810634532 1:-0.6802942101330738 2:0.898957584078176 3:0.853293387559251 4:0.6089336185656065 5:-0.9352626288322801 6:0.3208583332890447 7:-0.964481544931127 8:-0.8294773786068643 9:-0.8817311989413614 10:0.5165364663580934 +19.174935630244647 1:-0.20026105252200788 2:0.7276178994821614 3:0.7748716685190951 4:-0.7423420145576229 5:0.13147770471985032 6:-0.8382015712894606 7:0.021760992104270294 8:-0.24586987823702944 9:-0.05958177281299326 10:0.47347236224860834 +-14.822152909751189 1:0.7255660700197897 2:-0.22751988933383926 3:-0.08409197084114317 4:0.072750455428638 5:0.1841692073989072 6:-0.33838406658716513 7:-0.44701963574290526 8:0.5031210959133143 9:0.09640858549693743 10:0.9857351194637847 +-6.310082095945472 1:-0.7692076133438608 2:0.8533601511731044 3:0.676268298275629 4:-0.783895030001512 5:-0.8195462819549715 6:0.3963101354895673 7:-0.6254922461977397 8:-0.7521135990258581 9:-0.8032003997516024 10:0.8388672800826487 +8.853802632714807 1:0.46950948246522195 2:-0.6148693581037883 3:0.028739220735170656 4:-0.024281643566285815 5:-0.3495458137792231 6:-0.12347196435522867 7:0.5253894065203333 8:0.5100713458262918 9:0.63975795701667 10:0.08644353314625053 +-10.293714040655924 1:-0.17971950768550893 2:-0.6621720204354751 3:0.888036885802737 4:-0.04977483590350751 5:-0.8964991391283221 6:0.6873490822438724 7:0.42369087852118836 8:0.48972554317650663 9:0.8617233178519317 10:-0.8348331836605276 +0.23985611568891863 1:0.050526696983213215 2:0.8544297176525815 3:0.8586358519997579 4:-0.021299752441110487 5:0.2606696929560939 6:-0.39446486150105997 7:-0.4166234435381613 8:-0.6097643266459343 9:0.46633996256010146 10:-0.22521646199731027 +21.57719950299147 1:-0.5878491135126271 2:0.802134056970349 3:-0.5471017580843434 4:0.6067966843473331 5:-0.691712219323007 6:0.7814323754276735 7:0.31689445927290016 8:-0.1668780061940922 9:0.5285692389527452 10:0.8027091025203246 +-0.7836538830323514 1:0.5766794801558166 2:0.8281463568384935 3:0.5087453132796032 4:0.5212853344036532 5:0.6294700781054074 6:-0.9385097739886943 7:-0.13127371407538302 8:0.9845390503404141 9:-0.7224166213906742 10:-0.11155327354295896 +6.710413649604831 1:-0.6919803228062729 2:-0.6526904017578161 3:-0.34211291948607014 4:0.9094842803341618 5:-0.9454398661995895 6:0.3780766512494227 7:0.5823385348738088 8:0.8817830051841733 9:-0.514843382774189 10:0.32579701113259296 +5.384747201245483 1:-0.9661857672086316 2:-0.519769534339731 3:-0.4466396856529564 4:-0.4370113024678448 5:-0.6397400687811474 6:0.08225309277403725 7:-0.25936524603970756 8:-0.1711463274766858 9:-0.42848099098115755 10:-0.8096854737357237 +7.688509532916731 1:0.3892872094452817 2:-0.13306620868059982 3:-0.932974891205117 4:-0.8921357494146682 5:0.4806996560679244 6:-0.21500288444218696 7:-0.8911268070046585 8:-0.9510264953215406 9:0.1899740993687098 10:-0.43944320580463536 +2.2546997585565296 1:-0.5963883101717473 2:-0.01115153603404151 3:0.8781871380140298 4:0.7736250964135891 5:-0.7325745711528668 6:0.2518631794989008 7:0.5760249284318746 8:0.8690107952725199 9:0.02320853138646095 10:0.08570951531344417 +5.597710012706039 1:-0.5323512235815979 2:0.03366944321271936 3:0.5143537675853551 4:0.28471250955283445 5:0.4012202634439719 6:0.12032039285431151 7:-0.08108716844967812 8:0.30231384371011294 9:0.03259115565303028 10:0.9567467516929173 +-12.977848725392104 1:-0.5908891529017144 2:-0.7678208242918028 3:0.8512434510178621 4:-0.14910196410347298 5:0.6250260229199651 6:0.5393378705290228 7:-0.9573580597625002 8:-0.864881502860934 9:0.4175735160503429 10:0.4872169215922426 +10.35887243981476 1:-0.09126023790482862 2:0.18852634121926526 3:-0.13523918100503107 4:0.8333842692409983 5:-0.6015442103644761 6:0.5347736461652235 7:-0.823489760471118 8:0.5562688292037381 9:-0.807478561291906 10:-0.666881464988351 +0.4250502150408626 1:0.7771717566171905 2:-0.8729202752916785 3:-0.25782888805127024 4:-0.13605474993771205 5:0.5911781118120025 6:-0.8444023967853633 7:0.6787302541469229 8:-0.5444299313083194 9:0.356121883138657 10:-0.8845333845080687 +-0.8743487925900991 1:-0.9087681208947878 2:-0.292625136739453 3:-0.35113758823291774 4:-0.705933223571676 5:-0.6882289471031144 6:0.8350131255297044 7:-0.7659016065609232 8:0.11400114955653207 9:-0.9466143658505732 10:-0.5033643125229932 +-5.615143641864686 1:-0.6688289820084299 2:-0.4623159855015393 3:0.012827807007503855 4:-0.44521264878006117 5:-0.5563111031201406 6:-0.6065295981983794 7:0.3806712426786838 8:-0.11317152118817408 9:0.507896127467435 10:-0.8487801189674464 +-0.1829397047693725 1:0.09377558075225512 2:0.5774384503027374 3:-0.7104684187448009 4:-0.07285914169135976 5:-0.8797920488335114 6:0.6099615504974201 7:-0.8047440624324915 8:-0.6877856114263066 9:0.5843004021777447 10:0.5190581455348131 +18.479680552020344 1:0.9635517137863321 2:0.9954507816218203 3:0.11959899129360774 4:0.3753283274192787 5:-0.9386713095183621 6:0.0926833703812433 7:0.48003949462701323 8:0.9432769781973132 9:-0.9637036991931129 10:-0.4064407447273508 +1.3850645873427236 1:0.14476184437006356 2:-0.11280617018445871 3:-0.4385084538142101 4:-0.5961619435136434 5:0.419554626795412 6:-0.5047767472761191 7:0.457180284958592 8:-0.9129360314541999 9:-0.6320022059786656 10:-0.44989608519659363 diff --git a/examples/linear-regression/build.sh b/examples/linear-regression/build.sh new file mode 100755 index 000000000..da373645b --- /dev/null +++ b/examples/linear-regression/build.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +mvn clean package diff --git a/examples/linear-regression/pom.xml b/examples/linear-regression/pom.xml new file mode 100644 index 000000000..9e4f4178f --- /dev/null +++ b/examples/linear-regression/pom.xml @@ -0,0 +1,94 @@ + + 4.0.0 + + com.intel.oap + oap-mllib-examples + ${oap.version}-with-spark-${spark.version} + jar + + LinearRegressionExample + https://github.com/oap-project/oap-mllib.git + + + UTF-8 + 1.1.0 + 2.12.10 + 2.12 + 3.0.0 + + + + + + org.scala-lang + scala-library + 2.12.10 + + + + com.github.scopt + scopt_2.12 + 3.7.0 + + + + org.apache.spark + spark-sql_2.12 + ${spark.version} + provided + + + + org.apache.spark + spark-mllib_2.12 + ${spark.version} + provided + + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + + compile + testCompile + + + + + ${scala.version} + + -target:jvm-1.8 + + + + + maven-assembly-plugin + 3.0.0 + + false + + jar-with-dependencies + + + + + assembly + package + + single + + + + + + + + diff --git a/examples/linear-regression/run.sh b/examples/linear-regression/run.sh new file mode 100755 index 000000000..9829b55bc --- /dev/null +++ b/examples/linear-regression/run.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +source ../../conf/env.sh + +APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar +APP_CLASS=org.apache.spark.examples.ml.LinearRegressionExample +DATA_FILE=data/sample_linear_regression_data.txt + +OAP_MLLIB_ENABLED=true + +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ + --num-executors $SPARK_NUM_EXECUTORS \ + --driver-memory $SPARK_DRIVER_MEMORY \ + --executor-cores $SPARK_EXECUTOR_CORES \ + --executor-memory $SPARK_EXECUTOR_MEMORY \ + --conf "spark.oap.mllib.enabled=$OAP_MLLIB_ENABLED" \ + --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ + --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ + --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ + --conf "spark.shuffle.reduceLocality.enabled=false" \ + --conf "spark.network.timeout=1200s" \ + --conf "spark.task.maxFailures=1" \ + --jars $OAP_MLLIB_JAR \ + --class $APP_CLASS \ + $APP_JAR $DATA_FILE \ + 2>&1 | tee LinearRegression-$(date +%m%d_%H_%M_%S).log diff --git a/examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala b/examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala new file mode 100644 index 000000000..2c72f7702 --- /dev/null +++ b/examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// scalastyle:off println +package org.apache.spark.examples.ml + +import scopt.OptionParser + +import org.apache.spark.ml.regression.LinearRegression +import org.apache.spark.sql.{DataFrame, SparkSession} + +/** + * An example runner for linear regression with elastic-net (mixing L1/L2) regularization. + * Run with + * {{{ + * bin/run-example ml.LinearRegressionExample [options] + * }}} + * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt` which can be + * trained by + * {{{ + * bin/run-example ml.LinearRegressionExample --regParam 0.15 --elasticNetParam 1.0 \ + * data/mllib/sample_linear_regression_data.txt + * }}} + * If you use it as a template to create your own app, please use `spark-submit` to submit your app. + */ +object LinearRegressionExample { + + case class Params( + input: String = null, + testInput: String = "", + dataFormat: String = "libsvm", + regParam: Double = 0.0, + elasticNetParam: Double = 0.0, + maxIter: Int = 100, + tol: Double = 1E-6, + fracTest: Double = 0.2) + + def main(args: Array[String]): Unit = { + val defaultParams = Params() + + val parser = new OptionParser[Params]("LinearRegressionExample") { + head("LinearRegressionExample: an example Linear Regression with Elastic-Net app.") + opt[Double]("regParam") + .text(s"regularization parameter, default: ${defaultParams.regParam}") + .action((x, c) => c.copy(regParam = x)) + opt[Double]("elasticNetParam") + .text(s"ElasticNet mixing parameter. For alpha = 0, the penalty is an L2 penalty. " + + s"For alpha = 1, it is an L1 penalty. For 0 < alpha < 1, the penalty is a combination of " + + s"L1 and L2, default: ${defaultParams.elasticNetParam}") + .action((x, c) => c.copy(elasticNetParam = x)) + opt[Int]("maxIter") + .text(s"maximum number of iterations, default: ${defaultParams.maxIter}") + .action((x, c) => c.copy(maxIter = x)) + opt[Double]("tol") + .text(s"the convergence tolerance of iterations, Smaller value will lead " + + s"to higher accuracy with the cost of more iterations, default: ${defaultParams.tol}") + .action((x, c) => c.copy(tol = x)) + opt[Double]("fracTest") + .text(s"fraction of data to hold out for testing. If given option testInput, " + + s"this option is ignored. default: ${defaultParams.fracTest}") + .action((x, c) => c.copy(fracTest = x)) + opt[String]("testInput") + .text(s"input path to test dataset. If given, option fracTest is ignored." + + s" default: ${defaultParams.testInput}") + .action((x, c) => c.copy(testInput = x)) + opt[String]("dataFormat") + .text("data format: libsvm (default), dense (deprecated in Spark v1.1)") + .action((x, c) => c.copy(dataFormat = x)) + arg[String]("") + .text("input path to labeled examples") + .required() + .action((x, c) => c.copy(input = x)) + checkConfig { params => + if (params.fracTest < 0 || params.fracTest >= 1) { + failure(s"fracTest ${params.fracTest} value incorrect; should be in [0,1).") + } else { + success + } + } + } + + parser.parse(args, defaultParams) match { + case Some(params) => run(params) + case _ => sys.exit(1) + } + } + + def run(params: Params): Unit = { + val spark = SparkSession + .builder + .appName(s"LinearRegressionExample with $params") + .getOrCreate() + + println(s"LinearRegressionExample with parameters:\n$params") + + val training = spark.read.format("libsvm") + .load(params.input) + + val lir = new LinearRegression() + .setFeaturesCol("features") + .setLabelCol("label") + .setRegParam(params.regParam) + .setElasticNetParam(params.elasticNetParam) + .setMaxIter(params.maxIter) + .setTol(params.tol) + + // Train the model + val startTime = System.nanoTime() + val lirModel = lir.fit(training) + val elapsedTime = (System.nanoTime() - startTime) / 1e9 + println(s"Training time: $elapsedTime seconds") + + // Print the coefficients and intercept for linear regression + println(s"Coefficients: ${lirModel.coefficients} Intercept: ${lirModel.intercept}") + + // Summarize the model over the training set and print out some metrics + val trainingSummary = lirModel.summary + println(s"numIterations: ${trainingSummary.totalIterations}") + println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]") + trainingSummary.residuals.show() + println(s"RMSE: ${trainingSummary.rootMeanSquaredError}") + println(s"r2: ${trainingSummary.r2}") + + spark.stop() + } +} +// scalastyle:on println diff --git a/mllib-dal/src/main/java/org/apache/spark/ml/regression/LiRResult.java b/mllib-dal/src/main/java/org/apache/spark/ml/regression/LiRResult.java new file mode 100644 index 000000000..65d30f85c --- /dev/null +++ b/mllib-dal/src/main/java/org/apache/spark/ml/regression/LiRResult.java @@ -0,0 +1,21 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +package org.apache.spark.ml.regression; + +public class LiRResult { + public long coeffNumericTable; // first element of coeff is actually intercept +} diff --git a/mllib-dal/src/main/native/LinearRegressionDALImpl.cpp b/mllib-dal/src/main/native/LinearRegressionDALImpl.cpp new file mode 100644 index 000000000..390d35ddd --- /dev/null +++ b/mllib-dal/src/main/native/LinearRegressionDALImpl.cpp @@ -0,0 +1,250 @@ +/******************************************************************************* + * Copyright 2020 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include +#include +#include +#include +#include + +#include "OneCCL.h" +#include "org_apache_spark_ml_regression_LinearRegressionDALImpl.h" +#include "service.h" + +using namespace std; +using namespace daal; +using namespace daal::algorithms; + +typedef double algorithmFPType; /* Algorithm floating-point type */ + +static NumericTablePtr linear_regression_compute(int rankId, + ccl::communicator &comm, + const NumericTablePtr &pData, + const NumericTablePtr &pLabel, + size_t nBlocks) { + using daal::byte; + + linear_regression::training::Distributed localAlgorithm; + + /* Pass a training data set and dependent values to the algorithm */ + localAlgorithm.input.set(linear_regression::training::data, pData); + localAlgorithm.input.set(linear_regression::training::dependentVariables, + pLabel); + + /* Train the multiple linear regression model on local nodes */ + localAlgorithm.compute(); + + /* Serialize partial results required by step 2 */ + services::SharedPtr serializedData; + InputDataArchive dataArch; + localAlgorithm.getPartialResult()->serialize(dataArch); + size_t perNodeArchLength = dataArch.getSizeOfArchive(); + + serializedData = + services::SharedPtr(new byte[perNodeArchLength * nBlocks]); + + byte *nodeResults = new byte[perNodeArchLength]; + dataArch.copyArchiveToArray(nodeResults, perNodeArchLength); + std::vector aReceiveCount(comm.size(), + perNodeArchLength); // 4 x "14016" + + /* Transfer partial results to step 2 on the root node */ + ccl::gather((int8_t *)nodeResults, perNodeArchLength, + (int8_t *)(serializedData.get()), perNodeArchLength, comm) + .wait(); + + delete[] nodeResults; + + NumericTablePtr resultTable; + if (rankId == ccl_root) { + /* Create an algorithm object to build the final multiple linear + * regression model on the master node */ + linear_regression::training::Distributed masterAlgorithm; + + for (size_t i = 0; i < nBlocks; i++) { + /* Deserialize partial results from step 1 */ + OutputDataArchive dataArch(serializedData.get() + + perNodeArchLength * i, + perNodeArchLength); + + linear_regression::training::PartialResultPtr + dataForStep2FromStep1 = + linear_regression::training::PartialResultPtr( + new linear_regression::training::PartialResult()); + dataForStep2FromStep1->deserialize(dataArch); + + /* Set the local multiple linear regression model as input for the + * master-node algorithm */ + masterAlgorithm.input.add( + linear_regression::training::partialModels, + dataForStep2FromStep1); + } + + /* Merge and finalizeCompute the multiple linear regression model on the + * master node */ + masterAlgorithm.compute(); + masterAlgorithm.finalizeCompute(); + + /* Retrieve the algorithm results */ + linear_regression::training::ResultPtr trainingResult = + masterAlgorithm.getResult(); + resultTable = + trainingResult->get(linear_regression::training::model)->getBeta(); + + printNumericTable(resultTable, + "Linear Regression first 20 columns of " + "coefficients (w0, w1..wn):", + 1, 20); + } + return resultTable; +} + +static NumericTablePtr ridge_regression_compute( + int rankId, ccl::communicator &comm, const NumericTablePtr &pData, + const NumericTablePtr &pLabel, double regParam, size_t nBlocks) { + + using daal::byte; + + NumericTablePtr ridgeParams(new HomogenNumericTable( + 1, 1, NumericTable::doAllocate, regParam)); + + ridge_regression::training::Distributed localAlgorithm; + localAlgorithm.parameter.ridgeParameters = ridgeParams; + + /* Pass a training data set and dependent values to the algorithm */ + localAlgorithm.input.set(ridge_regression::training::data, pData); + localAlgorithm.input.set(ridge_regression::training::dependentVariables, + pLabel); + + /* Train the multiple ridge regression model on local nodes */ + localAlgorithm.compute(); + + /* Serialize partial results required by step 2 */ + services::SharedPtr serializedData; + InputDataArchive dataArch; + localAlgorithm.getPartialResult()->serialize(dataArch); + size_t perNodeArchLength = dataArch.getSizeOfArchive(); + // std::cout << "perNodeArchLength: " << perNodeArchLength << std::endl; + + serializedData = + services::SharedPtr(new byte[perNodeArchLength * nBlocks]); + + byte *nodeResults = new byte[perNodeArchLength]; + dataArch.copyArchiveToArray(nodeResults, perNodeArchLength); + std::vector aReceiveCount(comm.size(), + perNodeArchLength); // 4 x "14016" + + /* Transfer partial results to step 2 on the root node */ + ccl::gather((int8_t *)nodeResults, perNodeArchLength, + (int8_t *)(serializedData.get()), perNodeArchLength, comm) + .wait(); + + delete[] nodeResults; + + NumericTablePtr resultTable; + if (rankId == ccl_root) { + /* Create an algorithm object to build the final multiple ridge + * regression model on the master node */ + ridge_regression::training::Distributed masterAlgorithm; + + for (size_t i = 0; i < nBlocks; i++) { + /* Deserialize partial results from step 1 */ + OutputDataArchive dataArch(serializedData.get() + + perNodeArchLength * i, + perNodeArchLength); + + ridge_regression::training::PartialResultPtr dataForStep2FromStep1 = + ridge_regression::training::PartialResultPtr( + new ridge_regression::training::PartialResult()); + dataForStep2FromStep1->deserialize(dataArch); + + /* Set the local multiple ridge regression model as input for the + * master-node algorithm */ + masterAlgorithm.input.add(ridge_regression::training::partialModels, + dataForStep2FromStep1); + } + + /* Merge and finalizeCompute the multiple ridge regression model on the + * master node */ + masterAlgorithm.compute(); + masterAlgorithm.finalizeCompute(); + + /* Retrieve the algorithm results */ + ridge_regression::training::ResultPtr trainingResult = + masterAlgorithm.getResult(); + resultTable = + trainingResult->get(ridge_regression::training::model)->getBeta(); + + printNumericTable(resultTable, + "Ridge Regression first 20 columns of " + "coefficients (w0, w1..wn):", + 1, 20); + } + return resultTable; +} + +/* + * Class: org_apache_spark_ml_regression_LinearRegressionDALImpl + * Method: cLinearRegressionTrainDAL + * Signature: (JJDDIILorg/apache/spark/ml/regression/LiRResult;)J + */ +JNIEXPORT jlong JNICALL +Java_org_apache_spark_ml_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL( + JNIEnv *env, jobject obj, jlong pNumTabData, jlong pNumTabLabel, + jdouble regParam, jdouble elasticNetParam, + jint executor_num, jint executor_cores, jobject resultObj) { + + ccl::communicator &comm = getComm(); + size_t rankId = comm.rank(); + + NumericTablePtr pLabel = *((NumericTablePtr *)pNumTabLabel); + NumericTablePtr pData = *((NumericTablePtr *)pNumTabData); + + // Set number of threads for oneDAL to use for each rank + services::Environment::getInstance()->setNumberOfThreads(executor_cores); + + int nThreadsNew = + services::Environment::getInstance()->getNumberOfThreads(); + cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew + << endl; + + NumericTablePtr resultTable; + + if (regParam == 0) { + resultTable = linear_regression_compute(rankId, comm, pData, pLabel, + executor_num); + } else { + resultTable = ridge_regression_compute(rankId, comm, pData, pLabel, + regParam, executor_num); + } + + if (rankId == ccl_root) { + // Get the class of the result object + jclass clazz = env->GetObjectClass(resultObj); + + // Get Field references + jfieldID coeffNumericTableField = + env->GetFieldID(clazz, "coeffNumericTable", "J"); + + NumericTablePtr *coeffvectors = new NumericTablePtr(resultTable); + env->SetLongField(resultObj, coeffNumericTableField, + (jlong)coeffvectors); + + // intercept is already in first column of coeffvectors + return (jlong)coeffvectors; + } else + return (jlong)0; +} diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile index bf05e87d9..9e1ec293f 100644 --- a/mllib-dal/src/main/native/Makefile +++ b/mllib-dal/src/main/native/Makefile @@ -38,14 +38,16 @@ CPP_SRCS += \ ./KMeansDALImpl.cpp \ ./PCADALImpl.cpp \ ./ALSDALImpl.cpp ./ALSShuffle.cpp \ - ./NaiveBayesDALImpl.cpp + ./NaiveBayesDALImpl.cpp \ + ./LinearRegressionDALImpl.cpp OBJS += \ ./OneCCL.o ./OneDAL.o ./service.o ./error_handling.o \ ./KMeansDALImpl.o \ ./PCADALImpl.o \ ./ALSDALImpl.o ./ALSShuffle.o \ - ./NaiveBayesDALImpl.o + ./NaiveBayesDALImpl.o \ + ./LinearRegressionDALImpl.o # Output Binary OUTPUT = ../../../target/libMLlibDAL.so diff --git a/mllib-dal/src/main/native/build-jni.sh b/mllib-dal/src/main/native/build-jni.sh index 3f79bddf5..4da92dd67 100755 --- a/mllib-dal/src/main/native/build-jni.sh +++ b/mllib-dal/src/main/native/build-jni.sh @@ -35,3 +35,4 @@ javah -d $WORK_DIR/javah -classpath "$WORK_DIR/../../../target/classes:$DAAL_JAR org.apache.spark.ml.feature.PCADALImpl \ org.apache.spark.ml.recommendation.ALSDALImpl \ org.apache.spark.ml.classification.NaiveBayesDALImpl \ + org.apache.spark.ml.regression.LinearRegressionDALImpl diff --git a/mllib-dal/src/main/native/javah/org_apache_spark_ml_regression_LinearRegressionDALImpl.h b/mllib-dal/src/main/native/javah/org_apache_spark_ml_regression_LinearRegressionDALImpl.h new file mode 100644 index 000000000..bd8d85800 --- /dev/null +++ b/mllib-dal/src/main/native/javah/org_apache_spark_ml_regression_LinearRegressionDALImpl.h @@ -0,0 +1,21 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class org_apache_spark_ml_regression_LinearRegressionDALImpl */ + +#ifndef _Included_org_apache_spark_ml_regression_LinearRegressionDALImpl +#define _Included_org_apache_spark_ml_regression_LinearRegressionDALImpl +#ifdef __cplusplus +extern "C" { +#endif +/* + * Class: org_apache_spark_ml_regression_LinearRegressionDALImpl + * Method: cLinearRegressionTrainDAL + * Signature: (JJDDIILorg/apache/spark/ml/regression/LiRResult;)J + */ +JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL + (JNIEnv *, jobject, jlong, jlong, jdouble, jdouble, jint, jint, jobject); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/regression/LinearRegressionDALImpl.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/LinearRegressionDALImpl.scala new file mode 100644 index 000000000..73ae9be3d --- /dev/null +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/regression/LinearRegressionDALImpl.scala @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import org.apache.spark.internal.Logging +import org.apache.spark.ml.linalg.{DenseVector, Vector} +import org.apache.spark.ml.util.Utils.getOneCCLIPPort +import org.apache.spark.ml.util.{Instrumentation, OneCCL, OneDAL} +import org.apache.spark.sql.Dataset + +/** + * Model fitted by [[LinearRegressionDALImpl]]. + * + * @param coefficients model coefficients + * @param intercept model intercept + * @param diagInvAtWA diagonal of matrix (A^T * W * A)^-1 + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +// LinearRegressionDALModel is the same with WeightedLeastSquaresModel +// diagInvAtWA and objectiveHistory are not supported right now +private[ml] class LinearRegressionDALModel(val coefficients: DenseVector, + val intercept: Double, + val diagInvAtWA: DenseVector, + val objectiveHistory: Array[Double]) + extends Serializable { +} + +class LinearRegressionDALImpl( val fitIntercept: Boolean, + val regParam: Double, + val elasticNetParam: Double, + val standardizeFeatures: Boolean, + val standardizeLabel: Boolean, + val executorNum: Int, + val executorCores: Int) + extends Serializable with Logging { + + require(regParam >= 0.0, s"regParam cannot be negative: $regParam") + require(elasticNetParam >= 0.0 && elasticNetParam <= 1.0, + s"elasticNetParam must be in [0, 1]: $elasticNetParam") + + /** + * Creates a [[LinearRegressionDALModel]] from an RDD of [[Vector]]s. + */ + def train(labeledPoints: Dataset[_], + instr: Option[Instrumentation]): LinearRegressionDALModel = { + + val kvsIPPort = getOneCCLIPPort(labeledPoints.rdd) + + val labeledPointsTables = if (OneDAL.isDenseDataset(labeledPoints)) { + OneDAL.rddLabeledPointToMergedTables(labeledPoints, executorNum) + } else { + OneDAL.rddLabeledPointToSparseTables(labeledPoints, executorNum) + } + + val results = labeledPointsTables.mapPartitionsWithIndex { + case (rank: Int, tables: Iterator[(Long, Long)]) => + val (featureTabAddr, lableTabAddr) = tables.next() + + OneCCL.init(executorNum, rank, kvsIPPort) + + val result = new LiRResult + cLinearRegressionTrainDAL( + featureTabAddr, + lableTabAddr, + regParam, + elasticNetParam, + executorNum, + executorCores, + result + ) + + val ret = if (OneCCL.isRoot()) { + val coefficientArray = OneDAL.numericTableToVectors( + OneDAL.makeNumericTable(result.coeffNumericTable)) + Iterator(coefficientArray(0)) + } else { + Iterator.empty + } + + OneCCL.cleanup() + ret + }.collect() + + // Make sure there is only one result from rank 0 + assert(results.length == 1) + + val coefficientVector = results(0) + + val parentModel = new LinearRegressionDALModel( + new DenseVector(coefficientVector.toArray.slice(1, coefficientVector.size)), + coefficientVector(0), new DenseVector(Array(0D)), Array(0D)) + + parentModel + } + + // Single entry to call Linear Regression DAL backend with parameters + @native private def cLinearRegressionTrainDAL(data: Long, + label: Long, + regParam: Double, + elasticNetParam: Double, + executor_num: Int, + executor_cores: Int, + result: LiRResult): Long + +} diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala index 42b1eef35..2c698f874 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/OneDAL.scala @@ -161,9 +161,16 @@ object OneDAL { val spark = SparkSession.active import spark.implicits._ - labeledPoints.cache().count() + // Repartition to executorNum if not enough partitions + val dataForConversion = if (labeledPoints.rdd.getNumPartitions < executorNum) { + labeledPoints.repartition(executorNum).cache() + } else { + labeledPoints + } - val labeledPointsRDD = labeledPoints.toDF().map { + dataForConversion.cache().count() + + val labeledPointsRDD = dataForConversion.toDF().map { case Row(label: Double, features: Vector) => (features, label) }.rdd @@ -248,6 +255,11 @@ object OneDAL { rowOffsets += indexValues + 1 val contextLocal = new DaalContext() + + // oneDAL libs should be loaded by now, loading other native libs + logger.info("Loading native libraries") + LibLoader.loadLibraries() + val cTable = OneDAL.cNewCSRNumericTableDouble(values, columnIndices, rowOffsets.toArray, nFeatures, csrRowNum) val table = new CSRNumericTable(contextLocal, cTable) diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala index d59493f95..ab6f34a49 100644 --- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala +++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala @@ -17,13 +17,18 @@ package org.apache.spark.ml.util import java.net.InetAddress - import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.ml.linalg.Vector import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession object Utils { + def isOAPEnabled(): Boolean = { + val sc = SparkSession.active.sparkContext + return sc.conf.getBoolean("spark.oap.mllib.enabled", true) + } + def getOneCCLIPPort(data: RDD[_]): String = { val executorIPAddress = Utils.sparkFirstExecutorIP(data.sparkContext) val kvsIP = data.sparkContext.conf.get("spark.oap.mllib.oneccl.kvs.ip", diff --git a/mllib-dal/src/spark-3.0.0/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib-dal/src/spark-3.0.0/main/scala/org/apache/spark/ml/regression/LinearRegression.scala new file mode 100644 index 000000000..1384e4753 --- /dev/null +++ b/mllib-dal/src/spark-3.0.0/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -0,0 +1,1063 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import scala.collection.mutable + +import breeze.linalg.{DenseVector => BDV} +import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.stats.distributions.StudentsT +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.ml.{PipelineStage, PredictorParams} +import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.BLAS._ +import org.apache.spark.ml.optim.WeightedLeastSquares +import org.apache.spark.ml.optim.aggregator.{HuberAggregator, LeastSquaresAggregator} +import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} +import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.stat._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.util.Instrumentation.instrumented +import org.apache.spark.mllib.evaluation.RegressionMetrics +import org.apache.spark.mllib.linalg.VectorImplicits._ +import org.apache.spark.mllib.regression.{LinearRegressionModel => OldLinearRegressionModel} +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.VersionUtils.majorMinorVersion + +/** + * Params for linear regression. + */ +private[regression] trait LinearRegressionParams extends PredictorParams + with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol + with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver + with HasAggregationDepth with HasLoss { + + import LinearRegression._ + + /** + * The solver algorithm for optimization. + * Supported options: "l-bfgs", "normal" and "auto". + * Default: "auto" + * + * @group param + */ + @Since("1.6.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default auto)", + ParamValidators.inArray[String](supportedSolvers)) + + /** + * The loss function to be optimized. + * Supported options: "squaredError" and "huber". + * Default: "squaredError" + * + * @group param + */ + @Since("2.3.0") + final override val loss: Param[String] = new Param[String](this, "loss", "The loss function to" + + s" be optimized. Supported options: ${supportedLosses.mkString(", ")}. (Default squaredError)", + ParamValidators.inArray[String](supportedLosses)) + + /** + * The shape parameter to control the amount of robustness. Must be > 1.0. + * At larger values of epsilon, the huber criterion becomes more similar to least squares + * regression; for small values of epsilon, the criterion is more similar to L1 regression. + * Default is 1.35 to get as much robustness as possible while retaining + * 95% statistical efficiency for normally distributed data. It matches sklearn + * HuberRegressor and is "M" from + * A robust hybrid of lasso and ridge regression. + * Only valid when "loss" is "huber". + * + * @group expertParam + */ + @Since("2.3.0") + final val epsilon = new DoubleParam(this, "epsilon", "The shape parameter to control the " + + "amount of robustness. Must be > 1.0.", ParamValidators.gt(1.0)) + + /** @group getExpertParam */ + @Since("2.3.0") + def getEpsilon: Double = $(epsilon) + + override protected def validateAndTransformSchema( + schema: StructType, + fitting: Boolean, + featuresDataType: DataType): StructType = { + if (fitting) { + if ($(loss) == Huber) { + require($(solver)!= Normal, "LinearRegression with huber loss doesn't support " + + "normal solver, please change solver to auto or l-bfgs.") + require($(elasticNetParam) == 0.0, "LinearRegression with huber loss only supports " + + s"L2 regularization, but got elasticNetParam = $getElasticNetParam.") + } + } + super.validateAndTransformSchema(schema, fitting, featuresDataType) + } +} + +/** + * Linear regression. + * + * The learning objective is to minimize the specified loss function, with regularization. + * This supports two kinds of loss: + * - squaredError (a.k.a squared loss) + * - huber (a hybrid of squared error for relatively small errors and absolute error for + * relatively large ones, and we estimate the scale parameter from training data) + * + * This supports multiple types of regularization: + * - none (a.k.a. ordinary least squares) + * - L2 (ridge regression) + * - L1 (Lasso) + * - L2 + L1 (elastic net) + * + * The squared error objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w}\frac{1}{2n}{\sum_{i=1}^n(X_{i}w - y_{i})^{2} + + * \lambda\left[\frac{1-\alpha}{2}{||w||_{2}}^{2} + \alpha{||w||_{1}}\right]} + * \end{align} + * $$ + *
+ * + * The huber objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w, \sigma}\frac{1}{2n}{\sum_{i=1}^n\left(\sigma + + * H_m\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \frac{1}{2}\lambda {||w||_2}^2} + * \end{align} + * $$ + *
+ * + * where + * + *
+ * $$ + * \begin{align} + * H_m(z) = \begin{cases} + * z^2, & \text {if } |z| < \epsilon, \\ + * 2\epsilon|z| - \epsilon^2, & \text{otherwise} + * \end{cases} + * \end{align} + * $$ + *
+ * + * Note: Fitting with huber loss only supports none and L2 regularization. + */ +@Since("1.3.0") +class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] + with LinearRegressionParams with DefaultParamsWritable with Logging { + + import LinearRegression._ + + @Since("1.4.0") + def this() = this(Identifiable.randomUID("linReg")) + + /** + * Set the regularization parameter. + * Default is 0.0. + * + * @group setParam + */ + @Since("1.3.0") + def setRegParam(value: Double): this.type = set(regParam, value) + setDefault(regParam -> 0.0) + + /** + * Set if we should fit the intercept. + * Default is true. + * + * @group setParam + */ + @Since("1.5.0") + def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + setDefault(fitIntercept -> true) + + /** + * Whether to standardize the training features before fitting the model. + * The coefficients of models will be always returned on the original scale, + * so it will be transparent for users. + * Default is true. + * + * @note With/without standardization, the models should be always converged + * to the same solution when no regularization is applied. In R's GLMNET package, + * the default behavior is true as well. + * + * @group setParam + */ + @Since("1.5.0") + def setStandardization(value: Boolean): this.type = set(standardization, value) + setDefault(standardization -> true) + + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. + * For alpha = 1, it is an L1 penalty. + * For alpha in (0,1), the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * + * Note: Fitting with huber loss only supports None and L2 regularization, + * so throws exception if this param is non-zero value. + * + * @group setParam + */ + @Since("1.4.0") + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + setDefault(elasticNetParam -> 0.0) + + /** + * Set the maximum number of iterations. + * Default is 100. + * + * @group setParam + */ + @Since("1.3.0") + def setMaxIter(value: Int): this.type = set(maxIter, value) + setDefault(maxIter -> 100) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. + * + * @group setParam + */ + @Since("1.4.0") + def setTol(value: Double): this.type = set(tol, value) + setDefault(tol -> 1E-6) + + /** + * Whether to over-/under-sample training instances according to the given weights in weightCol. + * If not set or empty, all instances are treated equally (weight 1.0). + * Default is not set, so all instances have weight one. + * + * @group setParam + */ + @Since("1.6.0") + def setWeightCol(value: String): this.type = set(weightCol, value) + + /** + * Set the solver algorithm used for optimization. + * In case of linear regression, this can be "l-bfgs", "normal" and "auto". + * - "l-bfgs" denotes Limited-memory BFGS which is a limited-memory quasi-Newton + * optimization method. + * - "normal" denotes using Normal Equation as an analytical solution to the linear regression + * problem. This solver is limited to `LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER`. + * - "auto" (default) means that the solver algorithm is selected automatically. + * The Normal Equations solver will be used when possible, but this will automatically fall + * back to iterative optimization methods when needed. + * + * Note: Fitting with huber loss doesn't support normal solver, + * so throws exception if this param was set with "normal". + * @group setParam + */ + @Since("1.6.0") + def setSolver(value: String): this.type = set(solver, value) + setDefault(solver -> Auto) + + /** + * Suggested depth for treeAggregate (greater than or equal to 2). + * If the dimensions of features or the number of partitions are large, + * this param could be adjusted to a larger size. + * Default is 2. + * + * @group expertSetParam + */ + @Since("2.1.0") + def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) + setDefault(aggregationDepth -> 2) + + /** + * Sets the value of param [[loss]]. + * Default is "squaredError". + * + * @group setParam + */ + @Since("2.3.0") + def setLoss(value: String): this.type = set(loss, value) + setDefault(loss -> SquaredError) + + /** + * Sets the value of param [[epsilon]]. + * Default is 1.35. + * + * @group setExpertParam + */ + @Since("2.3.0") + def setEpsilon(value: Double): this.type = set(epsilon, value) + setDefault(epsilon -> 1.35) + + override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => + // Extract the number of features before deciding optimization solver. + val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol)) + + val instances = extractInstances(dataset) + + instr.logPipelineStage(this) + instr.logDataset(dataset) + instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, solver, tol, + elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth, loss, + epsilon) + instr.logNumFeatures(numFeatures) + + if ($(loss) == SquaredError && (($(solver) == Auto && + numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == Normal)) { + // oneDAL only support simple linear regression and ridge regression + val paramSupported = ($(regParam) == 0) || ($(regParam) != 0 && $(elasticNetParam) == 0) + if (paramSupported && Utils.isOAPEnabled) { + val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) + val executor_cores = Utils.sparkExecutorCores() + logInfo(s"LinearRegressionDAL fit using $executor_num Executors") + + val optimizer = new LinearRegressionDALImpl($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + executor_num, executor_cores) + + // Return same model as WeightedLeastSquaresModel + val model = optimizer.train(dataset, Some(instr)) + + val lrModel = copyValues( + new LinearRegressionModel(uid, model.coefficients, model.intercept)) + + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } else { + // For low dimensional data, WeightedLeastSquares is more efficient since the + // training algorithm only requires one pass through the data. (SPARK-10668) + + val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + solverType = WeightedLeastSquares.Auto, maxIter = $(maxIter), tol = $(tol)) + val model = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr)) + // When it is trained by WeightedLeastSquares, training summary does not + // attach returned model. + val lrModel = copyValues(new LinearRegressionModel(uid, model.coefficients, model.intercept)) + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } + } + + val handlePersistence = dataset.storageLevel == StorageLevel.NONE + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) + + val (featuresSummarizer, ySummarizer) = instances.treeAggregate( + (Summarizer.createSummarizerBuffer("mean", "std"), + Summarizer.createSummarizerBuffer("mean", "std", "count")))( + seqOp = (c: (SummarizerBuffer, SummarizerBuffer), instance: Instance) => + (c._1.add(instance.features, instance.weight), + c._2.add(Vectors.dense(instance.label), instance.weight)), + combOp = (c1: (SummarizerBuffer, SummarizerBuffer), + c2: (SummarizerBuffer, SummarizerBuffer)) => + (c1._1.merge(c2._1), c1._2.merge(c2._2)), + depth = $(aggregationDepth) + ) + + val yMean = ySummarizer.mean(0) + val rawYStd = ySummarizer.std(0) + + instr.logNumExamples(ySummarizer.count) + instr.logNamedValue(Instrumentation.loggerTags.meanOfLabels, yMean) + instr.logNamedValue(Instrumentation.loggerTags.varianceOfLabels, rawYStd) + instr.logSumOfWeights(featuresSummarizer.weightSum) + + if (rawYStd == 0.0) { + if ($(fitIntercept) || yMean == 0.0) { + // If the rawYStd==0 and fitIntercept==true, then the intercept is yMean with + // zero coefficient; as a result, training is not needed. + // Also, if yMean==0 and rawYStd==0, all the coefficients are zero regardless of + // the fitIntercept. + if (yMean == 0.0) { + instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + + s"coefficients and the intercept will all be zero; as a result, training is not " + + s"needed.") + } else { + instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + + s"will be zeros and the intercept will be the mean of the label; as a result, " + + s"training is not needed.") + } + if (handlePersistence) instances.unpersist() + val coefficients = Vectors.sparse(numFeatures, Seq.empty) + val intercept = yMean + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + Array(0D)) + + return model.setSummary(Some(trainingSummary)) + } else { + require($(regParam) == 0.0, "The standard deviation of the label is zero. " + + "Model cannot be regularized.") + instr.logWarning(s"The standard deviation of the label is zero. " + + "Consider setting fitIntercept=true.") + } + } + + // if y is constant (rawYStd is zero), then y cannot be scaled. In this case + // setting yStd=abs(yMean) ensures that y is not scaled anymore in l-bfgs algorithm. + val yStd = if (rawYStd > 0) rawYStd else math.abs(yMean) + val featuresMean = featuresSummarizer.mean.toArray + val featuresStd = featuresSummarizer.std.toArray + val bcFeaturesMean = instances.context.broadcast(featuresMean) + val bcFeaturesStd = instances.context.broadcast(featuresStd) + + if (!$(fitIntercept) && (0 until numFeatures).exists { i => + featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { + instr.logWarning("Fitting LinearRegressionModel without intercept on dataset with " + + "constant nonzero column, Spark MLlib outputs zero coefficients for constant nonzero " + + "columns. This behavior is the same as R glmnet but different from LIBSVM.") + } + + // Since we implicitly do the feature scaling when we compute the cost function + // to improve the convergence, the effective regParam will be changed. + val effectiveRegParam = $(loss) match { + case SquaredError => $(regParam) / yStd + case Huber => $(regParam) + } + val effectiveL1RegParam = $(elasticNetParam) * effectiveRegParam + val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam + + val getFeaturesStd = (j: Int) => if (j >= 0 && j < numFeatures) featuresStd(j) else 0.0 + val regularization = if (effectiveL2RegParam != 0.0) { + val shouldApply = (idx: Int) => idx >= 0 && idx < numFeatures + Some(new L2Regularization(effectiveL2RegParam, shouldApply, + if ($(standardization)) None else Some(getFeaturesStd))) + } else { + None + } + + val costFun = $(loss) match { + case SquaredError => + val getAggregatorFunc = new LeastSquaresAggregator(yStd, yMean, $(fitIntercept), + bcFeaturesStd, bcFeaturesMean)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + case Huber => + val getAggregatorFunc = new HuberAggregator($(fitIntercept), $(epsilon), bcFeaturesStd)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + } + + val optimizer = $(loss) match { + case SquaredError => + if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + val standardizationParam = $(standardization) + def effectiveL1RegFun = (index: Int) => { + if (standardizationParam) { + effectiveL1RegParam + } else { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. + if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0 + } + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegFun, $(tol)) + } + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + val lowerBounds = BDV[Double](Array.fill(dim)(Double.MinValue)) + // Optimize huber loss in space "\sigma > 0" + lowerBounds(dim - 1) = Double.MinPositiveValue + val upperBounds = BDV[Double](Array.fill(dim)(Double.MaxValue)) + new BreezeLBFGSB(lowerBounds, upperBounds, $(maxIter), 10, $(tol)) + } + + val initialValues = $(loss) match { + case SquaredError => + Vectors.zeros(numFeatures) + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + Vectors.dense(Array.fill(dim)(1.0)) + } + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialValues.asBreeze.toDenseVector) + + val (coefficients, intercept, scale, objectiveHistory) = { + /* + Note that in Linear Regression, the objective history (loss + regularization) returned + from optimizer is computed in the scaled space given by the following formula. +
+ $$ + L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 + + regTerms \\ + $$ +
+ */ + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + if (state == null) { + val msg = s"${optimizer.getClass.getName} failed." + instr.logError(msg) + throw new SparkException(msg) + } + + bcFeaturesMean.destroy() + bcFeaturesStd.destroy() + + val parameters = state.x.toArray.clone() + + /* + The coefficients are trained in the scaled space; we're converting them back to + the original space. + */ + val rawCoefficients: Array[Double] = $(loss) match { + case SquaredError => parameters + case Huber => parameters.slice(0, numFeatures) + } + + var i = 0 + val len = rawCoefficients.length + val multiplier = $(loss) match { + case SquaredError => yStd + case Huber => 1.0 + } + while (i < len) { + rawCoefficients(i) *= { if (featuresStd(i) != 0.0) multiplier / featuresStd(i) else 0.0 } + i += 1 + } + + val interceptValue: Double = if ($(fitIntercept)) { + $(loss) match { + case SquaredError => + /* + The intercept of squared error in R's GLMNET is computed using closed form + after the coefficients are converged. See the following discussion for detail. + http://stats.stackexchange.com/questions/13617/how-is-the-intercept-computed-in-glmnet + */ + yMean - dot(Vectors.dense(rawCoefficients), Vectors.dense(featuresMean)) + case Huber => parameters(numFeatures) + } + } else { + 0.0 + } + + val scaleValue: Double = $(loss) match { + case SquaredError => 1.0 + case Huber => parameters.last + } + + (Vectors.dense(rawCoefficients).compressed, interceptValue, scaleValue, arrayBuilder.result()) + } + + if (handlePersistence) instances.unpersist() + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept, scale)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + objectiveHistory) + + model.setSummary(Some(trainingSummary)) + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra) +} + +@Since("1.6.0") +object LinearRegression extends DefaultParamsReadable[LinearRegression] { + + @Since("1.6.0") + override def load(path: String): LinearRegression = super.load(path) + + /** + * When using `LinearRegression.solver` == "normal", the solver must limit the number of + * features to at most this number. The entire covariance matrix X^T^X will be collected + * to the driver. This limit helps prevent memory overflow errors. + */ + @Since("2.1.0") + val MAX_FEATURES_FOR_NORMAL_SOLVER: Int = WeightedLeastSquares.MAX_NUM_FEATURES + + /** String name for "auto". */ + private[regression] val Auto = "auto" + + /** String name for "normal". */ + private[regression] val Normal = "normal" + + /** String name for "l-bfgs". */ + private[regression] val LBFGS = "l-bfgs" + + /** Set of solvers that LinearRegression supports. */ + private[regression] val supportedSolvers = Array(Auto, Normal, LBFGS) + + /** String name for "squaredError". */ + private[regression] val SquaredError = "squaredError" + + /** String name for "huber". */ + private[regression] val Huber = "huber" + + /** Set of loss function names that LinearRegression supports. */ + private[regression] val supportedLosses = Array(SquaredError, Huber) +} + +/** + * Model produced by [[LinearRegression]]. + */ +@Since("1.3.0") +class LinearRegressionModel private[ml] ( + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val coefficients: Vector, + @Since("1.3.0") val intercept: Double, + @Since("2.3.0") val scale: Double) + extends RegressionModel[Vector, LinearRegressionModel] + with LinearRegressionParams with GeneralMLWritable + with HasTrainingSummary[LinearRegressionTrainingSummary] { + + private[ml] def this(uid: String, coefficients: Vector, intercept: Double) = + this(uid, coefficients, intercept, 1.0) + + override val numFeatures: Int = coefficients.size + + /** + * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is + * thrown if `hasSummary` is false. + */ + @Since("1.5.0") + override def summary: LinearRegressionTrainingSummary = super.summary + + /** + * Evaluates the model on a test dataset. + * + * @param dataset Test dataset to evaluate model on. + */ + @Since("2.0.0") + def evaluate(dataset: Dataset[_]): LinearRegressionSummary = { + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = findSummaryModelAndPredictionCol() + new LinearRegressionSummary(summaryModel.transform(dataset), predictionColName, + $(labelCol), $(featuresCol), summaryModel, Array(0D)) + } + + /** + * If the prediction column is set returns the current model and prediction column, + * otherwise generates a new column and sets it as the prediction column on a new copy + * of the current model. + */ + private[regression] def findSummaryModelAndPredictionCol(): (LinearRegressionModel, String) = { + $(predictionCol) match { + case "" => + val predictionColName = "prediction_" + java.util.UUID.randomUUID.toString + (copy(ParamMap.empty).setPredictionCol(predictionColName), predictionColName) + case p => (this, p) + } + } + + + override def predict(features: Vector): Double = { + dot(features, coefficients) + intercept + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegressionModel = { + val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra) + newModel.setSummary(trainingSummary).setParent(parent) + } + + /** + * Returns a [[org.apache.spark.ml.util.GeneralMLWriter]] instance for this ML instance. + * + * For [[LinearRegressionModel]], this does NOT currently save the training [[summary]]. + * An option to save [[summary]] may be added in the future. + * + * This also does not save the [[parent]] currently. + */ + @Since("1.6.0") + override def write: GeneralMLWriter = new GeneralMLWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LinearRegressionModel: uid=$uid, numFeatures=$numFeatures" + } +} + +/** A writer for LinearRegression that handles the "internal" (or default) format */ +private class InternalLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "internal" + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector, scale: Double) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val instance = stage.asInstanceOf[LinearRegressionModel] + val sc = sparkSession.sparkContext + // Save metadata and Params + DefaultParamsWriter.saveMetadata(instance, path, sc) + // Save model data: intercept, coefficients, scale + val data = Data(instance.intercept, instance.coefficients, instance.scale) + val dataPath = new Path(path, "data").toString + sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) + } +} + +/** A writer for LinearRegression that handles the "pmml" format */ +private class PMMLLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "pmml" + + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val sc = sparkSession.sparkContext + // Construct the MLLib model which knows how to write to PMML. + val instance = stage.asInstanceOf[LinearRegressionModel] + val oldModel = new OldLinearRegressionModel(instance.coefficients, instance.intercept) + // Save PMML + oldModel.toPMML(sc, path) + } +} + +@Since("1.6.0") +object LinearRegressionModel extends MLReadable[LinearRegressionModel] { + + @Since("1.6.0") + override def read: MLReader[LinearRegressionModel] = new LinearRegressionModelReader + + @Since("1.6.0") + override def load(path: String): LinearRegressionModel = super.load(path) + + private class LinearRegressionModelReader extends MLReader[LinearRegressionModel] { + + /** Checked against metadata when loading model */ + private val className = classOf[LinearRegressionModel].getName + + override def load(path: String): LinearRegressionModel = { + val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + + val dataPath = new Path(path, "data").toString + val data = sparkSession.read.format("parquet").load(dataPath) + val (majorVersion, minorVersion) = majorMinorVersion(metadata.sparkVersion) + val model = if (majorVersion < 2 || (majorVersion == 2 && minorVersion <= 2)) { + // Spark 2.2 and before + val Row(intercept: Double, coefficients: Vector) = + MLUtils.convertVectorColumnsToML(data, "coefficients") + .select("intercept", "coefficients") + .head() + new LinearRegressionModel(metadata.uid, coefficients, intercept) + } else { + // Spark 2.3 and later + val Row(intercept: Double, coefficients: Vector, scale: Double) = + data.select("intercept", "coefficients", "scale").head() + new LinearRegressionModel(metadata.uid, coefficients, intercept, scale) + } + + metadata.getAndSetParams(model) + model + } + } +} + +/** + * Linear regression training results. Currently, the training summary ignores the + * training weights except for the objective trace. + * + * @param predictions predictions output by the model's `transform` method. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Since("1.5.0") +class LinearRegressionTrainingSummary private[regression] ( + predictions: DataFrame, + predictionCol: String, + labelCol: String, + featuresCol: String, + model: LinearRegressionModel, + diagInvAtWA: Array[Double], + val objectiveHistory: Array[Double]) + extends LinearRegressionSummary( + predictions, + predictionCol, + labelCol, + featuresCol, + model, + diagInvAtWA) { + + /** + * Number of training iterations until termination + * + * This value is only available when using the "l-bfgs" solver. + * + * @see `LinearRegression.solver` + */ + @Since("1.5.0") + val totalIterations = objectiveHistory.length + +} + +/** + * Linear regression results evaluated on a dataset. + * + * @param predictions predictions output by the model's `transform` method. + * @param predictionCol Field in "predictions" which gives the predicted value of the label at + * each instance. + * @param labelCol Field in "predictions" which gives the true label of each instance. + * @param featuresCol Field in "predictions" which gives the features of each instance as a vector. + */ +@Since("1.5.0") +class LinearRegressionSummary private[regression] ( + @transient val predictions: DataFrame, + val predictionCol: String, + val labelCol: String, + val featuresCol: String, + private val privateModel: LinearRegressionModel, + private val diagInvAtWA: Array[Double]) extends Serializable { + + @transient private val metrics = new RegressionMetrics( + predictions + .select(col(predictionCol), col(labelCol).cast(DoubleType)) + .rdd + .map { case Row(pred: Double, label: Double) => (pred, label) }, + !privateModel.getFitIntercept) + + /** + * Returns the explained variance regression score. + * explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + * Reference: + * Wikipedia explain variation + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val explainedVariance: Double = metrics.explainedVariance + + /** + * Returns the mean absolute error, which is a risk function corresponding to the + * expected value of the absolute error loss or l1-norm loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanAbsoluteError: Double = metrics.meanAbsoluteError + + /** + * Returns the mean squared error, which is a risk function corresponding to the + * expected value of the squared error loss or quadratic loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanSquaredError: Double = metrics.meanSquaredError + + /** + * Returns the root mean squared error, which is defined as the square root of + * the mean squared error. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val rootMeanSquaredError: Double = metrics.rootMeanSquaredError + + /** + * Returns R^2^, the coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val r2: Double = metrics.r2 + + /** + * Returns Adjusted R^2^, the adjusted coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("2.3.0") + val r2adj: Double = { + val interceptDOF = if (privateModel.getFitIntercept) 1 else 0 + 1 - (1 - r2) * (numInstances - interceptDOF) / + (numInstances - privateModel.coefficients.size - interceptDOF) + } + + /** Residuals (label - predicted value) */ + @Since("1.5.0") + @transient lazy val residuals: DataFrame = { + val t = udf { (pred: Double, label: Double) => label - pred } + predictions.select(t(col(predictionCol), col(labelCol)).as("residuals")) + } + + /** Number of instances in DataFrame predictions */ + lazy val numInstances: Long = predictions.count() + + /** Degrees of freedom */ + @Since("2.2.0") + val degreesOfFreedom: Long = if (privateModel.getFitIntercept) { + numInstances - privateModel.coefficients.size - 1 + } else { + numInstances - privateModel.coefficients.size + } + + /** + * The weighted residuals, the usual residuals rescaled by + * the square root of the instance weights. + */ + lazy val devianceResiduals: Array[Double] = { + val weighted = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + lit(1.0) + } else { + sqrt(col(privateModel.getWeightCol)) + } + val dr = predictions + .select(col(privateModel.getLabelCol).minus(col(privateModel.getPredictionCol)) + .multiply(weighted).as("weightedResiduals")) + .select(min(col("weightedResiduals")).as("min"), max(col("weightedResiduals")).as("max")) + .first() + Array(dr.getDouble(0), dr.getDouble(1)) + } + + /** + * Standard error of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val coefficientStandardErrors: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No Std. Error of coefficients available for this LinearRegressionModel") + } else { + val rss = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + meanSquaredError * numInstances + } else { + val t = udf { (pred: Double, label: Double, weight: Double) => + math.pow(label - pred, 2.0) * weight } + predictions.select(t(col(privateModel.getPredictionCol), col(privateModel.getLabelCol), + col(privateModel.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0) + } + val sigma2 = rss / degreesOfFreedom + diagInvAtWA.map(_ * sigma2).map(math.sqrt) + } + } + + /** + * T-statistic of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val tValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No t-statistic available for this LinearRegressionModel") + } else { + val estimate = if (privateModel.getFitIntercept) { + Array.concat(privateModel.coefficients.toArray, Array(privateModel.intercept)) + } else { + privateModel.coefficients.toArray + } + estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } + } + } + + /** + * Two-sided p-value of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val pValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No p-value available for this LinearRegressionModel") + } else { + tValues.map { x => 2.0 * (1.0 - StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } + } + } + +} + diff --git a/mllib-dal/src/spark-3.0.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib-dal/src/spark-3.0.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala new file mode 100644 index 000000000..85b8be9ff --- /dev/null +++ b/mllib-dal/src/spark-3.0.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -0,0 +1,1057 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import scala.collection.mutable + +import breeze.linalg.{DenseVector => BDV} +import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.stats.distributions.StudentsT +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.ml.{PipelineStage, PredictorParams} +import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.BLAS._ +import org.apache.spark.ml.optim.WeightedLeastSquares +import org.apache.spark.ml.optim.aggregator.{HuberAggregator, LeastSquaresAggregator} +import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} +import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.stat._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.util.Instrumentation.instrumented +import org.apache.spark.mllib.evaluation.RegressionMetrics +import org.apache.spark.mllib.linalg.VectorImplicits._ +import org.apache.spark.mllib.regression.{LinearRegressionModel => OldLinearRegressionModel} +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.VersionUtils.majorMinorVersion + +/** + * Params for linear regression. + */ +private[regression] trait LinearRegressionParams extends PredictorParams + with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol + with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver + with HasAggregationDepth with HasLoss { + + import LinearRegression._ + + /** + * The solver algorithm for optimization. + * Supported options: "l-bfgs", "normal" and "auto". + * Default: "auto" + * + * @group param + */ + @Since("1.6.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default auto)", + ParamValidators.inArray[String](supportedSolvers)) + + /** + * The loss function to be optimized. + * Supported options: "squaredError" and "huber". + * Default: "squaredError" + * + * @group param + */ + @Since("2.3.0") + final override val loss: Param[String] = new Param[String](this, "loss", "The loss function to" + + s" be optimized. Supported options: ${supportedLosses.mkString(", ")}. (Default squaredError)", + ParamValidators.inArray[String](supportedLosses)) + + /** + * The shape parameter to control the amount of robustness. Must be > 1.0. + * At larger values of epsilon, the huber criterion becomes more similar to least squares + * regression; for small values of epsilon, the criterion is more similar to L1 regression. + * Default is 1.35 to get as much robustness as possible while retaining + * 95% statistical efficiency for normally distributed data. It matches sklearn + * HuberRegressor and is "M" from + * A robust hybrid of lasso and ridge regression. + * Only valid when "loss" is "huber". + * + * @group expertParam + */ + @Since("2.3.0") + final val epsilon = new DoubleParam(this, "epsilon", "The shape parameter to control the " + + "amount of robustness. Must be > 1.0.", ParamValidators.gt(1.0)) + + /** @group getExpertParam */ + @Since("2.3.0") + def getEpsilon: Double = $(epsilon) + + setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, + elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35) + + override protected def validateAndTransformSchema( + schema: StructType, + fitting: Boolean, + featuresDataType: DataType): StructType = { + if (fitting) { + if ($(loss) == Huber) { + require($(solver)!= Normal, "LinearRegression with huber loss doesn't support " + + "normal solver, please change solver to auto or l-bfgs.") + require($(elasticNetParam) == 0.0, "LinearRegression with huber loss only supports " + + s"L2 regularization, but got elasticNetParam = $getElasticNetParam.") + } + } + super.validateAndTransformSchema(schema, fitting, featuresDataType) + } +} + +/** + * Linear regression. + * + * The learning objective is to minimize the specified loss function, with regularization. + * This supports two kinds of loss: + * - squaredError (a.k.a squared loss) + * - huber (a hybrid of squared error for relatively small errors and absolute error for + * relatively large ones, and we estimate the scale parameter from training data) + * + * This supports multiple types of regularization: + * - none (a.k.a. ordinary least squares) + * - L2 (ridge regression) + * - L1 (Lasso) + * - L2 + L1 (elastic net) + * + * The squared error objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w}\frac{1}{2n}{\sum_{i=1}^n(X_{i}w - y_{i})^{2} + + * \lambda\left[\frac{1-\alpha}{2}{||w||_{2}}^{2} + \alpha{||w||_{1}}\right]} + * \end{align} + * $$ + *
+ * + * The huber objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w, \sigma}\frac{1}{2n}{\sum_{i=1}^n\left(\sigma + + * H_m\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \frac{1}{2}\lambda {||w||_2}^2} + * \end{align} + * $$ + *
+ * + * where + * + *
+ * $$ + * \begin{align} + * H_m(z) = \begin{cases} + * z^2, & \text {if } |z| < \epsilon, \\ + * 2\epsilon|z| - \epsilon^2, & \text{otherwise} + * \end{cases} + * \end{align} + * $$ + *
+ * + * Note: Fitting with huber loss only supports none and L2 regularization. + */ +@Since("1.3.0") +class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] + with LinearRegressionParams with DefaultParamsWritable with Logging { + + import LinearRegression._ + + @Since("1.4.0") + def this() = this(Identifiable.randomUID("linReg")) + + /** + * Set the regularization parameter. + * Default is 0.0. + * + * @group setParam + */ + @Since("1.3.0") + def setRegParam(value: Double): this.type = set(regParam, value) + + /** + * Set if we should fit the intercept. + * Default is true. + * + * @group setParam + */ + @Since("1.5.0") + def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + + /** + * Whether to standardize the training features before fitting the model. + * The coefficients of models will be always returned on the original scale, + * so it will be transparent for users. + * Default is true. + * + * @note With/without standardization, the models should be always converged + * to the same solution when no regularization is applied. In R's GLMNET package, + * the default behavior is true as well. + * + * @group setParam + */ + @Since("1.5.0") + def setStandardization(value: Boolean): this.type = set(standardization, value) + + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. + * For alpha = 1, it is an L1 penalty. + * For alpha in (0,1), the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * + * Note: Fitting with huber loss only supports None and L2 regularization, + * so throws exception if this param is non-zero value. + * + * @group setParam + */ + @Since("1.4.0") + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + + /** + * Set the maximum number of iterations. + * Default is 100. + * + * @group setParam + */ + @Since("1.3.0") + def setMaxIter(value: Int): this.type = set(maxIter, value) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. + * + * @group setParam + */ + @Since("1.4.0") + def setTol(value: Double): this.type = set(tol, value) + + /** + * Whether to over-/under-sample training instances according to the given weights in weightCol. + * If not set or empty, all instances are treated equally (weight 1.0). + * Default is not set, so all instances have weight one. + * + * @group setParam + */ + @Since("1.6.0") + def setWeightCol(value: String): this.type = set(weightCol, value) + + /** + * Set the solver algorithm used for optimization. + * In case of linear regression, this can be "l-bfgs", "normal" and "auto". + * - "l-bfgs" denotes Limited-memory BFGS which is a limited-memory quasi-Newton + * optimization method. + * - "normal" denotes using Normal Equation as an analytical solution to the linear regression + * problem. This solver is limited to `LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER`. + * - "auto" (default) means that the solver algorithm is selected automatically. + * The Normal Equations solver will be used when possible, but this will automatically fall + * back to iterative optimization methods when needed. + * + * Note: Fitting with huber loss doesn't support normal solver, + * so throws exception if this param was set with "normal". + * @group setParam + */ + @Since("1.6.0") + def setSolver(value: String): this.type = set(solver, value) + + /** + * Suggested depth for treeAggregate (greater than or equal to 2). + * If the dimensions of features or the number of partitions are large, + * this param could be adjusted to a larger size. + * Default is 2. + * + * @group expertSetParam + */ + @Since("2.1.0") + def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) + + /** + * Sets the value of param [[loss]]. + * Default is "squaredError". + * + * @group setParam + */ + @Since("2.3.0") + def setLoss(value: String): this.type = set(loss, value) + + /** + * Sets the value of param [[epsilon]]. + * Default is 1.35. + * + * @group setExpertParam + */ + @Since("2.3.0") + def setEpsilon(value: Double): this.type = set(epsilon, value) + + override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => + // Extract the number of features before deciding optimization solver. + val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol)) + + val instances = extractInstances(dataset) + + instr.logPipelineStage(this) + instr.logDataset(dataset) + instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, solver, tol, + elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth, loss, + epsilon) + instr.logNumFeatures(numFeatures) + + if ($(loss) == SquaredError && (($(solver) == Auto && + numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == Normal)) { + // oneDAL only support simple linear regression and ridge regression + val paramSupported = ($(regParam) == 0) || ($(regParam) != 0 && $(elasticNetParam) == 0) + if (paramSupported && Utils.isOAPEnabled) { + val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) + val executor_cores = Utils.sparkExecutorCores() + logInfo(s"LinearRegressionDAL fit using $executor_num Executors") + + val optimizer = new LinearRegressionDALImpl($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + executor_num, executor_cores) + + // Return same model as WeightedLeastSquaresModel + val model = optimizer.train(dataset, Some(instr)) + + val lrModel = copyValues( + new LinearRegressionModel(uid, model.coefficients, model.intercept)) + + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } else { + // For low dimensional data, WeightedLeastSquares is more efficient since the + // training algorithm only requires one pass through the data. (SPARK-10668) + + val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + solverType = WeightedLeastSquares.Auto, maxIter = $(maxIter), tol = $(tol)) + val model = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr)) + // When it is trained by WeightedLeastSquares, training summary does not + // attach returned model. + val lrModel = copyValues(new LinearRegressionModel(uid, model.coefficients, model.intercept)) + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } + } + + val handlePersistence = dataset.storageLevel == StorageLevel.NONE + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) + + val (featuresSummarizer, ySummarizer) = instances.treeAggregate( + (Summarizer.createSummarizerBuffer("mean", "std"), + Summarizer.createSummarizerBuffer("mean", "std", "count")))( + seqOp = (c: (SummarizerBuffer, SummarizerBuffer), instance: Instance) => + (c._1.add(instance.features, instance.weight), + c._2.add(Vectors.dense(instance.label), instance.weight)), + combOp = (c1: (SummarizerBuffer, SummarizerBuffer), + c2: (SummarizerBuffer, SummarizerBuffer)) => + (c1._1.merge(c2._1), c1._2.merge(c2._2)), + depth = $(aggregationDepth) + ) + + val yMean = ySummarizer.mean(0) + val rawYStd = ySummarizer.std(0) + + instr.logNumExamples(ySummarizer.count) + instr.logNamedValue(Instrumentation.loggerTags.meanOfLabels, yMean) + instr.logNamedValue(Instrumentation.loggerTags.varianceOfLabels, rawYStd) + instr.logSumOfWeights(featuresSummarizer.weightSum) + + if (rawYStd == 0.0) { + if ($(fitIntercept) || yMean == 0.0) { + // If the rawYStd==0 and fitIntercept==true, then the intercept is yMean with + // zero coefficient; as a result, training is not needed. + // Also, if yMean==0 and rawYStd==0, all the coefficients are zero regardless of + // the fitIntercept. + if (yMean == 0.0) { + instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + + s"coefficients and the intercept will all be zero; as a result, training is not " + + s"needed.") + } else { + instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + + s"will be zeros and the intercept will be the mean of the label; as a result, " + + s"training is not needed.") + } + if (handlePersistence) instances.unpersist() + val coefficients = Vectors.sparse(numFeatures, Seq.empty) + val intercept = yMean + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + Array(0D)) + + return model.setSummary(Some(trainingSummary)) + } else { + require($(regParam) == 0.0, "The standard deviation of the label is zero. " + + "Model cannot be regularized.") + instr.logWarning(s"The standard deviation of the label is zero. " + + "Consider setting fitIntercept=true.") + } + } + + // if y is constant (rawYStd is zero), then y cannot be scaled. In this case + // setting yStd=abs(yMean) ensures that y is not scaled anymore in l-bfgs algorithm. + val yStd = if (rawYStd > 0) rawYStd else math.abs(yMean) + val featuresMean = featuresSummarizer.mean.toArray + val featuresStd = featuresSummarizer.std.toArray + val bcFeaturesMean = instances.context.broadcast(featuresMean) + val bcFeaturesStd = instances.context.broadcast(featuresStd) + + if (!$(fitIntercept) && (0 until numFeatures).exists { i => + featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { + instr.logWarning("Fitting LinearRegressionModel without intercept on dataset with " + + "constant nonzero column, Spark MLlib outputs zero coefficients for constant nonzero " + + "columns. This behavior is the same as R glmnet but different from LIBSVM.") + } + + // Since we implicitly do the feature scaling when we compute the cost function + // to improve the convergence, the effective regParam will be changed. + val effectiveRegParam = $(loss) match { + case SquaredError => $(regParam) / yStd + case Huber => $(regParam) + } + val effectiveL1RegParam = $(elasticNetParam) * effectiveRegParam + val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam + + val getFeaturesStd = (j: Int) => if (j >= 0 && j < numFeatures) featuresStd(j) else 0.0 + val regularization = if (effectiveL2RegParam != 0.0) { + val shouldApply = (idx: Int) => idx >= 0 && idx < numFeatures + Some(new L2Regularization(effectiveL2RegParam, shouldApply, + if ($(standardization)) None else Some(getFeaturesStd))) + } else { + None + } + + val costFun = $(loss) match { + case SquaredError => + val getAggregatorFunc = new LeastSquaresAggregator(yStd, yMean, $(fitIntercept), + bcFeaturesStd, bcFeaturesMean)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + case Huber => + val getAggregatorFunc = new HuberAggregator($(fitIntercept), $(epsilon), bcFeaturesStd)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + } + + val optimizer = $(loss) match { + case SquaredError => + if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + val standardizationParam = $(standardization) + def effectiveL1RegFun = (index: Int) => { + if (standardizationParam) { + effectiveL1RegParam + } else { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. + if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0 + } + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegFun, $(tol)) + } + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + val lowerBounds = BDV[Double](Array.fill(dim)(Double.MinValue)) + // Optimize huber loss in space "\sigma > 0" + lowerBounds(dim - 1) = Double.MinPositiveValue + val upperBounds = BDV[Double](Array.fill(dim)(Double.MaxValue)) + new BreezeLBFGSB(lowerBounds, upperBounds, $(maxIter), 10, $(tol)) + } + + val initialValues = $(loss) match { + case SquaredError => + Vectors.zeros(numFeatures) + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + Vectors.dense(Array.fill(dim)(1.0)) + } + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialValues.asBreeze.toDenseVector) + + val (coefficients, intercept, scale, objectiveHistory) = { + /* + Note that in Linear Regression, the objective history (loss + regularization) returned + from optimizer is computed in the scaled space given by the following formula. +
+ $$ + L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 + + regTerms \\ + $$ +
+ */ + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + if (state == null) { + val msg = s"${optimizer.getClass.getName} failed." + instr.logError(msg) + throw new SparkException(msg) + } + + bcFeaturesMean.destroy() + bcFeaturesStd.destroy() + + val parameters = state.x.toArray.clone() + + /* + The coefficients are trained in the scaled space; we're converting them back to + the original space. + */ + val rawCoefficients: Array[Double] = $(loss) match { + case SquaredError => parameters + case Huber => parameters.slice(0, numFeatures) + } + + var i = 0 + val len = rawCoefficients.length + val multiplier = $(loss) match { + case SquaredError => yStd + case Huber => 1.0 + } + while (i < len) { + rawCoefficients(i) *= { if (featuresStd(i) != 0.0) multiplier / featuresStd(i) else 0.0 } + i += 1 + } + + val interceptValue: Double = if ($(fitIntercept)) { + $(loss) match { + case SquaredError => + /* + The intercept of squared error in R's GLMNET is computed using closed form + after the coefficients are converged. See the following discussion for detail. + http://stats.stackexchange.com/questions/13617/how-is-the-intercept-computed-in-glmnet + */ + yMean - dot(Vectors.dense(rawCoefficients), Vectors.dense(featuresMean)) + case Huber => parameters(numFeatures) + } + } else { + 0.0 + } + + val scaleValue: Double = $(loss) match { + case SquaredError => 1.0 + case Huber => parameters.last + } + + (Vectors.dense(rawCoefficients).compressed, interceptValue, scaleValue, arrayBuilder.result()) + } + + if (handlePersistence) instances.unpersist() + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept, scale)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + objectiveHistory) + + model.setSummary(Some(trainingSummary)) + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra) +} + +@Since("1.6.0") +object LinearRegression extends DefaultParamsReadable[LinearRegression] { + + @Since("1.6.0") + override def load(path: String): LinearRegression = super.load(path) + + /** + * When using `LinearRegression.solver` == "normal", the solver must limit the number of + * features to at most this number. The entire covariance matrix X^T^X will be collected + * to the driver. This limit helps prevent memory overflow errors. + */ + @Since("2.1.0") + val MAX_FEATURES_FOR_NORMAL_SOLVER: Int = WeightedLeastSquares.MAX_NUM_FEATURES + + /** String name for "auto". */ + private[regression] val Auto = "auto" + + /** String name for "normal". */ + private[regression] val Normal = "normal" + + /** String name for "l-bfgs". */ + private[regression] val LBFGS = "l-bfgs" + + /** Set of solvers that LinearRegression supports. */ + private[regression] val supportedSolvers = Array(Auto, Normal, LBFGS) + + /** String name for "squaredError". */ + private[regression] val SquaredError = "squaredError" + + /** String name for "huber". */ + private[regression] val Huber = "huber" + + /** Set of loss function names that LinearRegression supports. */ + private[regression] val supportedLosses = Array(SquaredError, Huber) +} + +/** + * Model produced by [[LinearRegression]]. + */ +@Since("1.3.0") +class LinearRegressionModel private[ml] ( + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val coefficients: Vector, + @Since("1.3.0") val intercept: Double, + @Since("2.3.0") val scale: Double) + extends RegressionModel[Vector, LinearRegressionModel] + with LinearRegressionParams with GeneralMLWritable + with HasTrainingSummary[LinearRegressionTrainingSummary] { + + private[ml] def this(uid: String, coefficients: Vector, intercept: Double) = + this(uid, coefficients, intercept, 1.0) + + override val numFeatures: Int = coefficients.size + + /** + * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is + * thrown if `hasSummary` is false. + */ + @Since("1.5.0") + override def summary: LinearRegressionTrainingSummary = super.summary + + /** + * Evaluates the model on a test dataset. + * + * @param dataset Test dataset to evaluate model on. + */ + @Since("2.0.0") + def evaluate(dataset: Dataset[_]): LinearRegressionSummary = { + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = findSummaryModelAndPredictionCol() + new LinearRegressionSummary(summaryModel.transform(dataset), predictionColName, + $(labelCol), $(featuresCol), summaryModel, Array(0D)) + } + + /** + * If the prediction column is set returns the current model and prediction column, + * otherwise generates a new column and sets it as the prediction column on a new copy + * of the current model. + */ + private[regression] def findSummaryModelAndPredictionCol(): (LinearRegressionModel, String) = { + $(predictionCol) match { + case "" => + val predictionColName = "prediction_" + java.util.UUID.randomUUID.toString + (copy(ParamMap.empty).setPredictionCol(predictionColName), predictionColName) + case p => (this, p) + } + } + + + override def predict(features: Vector): Double = { + dot(features, coefficients) + intercept + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegressionModel = { + val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra) + newModel.setSummary(trainingSummary).setParent(parent) + } + + /** + * Returns a [[org.apache.spark.ml.util.GeneralMLWriter]] instance for this ML instance. + * + * For [[LinearRegressionModel]], this does NOT currently save the training [[summary]]. + * An option to save [[summary]] may be added in the future. + * + * This also does not save the [[parent]] currently. + */ + @Since("1.6.0") + override def write: GeneralMLWriter = new GeneralMLWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LinearRegressionModel: uid=$uid, numFeatures=$numFeatures" + } +} + +/** A writer for LinearRegression that handles the "internal" (or default) format */ +private class InternalLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "internal" + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector, scale: Double) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val instance = stage.asInstanceOf[LinearRegressionModel] + val sc = sparkSession.sparkContext + // Save metadata and Params + DefaultParamsWriter.saveMetadata(instance, path, sc) + // Save model data: intercept, coefficients, scale + val data = Data(instance.intercept, instance.coefficients, instance.scale) + val dataPath = new Path(path, "data").toString + sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) + } +} + +/** A writer for LinearRegression that handles the "pmml" format */ +private class PMMLLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "pmml" + + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val sc = sparkSession.sparkContext + // Construct the MLLib model which knows how to write to PMML. + val instance = stage.asInstanceOf[LinearRegressionModel] + val oldModel = new OldLinearRegressionModel(instance.coefficients, instance.intercept) + // Save PMML + oldModel.toPMML(sc, path) + } +} + +@Since("1.6.0") +object LinearRegressionModel extends MLReadable[LinearRegressionModel] { + + @Since("1.6.0") + override def read: MLReader[LinearRegressionModel] = new LinearRegressionModelReader + + @Since("1.6.0") + override def load(path: String): LinearRegressionModel = super.load(path) + + private class LinearRegressionModelReader extends MLReader[LinearRegressionModel] { + + /** Checked against metadata when loading model */ + private val className = classOf[LinearRegressionModel].getName + + override def load(path: String): LinearRegressionModel = { + val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + + val dataPath = new Path(path, "data").toString + val data = sparkSession.read.format("parquet").load(dataPath) + val (majorVersion, minorVersion) = majorMinorVersion(metadata.sparkVersion) + val model = if (majorVersion < 2 || (majorVersion == 2 && minorVersion <= 2)) { + // Spark 2.2 and before + val Row(intercept: Double, coefficients: Vector) = + MLUtils.convertVectorColumnsToML(data, "coefficients") + .select("intercept", "coefficients") + .head() + new LinearRegressionModel(metadata.uid, coefficients, intercept) + } else { + // Spark 2.3 and later + val Row(intercept: Double, coefficients: Vector, scale: Double) = + data.select("intercept", "coefficients", "scale").head() + new LinearRegressionModel(metadata.uid, coefficients, intercept, scale) + } + + metadata.getAndSetParams(model) + model + } + } +} + +/** + * Linear regression training results. Currently, the training summary ignores the + * training weights except for the objective trace. + * + * @param predictions predictions output by the model's `transform` method. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Since("1.5.0") +class LinearRegressionTrainingSummary private[regression] ( + predictions: DataFrame, + predictionCol: String, + labelCol: String, + featuresCol: String, + model: LinearRegressionModel, + diagInvAtWA: Array[Double], + val objectiveHistory: Array[Double]) + extends LinearRegressionSummary( + predictions, + predictionCol, + labelCol, + featuresCol, + model, + diagInvAtWA) { + + /** + * Number of training iterations until termination + * + * This value is only available when using the "l-bfgs" solver. + * + * @see `LinearRegression.solver` + */ + @Since("1.5.0") + val totalIterations = objectiveHistory.length + +} + +/** + * Linear regression results evaluated on a dataset. + * + * @param predictions predictions output by the model's `transform` method. + * @param predictionCol Field in "predictions" which gives the predicted value of the label at + * each instance. + * @param labelCol Field in "predictions" which gives the true label of each instance. + * @param featuresCol Field in "predictions" which gives the features of each instance as a vector. + */ +@Since("1.5.0") +class LinearRegressionSummary private[regression] ( + @transient val predictions: DataFrame, + val predictionCol: String, + val labelCol: String, + val featuresCol: String, + private val privateModel: LinearRegressionModel, + private val diagInvAtWA: Array[Double]) extends Serializable { + + @transient private val metrics = new RegressionMetrics( + predictions + .select(col(predictionCol), col(labelCol).cast(DoubleType)) + .rdd + .map { case Row(pred: Double, label: Double) => (pred, label) }, + !privateModel.getFitIntercept) + + /** + * Returns the explained variance regression score. + * explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + * Reference: + * Wikipedia explain variation + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val explainedVariance: Double = metrics.explainedVariance + + /** + * Returns the mean absolute error, which is a risk function corresponding to the + * expected value of the absolute error loss or l1-norm loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanAbsoluteError: Double = metrics.meanAbsoluteError + + /** + * Returns the mean squared error, which is a risk function corresponding to the + * expected value of the squared error loss or quadratic loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanSquaredError: Double = metrics.meanSquaredError + + /** + * Returns the root mean squared error, which is defined as the square root of + * the mean squared error. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val rootMeanSquaredError: Double = metrics.rootMeanSquaredError + + /** + * Returns R^2^, the coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val r2: Double = metrics.r2 + + /** + * Returns Adjusted R^2^, the adjusted coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("2.3.0") + val r2adj: Double = { + val interceptDOF = if (privateModel.getFitIntercept) 1 else 0 + 1 - (1 - r2) * (numInstances - interceptDOF) / + (numInstances - privateModel.coefficients.size - interceptDOF) + } + + /** Residuals (label - predicted value) */ + @Since("1.5.0") + @transient lazy val residuals: DataFrame = { + val t = udf { (pred: Double, label: Double) => label - pred } + predictions.select(t(col(predictionCol), col(labelCol)).as("residuals")) + } + + /** Number of instances in DataFrame predictions */ + lazy val numInstances: Long = predictions.count() + + /** Degrees of freedom */ + @Since("2.2.0") + val degreesOfFreedom: Long = if (privateModel.getFitIntercept) { + numInstances - privateModel.coefficients.size - 1 + } else { + numInstances - privateModel.coefficients.size + } + + /** + * The weighted residuals, the usual residuals rescaled by + * the square root of the instance weights. + */ + lazy val devianceResiduals: Array[Double] = { + val weighted = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + lit(1.0) + } else { + sqrt(col(privateModel.getWeightCol)) + } + val dr = predictions + .select(col(privateModel.getLabelCol).minus(col(privateModel.getPredictionCol)) + .multiply(weighted).as("weightedResiduals")) + .select(min(col("weightedResiduals")).as("min"), max(col("weightedResiduals")).as("max")) + .first() + Array(dr.getDouble(0), dr.getDouble(1)) + } + + /** + * Standard error of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val coefficientStandardErrors: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No Std. Error of coefficients available for this LinearRegressionModel") + } else { + val rss = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + meanSquaredError * numInstances + } else { + val t = udf { (pred: Double, label: Double, weight: Double) => + math.pow(label - pred, 2.0) * weight } + predictions.select(t(col(privateModel.getPredictionCol), col(privateModel.getLabelCol), + col(privateModel.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0) + } + val sigma2 = rss / degreesOfFreedom + diagInvAtWA.map(_ * sigma2).map(math.sqrt) + } + } + + /** + * T-statistic of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val tValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No t-statistic available for this LinearRegressionModel") + } else { + val estimate = if (privateModel.getFitIntercept) { + Array.concat(privateModel.coefficients.toArray, Array(privateModel.intercept)) + } else { + privateModel.coefficients.toArray + } + estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } + } + } + + /** + * Two-sided p-value of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val pValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No p-value available for this LinearRegressionModel") + } else { + tValues.map { x => 2.0 * (1.0 - StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } + } + } + +} + diff --git a/mllib-dal/src/spark-3.0.2/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib-dal/src/spark-3.0.2/main/scala/org/apache/spark/ml/regression/LinearRegression.scala new file mode 100644 index 000000000..85b8be9ff --- /dev/null +++ b/mllib-dal/src/spark-3.0.2/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -0,0 +1,1057 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import scala.collection.mutable + +import breeze.linalg.{DenseVector => BDV} +import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.stats.distributions.StudentsT +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.ml.{PipelineStage, PredictorParams} +import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.BLAS._ +import org.apache.spark.ml.optim.WeightedLeastSquares +import org.apache.spark.ml.optim.aggregator.{HuberAggregator, LeastSquaresAggregator} +import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} +import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.stat._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.util.Instrumentation.instrumented +import org.apache.spark.mllib.evaluation.RegressionMetrics +import org.apache.spark.mllib.linalg.VectorImplicits._ +import org.apache.spark.mllib.regression.{LinearRegressionModel => OldLinearRegressionModel} +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.VersionUtils.majorMinorVersion + +/** + * Params for linear regression. + */ +private[regression] trait LinearRegressionParams extends PredictorParams + with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol + with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver + with HasAggregationDepth with HasLoss { + + import LinearRegression._ + + /** + * The solver algorithm for optimization. + * Supported options: "l-bfgs", "normal" and "auto". + * Default: "auto" + * + * @group param + */ + @Since("1.6.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default auto)", + ParamValidators.inArray[String](supportedSolvers)) + + /** + * The loss function to be optimized. + * Supported options: "squaredError" and "huber". + * Default: "squaredError" + * + * @group param + */ + @Since("2.3.0") + final override val loss: Param[String] = new Param[String](this, "loss", "The loss function to" + + s" be optimized. Supported options: ${supportedLosses.mkString(", ")}. (Default squaredError)", + ParamValidators.inArray[String](supportedLosses)) + + /** + * The shape parameter to control the amount of robustness. Must be > 1.0. + * At larger values of epsilon, the huber criterion becomes more similar to least squares + * regression; for small values of epsilon, the criterion is more similar to L1 regression. + * Default is 1.35 to get as much robustness as possible while retaining + * 95% statistical efficiency for normally distributed data. It matches sklearn + * HuberRegressor and is "M" from + * A robust hybrid of lasso and ridge regression. + * Only valid when "loss" is "huber". + * + * @group expertParam + */ + @Since("2.3.0") + final val epsilon = new DoubleParam(this, "epsilon", "The shape parameter to control the " + + "amount of robustness. Must be > 1.0.", ParamValidators.gt(1.0)) + + /** @group getExpertParam */ + @Since("2.3.0") + def getEpsilon: Double = $(epsilon) + + setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, + elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35) + + override protected def validateAndTransformSchema( + schema: StructType, + fitting: Boolean, + featuresDataType: DataType): StructType = { + if (fitting) { + if ($(loss) == Huber) { + require($(solver)!= Normal, "LinearRegression with huber loss doesn't support " + + "normal solver, please change solver to auto or l-bfgs.") + require($(elasticNetParam) == 0.0, "LinearRegression with huber loss only supports " + + s"L2 regularization, but got elasticNetParam = $getElasticNetParam.") + } + } + super.validateAndTransformSchema(schema, fitting, featuresDataType) + } +} + +/** + * Linear regression. + * + * The learning objective is to minimize the specified loss function, with regularization. + * This supports two kinds of loss: + * - squaredError (a.k.a squared loss) + * - huber (a hybrid of squared error for relatively small errors and absolute error for + * relatively large ones, and we estimate the scale parameter from training data) + * + * This supports multiple types of regularization: + * - none (a.k.a. ordinary least squares) + * - L2 (ridge regression) + * - L1 (Lasso) + * - L2 + L1 (elastic net) + * + * The squared error objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w}\frac{1}{2n}{\sum_{i=1}^n(X_{i}w - y_{i})^{2} + + * \lambda\left[\frac{1-\alpha}{2}{||w||_{2}}^{2} + \alpha{||w||_{1}}\right]} + * \end{align} + * $$ + *
+ * + * The huber objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w, \sigma}\frac{1}{2n}{\sum_{i=1}^n\left(\sigma + + * H_m\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \frac{1}{2}\lambda {||w||_2}^2} + * \end{align} + * $$ + *
+ * + * where + * + *
+ * $$ + * \begin{align} + * H_m(z) = \begin{cases} + * z^2, & \text {if } |z| < \epsilon, \\ + * 2\epsilon|z| - \epsilon^2, & \text{otherwise} + * \end{cases} + * \end{align} + * $$ + *
+ * + * Note: Fitting with huber loss only supports none and L2 regularization. + */ +@Since("1.3.0") +class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] + with LinearRegressionParams with DefaultParamsWritable with Logging { + + import LinearRegression._ + + @Since("1.4.0") + def this() = this(Identifiable.randomUID("linReg")) + + /** + * Set the regularization parameter. + * Default is 0.0. + * + * @group setParam + */ + @Since("1.3.0") + def setRegParam(value: Double): this.type = set(regParam, value) + + /** + * Set if we should fit the intercept. + * Default is true. + * + * @group setParam + */ + @Since("1.5.0") + def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + + /** + * Whether to standardize the training features before fitting the model. + * The coefficients of models will be always returned on the original scale, + * so it will be transparent for users. + * Default is true. + * + * @note With/without standardization, the models should be always converged + * to the same solution when no regularization is applied. In R's GLMNET package, + * the default behavior is true as well. + * + * @group setParam + */ + @Since("1.5.0") + def setStandardization(value: Boolean): this.type = set(standardization, value) + + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. + * For alpha = 1, it is an L1 penalty. + * For alpha in (0,1), the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * + * Note: Fitting with huber loss only supports None and L2 regularization, + * so throws exception if this param is non-zero value. + * + * @group setParam + */ + @Since("1.4.0") + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + + /** + * Set the maximum number of iterations. + * Default is 100. + * + * @group setParam + */ + @Since("1.3.0") + def setMaxIter(value: Int): this.type = set(maxIter, value) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. + * + * @group setParam + */ + @Since("1.4.0") + def setTol(value: Double): this.type = set(tol, value) + + /** + * Whether to over-/under-sample training instances according to the given weights in weightCol. + * If not set or empty, all instances are treated equally (weight 1.0). + * Default is not set, so all instances have weight one. + * + * @group setParam + */ + @Since("1.6.0") + def setWeightCol(value: String): this.type = set(weightCol, value) + + /** + * Set the solver algorithm used for optimization. + * In case of linear regression, this can be "l-bfgs", "normal" and "auto". + * - "l-bfgs" denotes Limited-memory BFGS which is a limited-memory quasi-Newton + * optimization method. + * - "normal" denotes using Normal Equation as an analytical solution to the linear regression + * problem. This solver is limited to `LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER`. + * - "auto" (default) means that the solver algorithm is selected automatically. + * The Normal Equations solver will be used when possible, but this will automatically fall + * back to iterative optimization methods when needed. + * + * Note: Fitting with huber loss doesn't support normal solver, + * so throws exception if this param was set with "normal". + * @group setParam + */ + @Since("1.6.0") + def setSolver(value: String): this.type = set(solver, value) + + /** + * Suggested depth for treeAggregate (greater than or equal to 2). + * If the dimensions of features or the number of partitions are large, + * this param could be adjusted to a larger size. + * Default is 2. + * + * @group expertSetParam + */ + @Since("2.1.0") + def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) + + /** + * Sets the value of param [[loss]]. + * Default is "squaredError". + * + * @group setParam + */ + @Since("2.3.0") + def setLoss(value: String): this.type = set(loss, value) + + /** + * Sets the value of param [[epsilon]]. + * Default is 1.35. + * + * @group setExpertParam + */ + @Since("2.3.0") + def setEpsilon(value: Double): this.type = set(epsilon, value) + + override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => + // Extract the number of features before deciding optimization solver. + val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol)) + + val instances = extractInstances(dataset) + + instr.logPipelineStage(this) + instr.logDataset(dataset) + instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, solver, tol, + elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth, loss, + epsilon) + instr.logNumFeatures(numFeatures) + + if ($(loss) == SquaredError && (($(solver) == Auto && + numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == Normal)) { + // oneDAL only support simple linear regression and ridge regression + val paramSupported = ($(regParam) == 0) || ($(regParam) != 0 && $(elasticNetParam) == 0) + if (paramSupported && Utils.isOAPEnabled) { + val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) + val executor_cores = Utils.sparkExecutorCores() + logInfo(s"LinearRegressionDAL fit using $executor_num Executors") + + val optimizer = new LinearRegressionDALImpl($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + executor_num, executor_cores) + + // Return same model as WeightedLeastSquaresModel + val model = optimizer.train(dataset, Some(instr)) + + val lrModel = copyValues( + new LinearRegressionModel(uid, model.coefficients, model.intercept)) + + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } else { + // For low dimensional data, WeightedLeastSquares is more efficient since the + // training algorithm only requires one pass through the data. (SPARK-10668) + + val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + solverType = WeightedLeastSquares.Auto, maxIter = $(maxIter), tol = $(tol)) + val model = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr)) + // When it is trained by WeightedLeastSquares, training summary does not + // attach returned model. + val lrModel = copyValues(new LinearRegressionModel(uid, model.coefficients, model.intercept)) + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } + } + + val handlePersistence = dataset.storageLevel == StorageLevel.NONE + if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) + + val (featuresSummarizer, ySummarizer) = instances.treeAggregate( + (Summarizer.createSummarizerBuffer("mean", "std"), + Summarizer.createSummarizerBuffer("mean", "std", "count")))( + seqOp = (c: (SummarizerBuffer, SummarizerBuffer), instance: Instance) => + (c._1.add(instance.features, instance.weight), + c._2.add(Vectors.dense(instance.label), instance.weight)), + combOp = (c1: (SummarizerBuffer, SummarizerBuffer), + c2: (SummarizerBuffer, SummarizerBuffer)) => + (c1._1.merge(c2._1), c1._2.merge(c2._2)), + depth = $(aggregationDepth) + ) + + val yMean = ySummarizer.mean(0) + val rawYStd = ySummarizer.std(0) + + instr.logNumExamples(ySummarizer.count) + instr.logNamedValue(Instrumentation.loggerTags.meanOfLabels, yMean) + instr.logNamedValue(Instrumentation.loggerTags.varianceOfLabels, rawYStd) + instr.logSumOfWeights(featuresSummarizer.weightSum) + + if (rawYStd == 0.0) { + if ($(fitIntercept) || yMean == 0.0) { + // If the rawYStd==0 and fitIntercept==true, then the intercept is yMean with + // zero coefficient; as a result, training is not needed. + // Also, if yMean==0 and rawYStd==0, all the coefficients are zero regardless of + // the fitIntercept. + if (yMean == 0.0) { + instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + + s"coefficients and the intercept will all be zero; as a result, training is not " + + s"needed.") + } else { + instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + + s"will be zeros and the intercept will be the mean of the label; as a result, " + + s"training is not needed.") + } + if (handlePersistence) instances.unpersist() + val coefficients = Vectors.sparse(numFeatures, Seq.empty) + val intercept = yMean + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + Array(0D)) + + return model.setSummary(Some(trainingSummary)) + } else { + require($(regParam) == 0.0, "The standard deviation of the label is zero. " + + "Model cannot be regularized.") + instr.logWarning(s"The standard deviation of the label is zero. " + + "Consider setting fitIntercept=true.") + } + } + + // if y is constant (rawYStd is zero), then y cannot be scaled. In this case + // setting yStd=abs(yMean) ensures that y is not scaled anymore in l-bfgs algorithm. + val yStd = if (rawYStd > 0) rawYStd else math.abs(yMean) + val featuresMean = featuresSummarizer.mean.toArray + val featuresStd = featuresSummarizer.std.toArray + val bcFeaturesMean = instances.context.broadcast(featuresMean) + val bcFeaturesStd = instances.context.broadcast(featuresStd) + + if (!$(fitIntercept) && (0 until numFeatures).exists { i => + featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { + instr.logWarning("Fitting LinearRegressionModel without intercept on dataset with " + + "constant nonzero column, Spark MLlib outputs zero coefficients for constant nonzero " + + "columns. This behavior is the same as R glmnet but different from LIBSVM.") + } + + // Since we implicitly do the feature scaling when we compute the cost function + // to improve the convergence, the effective regParam will be changed. + val effectiveRegParam = $(loss) match { + case SquaredError => $(regParam) / yStd + case Huber => $(regParam) + } + val effectiveL1RegParam = $(elasticNetParam) * effectiveRegParam + val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam + + val getFeaturesStd = (j: Int) => if (j >= 0 && j < numFeatures) featuresStd(j) else 0.0 + val regularization = if (effectiveL2RegParam != 0.0) { + val shouldApply = (idx: Int) => idx >= 0 && idx < numFeatures + Some(new L2Regularization(effectiveL2RegParam, shouldApply, + if ($(standardization)) None else Some(getFeaturesStd))) + } else { + None + } + + val costFun = $(loss) match { + case SquaredError => + val getAggregatorFunc = new LeastSquaresAggregator(yStd, yMean, $(fitIntercept), + bcFeaturesStd, bcFeaturesMean)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + case Huber => + val getAggregatorFunc = new HuberAggregator($(fitIntercept), $(epsilon), bcFeaturesStd)(_) + new RDDLossFunction(instances, getAggregatorFunc, regularization, $(aggregationDepth)) + } + + val optimizer = $(loss) match { + case SquaredError => + if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + val standardizationParam = $(standardization) + def effectiveL1RegFun = (index: Int) => { + if (standardizationParam) { + effectiveL1RegParam + } else { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. + if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0 + } + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegFun, $(tol)) + } + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + val lowerBounds = BDV[Double](Array.fill(dim)(Double.MinValue)) + // Optimize huber loss in space "\sigma > 0" + lowerBounds(dim - 1) = Double.MinPositiveValue + val upperBounds = BDV[Double](Array.fill(dim)(Double.MaxValue)) + new BreezeLBFGSB(lowerBounds, upperBounds, $(maxIter), 10, $(tol)) + } + + val initialValues = $(loss) match { + case SquaredError => + Vectors.zeros(numFeatures) + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + Vectors.dense(Array.fill(dim)(1.0)) + } + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialValues.asBreeze.toDenseVector) + + val (coefficients, intercept, scale, objectiveHistory) = { + /* + Note that in Linear Regression, the objective history (loss + regularization) returned + from optimizer is computed in the scaled space given by the following formula. +
+ $$ + L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 + + regTerms \\ + $$ +
+ */ + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + if (state == null) { + val msg = s"${optimizer.getClass.getName} failed." + instr.logError(msg) + throw new SparkException(msg) + } + + bcFeaturesMean.destroy() + bcFeaturesStd.destroy() + + val parameters = state.x.toArray.clone() + + /* + The coefficients are trained in the scaled space; we're converting them back to + the original space. + */ + val rawCoefficients: Array[Double] = $(loss) match { + case SquaredError => parameters + case Huber => parameters.slice(0, numFeatures) + } + + var i = 0 + val len = rawCoefficients.length + val multiplier = $(loss) match { + case SquaredError => yStd + case Huber => 1.0 + } + while (i < len) { + rawCoefficients(i) *= { if (featuresStd(i) != 0.0) multiplier / featuresStd(i) else 0.0 } + i += 1 + } + + val interceptValue: Double = if ($(fitIntercept)) { + $(loss) match { + case SquaredError => + /* + The intercept of squared error in R's GLMNET is computed using closed form + after the coefficients are converged. See the following discussion for detail. + http://stats.stackexchange.com/questions/13617/how-is-the-intercept-computed-in-glmnet + */ + yMean - dot(Vectors.dense(rawCoefficients), Vectors.dense(featuresMean)) + case Huber => parameters(numFeatures) + } + } else { + 0.0 + } + + val scaleValue: Double = $(loss) match { + case SquaredError => 1.0 + case Huber => parameters.last + } + + (Vectors.dense(rawCoefficients).compressed, interceptValue, scaleValue, arrayBuilder.result()) + } + + if (handlePersistence) instances.unpersist() + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept, scale)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + model, + Array(0D), + objectiveHistory) + + model.setSummary(Some(trainingSummary)) + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra) +} + +@Since("1.6.0") +object LinearRegression extends DefaultParamsReadable[LinearRegression] { + + @Since("1.6.0") + override def load(path: String): LinearRegression = super.load(path) + + /** + * When using `LinearRegression.solver` == "normal", the solver must limit the number of + * features to at most this number. The entire covariance matrix X^T^X will be collected + * to the driver. This limit helps prevent memory overflow errors. + */ + @Since("2.1.0") + val MAX_FEATURES_FOR_NORMAL_SOLVER: Int = WeightedLeastSquares.MAX_NUM_FEATURES + + /** String name for "auto". */ + private[regression] val Auto = "auto" + + /** String name for "normal". */ + private[regression] val Normal = "normal" + + /** String name for "l-bfgs". */ + private[regression] val LBFGS = "l-bfgs" + + /** Set of solvers that LinearRegression supports. */ + private[regression] val supportedSolvers = Array(Auto, Normal, LBFGS) + + /** String name for "squaredError". */ + private[regression] val SquaredError = "squaredError" + + /** String name for "huber". */ + private[regression] val Huber = "huber" + + /** Set of loss function names that LinearRegression supports. */ + private[regression] val supportedLosses = Array(SquaredError, Huber) +} + +/** + * Model produced by [[LinearRegression]]. + */ +@Since("1.3.0") +class LinearRegressionModel private[ml] ( + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val coefficients: Vector, + @Since("1.3.0") val intercept: Double, + @Since("2.3.0") val scale: Double) + extends RegressionModel[Vector, LinearRegressionModel] + with LinearRegressionParams with GeneralMLWritable + with HasTrainingSummary[LinearRegressionTrainingSummary] { + + private[ml] def this(uid: String, coefficients: Vector, intercept: Double) = + this(uid, coefficients, intercept, 1.0) + + override val numFeatures: Int = coefficients.size + + /** + * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is + * thrown if `hasSummary` is false. + */ + @Since("1.5.0") + override def summary: LinearRegressionTrainingSummary = super.summary + + /** + * Evaluates the model on a test dataset. + * + * @param dataset Test dataset to evaluate model on. + */ + @Since("2.0.0") + def evaluate(dataset: Dataset[_]): LinearRegressionSummary = { + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = findSummaryModelAndPredictionCol() + new LinearRegressionSummary(summaryModel.transform(dataset), predictionColName, + $(labelCol), $(featuresCol), summaryModel, Array(0D)) + } + + /** + * If the prediction column is set returns the current model and prediction column, + * otherwise generates a new column and sets it as the prediction column on a new copy + * of the current model. + */ + private[regression] def findSummaryModelAndPredictionCol(): (LinearRegressionModel, String) = { + $(predictionCol) match { + case "" => + val predictionColName = "prediction_" + java.util.UUID.randomUUID.toString + (copy(ParamMap.empty).setPredictionCol(predictionColName), predictionColName) + case p => (this, p) + } + } + + + override def predict(features: Vector): Double = { + dot(features, coefficients) + intercept + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegressionModel = { + val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra) + newModel.setSummary(trainingSummary).setParent(parent) + } + + /** + * Returns a [[org.apache.spark.ml.util.GeneralMLWriter]] instance for this ML instance. + * + * For [[LinearRegressionModel]], this does NOT currently save the training [[summary]]. + * An option to save [[summary]] may be added in the future. + * + * This also does not save the [[parent]] currently. + */ + @Since("1.6.0") + override def write: GeneralMLWriter = new GeneralMLWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LinearRegressionModel: uid=$uid, numFeatures=$numFeatures" + } +} + +/** A writer for LinearRegression that handles the "internal" (or default) format */ +private class InternalLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "internal" + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector, scale: Double) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val instance = stage.asInstanceOf[LinearRegressionModel] + val sc = sparkSession.sparkContext + // Save metadata and Params + DefaultParamsWriter.saveMetadata(instance, path, sc) + // Save model data: intercept, coefficients, scale + val data = Data(instance.intercept, instance.coefficients, instance.scale) + val dataPath = new Path(path, "data").toString + sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) + } +} + +/** A writer for LinearRegression that handles the "pmml" format */ +private class PMMLLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "pmml" + + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val sc = sparkSession.sparkContext + // Construct the MLLib model which knows how to write to PMML. + val instance = stage.asInstanceOf[LinearRegressionModel] + val oldModel = new OldLinearRegressionModel(instance.coefficients, instance.intercept) + // Save PMML + oldModel.toPMML(sc, path) + } +} + +@Since("1.6.0") +object LinearRegressionModel extends MLReadable[LinearRegressionModel] { + + @Since("1.6.0") + override def read: MLReader[LinearRegressionModel] = new LinearRegressionModelReader + + @Since("1.6.0") + override def load(path: String): LinearRegressionModel = super.load(path) + + private class LinearRegressionModelReader extends MLReader[LinearRegressionModel] { + + /** Checked against metadata when loading model */ + private val className = classOf[LinearRegressionModel].getName + + override def load(path: String): LinearRegressionModel = { + val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + + val dataPath = new Path(path, "data").toString + val data = sparkSession.read.format("parquet").load(dataPath) + val (majorVersion, minorVersion) = majorMinorVersion(metadata.sparkVersion) + val model = if (majorVersion < 2 || (majorVersion == 2 && minorVersion <= 2)) { + // Spark 2.2 and before + val Row(intercept: Double, coefficients: Vector) = + MLUtils.convertVectorColumnsToML(data, "coefficients") + .select("intercept", "coefficients") + .head() + new LinearRegressionModel(metadata.uid, coefficients, intercept) + } else { + // Spark 2.3 and later + val Row(intercept: Double, coefficients: Vector, scale: Double) = + data.select("intercept", "coefficients", "scale").head() + new LinearRegressionModel(metadata.uid, coefficients, intercept, scale) + } + + metadata.getAndSetParams(model) + model + } + } +} + +/** + * Linear regression training results. Currently, the training summary ignores the + * training weights except for the objective trace. + * + * @param predictions predictions output by the model's `transform` method. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Since("1.5.0") +class LinearRegressionTrainingSummary private[regression] ( + predictions: DataFrame, + predictionCol: String, + labelCol: String, + featuresCol: String, + model: LinearRegressionModel, + diagInvAtWA: Array[Double], + val objectiveHistory: Array[Double]) + extends LinearRegressionSummary( + predictions, + predictionCol, + labelCol, + featuresCol, + model, + diagInvAtWA) { + + /** + * Number of training iterations until termination + * + * This value is only available when using the "l-bfgs" solver. + * + * @see `LinearRegression.solver` + */ + @Since("1.5.0") + val totalIterations = objectiveHistory.length + +} + +/** + * Linear regression results evaluated on a dataset. + * + * @param predictions predictions output by the model's `transform` method. + * @param predictionCol Field in "predictions" which gives the predicted value of the label at + * each instance. + * @param labelCol Field in "predictions" which gives the true label of each instance. + * @param featuresCol Field in "predictions" which gives the features of each instance as a vector. + */ +@Since("1.5.0") +class LinearRegressionSummary private[regression] ( + @transient val predictions: DataFrame, + val predictionCol: String, + val labelCol: String, + val featuresCol: String, + private val privateModel: LinearRegressionModel, + private val diagInvAtWA: Array[Double]) extends Serializable { + + @transient private val metrics = new RegressionMetrics( + predictions + .select(col(predictionCol), col(labelCol).cast(DoubleType)) + .rdd + .map { case Row(pred: Double, label: Double) => (pred, label) }, + !privateModel.getFitIntercept) + + /** + * Returns the explained variance regression score. + * explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + * Reference: + * Wikipedia explain variation + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val explainedVariance: Double = metrics.explainedVariance + + /** + * Returns the mean absolute error, which is a risk function corresponding to the + * expected value of the absolute error loss or l1-norm loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanAbsoluteError: Double = metrics.meanAbsoluteError + + /** + * Returns the mean squared error, which is a risk function corresponding to the + * expected value of the squared error loss or quadratic loss. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val meanSquaredError: Double = metrics.meanSquaredError + + /** + * Returns the root mean squared error, which is defined as the square root of + * the mean squared error. + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val rootMeanSquaredError: Double = metrics.rootMeanSquaredError + + /** + * Returns R^2^, the coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("1.5.0") + val r2: Double = metrics.r2 + + /** + * Returns Adjusted R^2^, the adjusted coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + * + * @note This ignores instance weights (setting all to 1.0) from `LinearRegression.weightCol`. + * This will change in later Spark versions. + */ + @Since("2.3.0") + val r2adj: Double = { + val interceptDOF = if (privateModel.getFitIntercept) 1 else 0 + 1 - (1 - r2) * (numInstances - interceptDOF) / + (numInstances - privateModel.coefficients.size - interceptDOF) + } + + /** Residuals (label - predicted value) */ + @Since("1.5.0") + @transient lazy val residuals: DataFrame = { + val t = udf { (pred: Double, label: Double) => label - pred } + predictions.select(t(col(predictionCol), col(labelCol)).as("residuals")) + } + + /** Number of instances in DataFrame predictions */ + lazy val numInstances: Long = predictions.count() + + /** Degrees of freedom */ + @Since("2.2.0") + val degreesOfFreedom: Long = if (privateModel.getFitIntercept) { + numInstances - privateModel.coefficients.size - 1 + } else { + numInstances - privateModel.coefficients.size + } + + /** + * The weighted residuals, the usual residuals rescaled by + * the square root of the instance weights. + */ + lazy val devianceResiduals: Array[Double] = { + val weighted = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + lit(1.0) + } else { + sqrt(col(privateModel.getWeightCol)) + } + val dr = predictions + .select(col(privateModel.getLabelCol).minus(col(privateModel.getPredictionCol)) + .multiply(weighted).as("weightedResiduals")) + .select(min(col("weightedResiduals")).as("min"), max(col("weightedResiduals")).as("max")) + .first() + Array(dr.getDouble(0), dr.getDouble(1)) + } + + /** + * Standard error of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val coefficientStandardErrors: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No Std. Error of coefficients available for this LinearRegressionModel") + } else { + val rss = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + meanSquaredError * numInstances + } else { + val t = udf { (pred: Double, label: Double, weight: Double) => + math.pow(label - pred, 2.0) * weight } + predictions.select(t(col(privateModel.getPredictionCol), col(privateModel.getLabelCol), + col(privateModel.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0) + } + val sigma2 = rss / degreesOfFreedom + diagInvAtWA.map(_ * sigma2).map(math.sqrt) + } + } + + /** + * T-statistic of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val tValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No t-statistic available for this LinearRegressionModel") + } else { + val estimate = if (privateModel.getFitIntercept) { + Array.concat(privateModel.coefficients.toArray, Array(privateModel.intercept)) + } else { + privateModel.coefficients.toArray + } + estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } + } + } + + /** + * Two-sided p-value of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val pValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No p-value available for this LinearRegressionModel") + } else { + tValues.map { x => 2.0 * (1.0 - StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } + } + } + +} + diff --git a/mllib-dal/src/spark-3.1.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib-dal/src/spark-3.1.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala new file mode 100644 index 000000000..07ce88c23 --- /dev/null +++ b/mllib-dal/src/spark-3.1.1/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -0,0 +1,1103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import scala.collection.mutable +import breeze.linalg.{DenseVector => BDV} +import breeze.optimize.{CachedDiffFunction, DiffFunction, FirstOrderMinimizer, LBFGS => BreezeLBFGS, LBFGSB => BreezeLBFGSB, OWLQN => BreezeOWLQN} +import breeze.stats.distributions.StudentsT +import org.apache.hadoop.fs.Path +import org.apache.spark.SparkException +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.ml.{PipelineStage, PredictorParams} +import org.apache.spark.ml.feature._ +import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.BLAS._ +import org.apache.spark.ml.optim.WeightedLeastSquares +import org.apache.spark.ml.optim.aggregator._ +import org.apache.spark.ml.optim.loss.{L2Regularization, RDDLossFunction} +import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.stat._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.util.Instrumentation.instrumented +import org.apache.spark.mllib.evaluation.RegressionMetrics +import org.apache.spark.mllib.linalg.VectorImplicits._ +import org.apache.spark.mllib.regression.{LinearRegressionModel => OldLinearRegressionModel} +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.VersionUtils.majorMinorVersion + +/** + * Params for linear regression. + */ +private[regression] trait LinearRegressionParams extends PredictorParams + with HasRegParam with HasElasticNetParam with HasMaxIter with HasTol + with HasFitIntercept with HasStandardization with HasWeightCol with HasSolver + with HasAggregationDepth with HasLoss with HasMaxBlockSizeInMB { + + import LinearRegression._ + + /** + * The solver algorithm for optimization. + * Supported options: "l-bfgs", "normal" and "auto". + * Default: "auto" + * + * @group param + */ + @Since("1.6.0") + final override val solver: Param[String] = new Param[String](this, "solver", + "The solver algorithm for optimization. Supported options: " + + s"${supportedSolvers.mkString(", ")}. (Default auto)", + ParamValidators.inArray[String](supportedSolvers)) + + /** + * The loss function to be optimized. + * Supported options: "squaredError" and "huber". + * Default: "squaredError" + * + * @group param + */ + @Since("2.3.0") + final override val loss: Param[String] = new Param[String](this, "loss", "The loss function to" + + s" be optimized. Supported options: ${supportedLosses.mkString(", ")}. (Default squaredError)", + ParamValidators.inArray[String](supportedLosses)) + + /** + * The shape parameter to control the amount of robustness. Must be > 1.0. + * At larger values of epsilon, the huber criterion becomes more similar to least squares + * regression; for small values of epsilon, the criterion is more similar to L1 regression. + * Default is 1.35 to get as much robustness as possible while retaining + * 95% statistical efficiency for normally distributed data. It matches sklearn + * HuberRegressor and is "M" from + * A robust hybrid of lasso and ridge regression. + * Only valid when "loss" is "huber". + * + * @group expertParam + */ + @Since("2.3.0") + final val epsilon = new DoubleParam(this, "epsilon", "The shape parameter to control the " + + "amount of robustness. Must be > 1.0.", ParamValidators.gt(1.0)) + + /** @group getExpertParam */ + @Since("2.3.0") + def getEpsilon: Double = $(epsilon) + + setDefault(regParam -> 0.0, fitIntercept -> true, standardization -> true, + elasticNetParam -> 0.0, maxIter -> 100, tol -> 1E-6, solver -> Auto, + aggregationDepth -> 2, loss -> SquaredError, epsilon -> 1.35, maxBlockSizeInMB -> 0.0) + + override protected def validateAndTransformSchema( + schema: StructType, + fitting: Boolean, + featuresDataType: DataType): StructType = { + if (fitting) { + if ($(loss) == Huber) { + require($(solver)!= Normal, "LinearRegression with huber loss doesn't support " + + "normal solver, please change solver to auto or l-bfgs.") + require($(elasticNetParam) == 0.0, "LinearRegression with huber loss only supports " + + s"L2 regularization, but got elasticNetParam = $getElasticNetParam.") + } + } + super.validateAndTransformSchema(schema, fitting, featuresDataType) + } +} + +/** + * Linear regression. + * + * The learning objective is to minimize the specified loss function, with regularization. + * This supports two kinds of loss: + * - squaredError (a.k.a squared loss) + * - huber (a hybrid of squared error for relatively small errors and absolute error for + * relatively large ones, and we estimate the scale parameter from training data) + * + * This supports multiple types of regularization: + * - none (a.k.a. ordinary least squares) + * - L2 (ridge regression) + * - L1 (Lasso) + * - L2 + L1 (elastic net) + * + * The squared error objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w}\frac{1}{2n}{\sum_{i=1}^n(X_{i}w - y_{i})^{2} + + * \lambda\left[\frac{1-\alpha}{2}{||w||_{2}}^{2} + \alpha{||w||_{1}}\right]} + * \end{align} + * $$ + *
+ * + * The huber objective function is: + * + *
+ * $$ + * \begin{align} + * \min_{w, \sigma}\frac{1}{2n}{\sum_{i=1}^n\left(\sigma + + * H_m\left(\frac{X_{i}w - y_{i}}{\sigma}\right)\sigma\right) + \frac{1}{2}\lambda {||w||_2}^2} + * \end{align} + * $$ + *
+ * + * where + * + *
+ * $$ + * \begin{align} + * H_m(z) = \begin{cases} + * z^2, & \text {if } |z| < \epsilon, \\ + * 2\epsilon|z| - \epsilon^2, & \text{otherwise} + * \end{cases} + * \end{align} + * $$ + *
+ * + * Since 3.1.0, it supports stacking instances into blocks and using GEMV for + * better performance. + * The block size will be 1.0 MB, if param maxBlockSizeInMB is set 0.0 by default. + * + * Note: Fitting with huber loss only supports none and L2 regularization. + */ +@Since("1.3.0") +class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String) + extends Regressor[Vector, LinearRegression, LinearRegressionModel] + with LinearRegressionParams with DefaultParamsWritable with Logging { + + import LinearRegression._ + + @Since("1.4.0") + def this() = this(Identifiable.randomUID("linReg")) + + /** + * Set the regularization parameter. + * Default is 0.0. + * + * @group setParam + */ + @Since("1.3.0") + def setRegParam(value: Double): this.type = set(regParam, value) + + /** + * Set if we should fit the intercept. + * Default is true. + * + * @group setParam + */ + @Since("1.5.0") + def setFitIntercept(value: Boolean): this.type = set(fitIntercept, value) + + /** + * Whether to standardize the training features before fitting the model. + * The coefficients of models will be always returned on the original scale, + * so it will be transparent for users. + * Default is true. + * + * @note With/without standardization, the models should be always converged + * to the same solution when no regularization is applied. In R's GLMNET package, + * the default behavior is true as well. + * + * @group setParam + */ + @Since("1.5.0") + def setStandardization(value: Boolean): this.type = set(standardization, value) + + /** + * Set the ElasticNet mixing parameter. + * For alpha = 0, the penalty is an L2 penalty. + * For alpha = 1, it is an L1 penalty. + * For alpha in (0,1), the penalty is a combination of L1 and L2. + * Default is 0.0 which is an L2 penalty. + * + * Note: Fitting with huber loss only supports None and L2 regularization, + * so throws exception if this param is non-zero value. + * + * @group setParam + */ + @Since("1.4.0") + def setElasticNetParam(value: Double): this.type = set(elasticNetParam, value) + + /** + * Set the maximum number of iterations. + * Default is 100. + * + * @group setParam + */ + @Since("1.3.0") + def setMaxIter(value: Int): this.type = set(maxIter, value) + + /** + * Set the convergence tolerance of iterations. + * Smaller value will lead to higher accuracy with the cost of more iterations. + * Default is 1E-6. + * + * @group setParam + */ + @Since("1.4.0") + def setTol(value: Double): this.type = set(tol, value) + + /** + * Whether to over-/under-sample training instances according to the given weights in weightCol. + * If not set or empty, all instances are treated equally (weight 1.0). + * Default is not set, so all instances have weight one. + * + * @group setParam + */ + @Since("1.6.0") + def setWeightCol(value: String): this.type = set(weightCol, value) + + /** + * Set the solver algorithm used for optimization. + * In case of linear regression, this can be "l-bfgs", "normal" and "auto". + * - "l-bfgs" denotes Limited-memory BFGS which is a limited-memory quasi-Newton + * optimization method. + * - "normal" denotes using Normal Equation as an analytical solution to the linear regression + * problem. This solver is limited to `LinearRegression.MAX_FEATURES_FOR_NORMAL_SOLVER`. + * - "auto" (default) means that the solver algorithm is selected automatically. + * The Normal Equations solver will be used when possible, but this will automatically fall + * back to iterative optimization methods when needed. + * + * Note: Fitting with huber loss doesn't support normal solver, + * so throws exception if this param was set with "normal". + * @group setParam + */ + @Since("1.6.0") + def setSolver(value: String): this.type = set(solver, value) + + /** + * Suggested depth for treeAggregate (greater than or equal to 2). + * If the dimensions of features or the number of partitions are large, + * this param could be adjusted to a larger size. + * Default is 2. + * + * @group expertSetParam + */ + @Since("2.1.0") + def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) + + /** + * Sets the value of param [[loss]]. + * Default is "squaredError". + * + * @group setParam + */ + @Since("2.3.0") + def setLoss(value: String): this.type = set(loss, value) + + /** + * Sets the value of param [[epsilon]]. + * Default is 1.35. + * + * @group setExpertParam + */ + @Since("2.3.0") + def setEpsilon(value: Double): this.type = set(epsilon, value) + + /** + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0, then 1.0 MB will be chosen. + * + * @group expertSetParam + */ + @Since("3.1.0") + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) + + override protected def train( + dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => + instr.logPipelineStage(this) + instr.logDataset(dataset) + instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, solver, tol, + elasticNetParam, fitIntercept, maxIter, regParam, standardization, aggregationDepth, loss, + epsilon, maxBlockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } + + // Extract the number of features before deciding optimization solver. + val numFeatures = MetadataUtils.getNumFeatures(dataset, $(featuresCol)) + instr.logNumFeatures(numFeatures) + + if ($(loss) == SquaredError && (($(solver) == Auto && + numFeatures <= WeightedLeastSquares.MAX_NUM_FEATURES) || $(solver) == Normal)) { + return trainWithNormal(dataset, instr) + } + + val instances = extractInstances(dataset) + .setName("training instances") + + val (summarizer, labelSummarizer) = Summarizer + .getRegressionSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) + + val yMean = labelSummarizer.mean(0) + val rawYStd = labelSummarizer.std(0) + + instr.logNumExamples(labelSummarizer.count) + instr.logNamedValue(Instrumentation.loggerTags.meanOfLabels, yMean) + instr.logNamedValue(Instrumentation.loggerTags.varianceOfLabels, rawYStd) + instr.logSumOfWeights(summarizer.weightSum) + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) + } + + if (rawYStd == 0.0) { + if ($(fitIntercept) || yMean == 0.0) { + return trainWithConstantLabel(dataset, instr, numFeatures, yMean) + } else { + require($(regParam) == 0.0, "The standard deviation of the label is zero. " + + "Model cannot be regularized.") + instr.logWarning(s"The standard deviation of the label is zero. " + + "Consider setting fitIntercept=true.") + } + } + + // if y is constant (rawYStd is zero), then y cannot be scaled. In this case + // setting yStd=abs(yMean) ensures that y is not scaled anymore in l-bfgs algorithm. + val yStd = if (rawYStd > 0) rawYStd else math.abs(yMean) + val featuresMean = summarizer.mean.toArray + val featuresStd = summarizer.std.toArray + + if (!$(fitIntercept) && (0 until numFeatures).exists { i => + featuresStd(i) == 0.0 && featuresMean(i) != 0.0 }) { + instr.logWarning("Fitting LinearRegressionModel without intercept on dataset with " + + "constant nonzero column, Spark MLlib outputs zero coefficients for constant nonzero " + + "columns. This behavior is the same as R glmnet but different from LIBSVM.") + } + + // Since we implicitly do the feature scaling when we compute the cost function + // to improve the convergence, the effective regParam will be changed. + val effectiveRegParam = $(loss) match { + case SquaredError => $(regParam) / yStd + case Huber => $(regParam) + } + val effectiveL1RegParam = $(elasticNetParam) * effectiveRegParam + val effectiveL2RegParam = (1.0 - $(elasticNetParam)) * effectiveRegParam + + val getFeaturesStd = (j: Int) => if (j >= 0 && j < numFeatures) featuresStd(j) else 0.0 + val regularization = if (effectiveL2RegParam != 0.0) { + val shouldApply = (idx: Int) => idx >= 0 && idx < numFeatures + Some(new L2Regularization(effectiveL2RegParam, shouldApply, + if ($(standardization)) None else Some(getFeaturesStd))) + } else None + + val optimizer = createOptimizer(effectiveRegParam, effectiveL1RegParam, + numFeatures, featuresStd) + + val initialValues = $(loss) match { + case SquaredError => + Vectors.zeros(numFeatures) + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + Vectors.dense(Array.fill(dim)(1.0)) + } + + val (parameters, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, yMean, yStd, + featuresMean, featuresStd, initialValues, regularization, optimizer) + + if (parameters == null) { + val msg = s"${optimizer.getClass.getName} failed." + instr.logError(msg) + throw new SparkException(msg) + } + + val model = createModel(parameters, yMean, yStd, featuresMean, featuresStd) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), predictionColName, $(labelCol), $(featuresCol), + model, Array(0.0), objectiveHistory) + model.setSummary(Some(trainingSummary)) + } + + private def trainWithNormal( + dataset: Dataset[_], + instr: Instrumentation): LinearRegressionModel = { + // oneDAL only support simple linear regression and ridge regression + val paramSupported = ($(regParam) == 0) || ($(regParam) != 0 && $(elasticNetParam) == 0) + if (paramSupported && Utils.isOAPEnabled) { + val executor_num = Utils.sparkExecutorNum(dataset.sparkSession.sparkContext) + val executor_cores = Utils.sparkExecutorCores() + logInfo(s"LinearRegressionDAL fit using $executor_num Executors") + + val optimizer = new LinearRegressionDALImpl($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + executor_num, executor_cores) + + // Return same model as WeightedLeastSquaresModel + val model = optimizer.train(dataset, Some(instr)) + + val lrModel = copyValues( + new LinearRegressionModel(uid, model.coefficients, model.intercept)) + + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), + predictionColName, + $(labelCol), + $(featuresCol), + summaryModel, + model.diagInvAtWA.toArray, + model.objectiveHistory) + + return lrModel.setSummary(Some(trainingSummary)) + } else { + // For low dimensional data, WeightedLeastSquares is more efficient since the + // training algorithm only requires one pass through the data. (SPARK-10668) + + val optimizer = new WeightedLeastSquares($(fitIntercept), $(regParam), + elasticNetParam = $(elasticNetParam), $(standardization), true, + solverType = WeightedLeastSquares.Auto, maxIter = $(maxIter), tol = $(tol)) + val instances = extractInstances(dataset) + .setName("training instances") + val model = optimizer.fit(instances, instr = OptionalInstrumentation.create(instr)) + // When it is trained by WeightedLeastSquares, training summary does not + // attach returned model. + val lrModel = copyValues(new LinearRegressionModel(uid, model.coefficients, model.intercept)) + val (summaryModel, predictionColName) = lrModel.findSummaryModelAndPredictionCol() + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), predictionColName, $(labelCol), $(featuresCol), + summaryModel, model.diagInvAtWA.toArray, model.objectiveHistory) + + lrModel.setSummary(Some(trainingSummary)) + } + } + + private def trainWithConstantLabel( + dataset: Dataset[_], + instr: Instrumentation, + numFeatures: Int, + yMean: Double): LinearRegressionModel = { + // If the rawYStd==0 and fitIntercept==true, then the intercept is yMean with + // zero coefficient; as a result, training is not needed. + // Also, if rawYStd==0 and yMean==0, all the coefficients are zero regardless of + // the fitIntercept. + if (yMean == 0.0) { + instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + + s"coefficients and the intercept will all be zero; as a result, training is not " + + s"needed.") + } else { + instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + + s"will be zeros and the intercept will be the mean of the label; as a result, " + + s"training is not needed.") + } + val coefficients = Vectors.sparse(numFeatures, Seq.empty) + val intercept = yMean + + val model = copyValues(new LinearRegressionModel(uid, coefficients, intercept)) + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = model.findSummaryModelAndPredictionCol() + + val trainingSummary = new LinearRegressionTrainingSummary( + summaryModel.transform(dataset), predictionColName, $(labelCol), $(featuresCol), + model, Array(0.0), Array(0.0)) + + model.setSummary(Some(trainingSummary)) + } + + private def createOptimizer( + effectiveRegParam: Double, + effectiveL1RegParam: Double, + numFeatures: Int, + featuresStd: Array[Double]) = { + $(loss) match { + case SquaredError => + if ($(elasticNetParam) == 0.0 || effectiveRegParam == 0.0) { + new BreezeLBFGS[BDV[Double]]($(maxIter), 10, $(tol)) + } else { + val standardizationParam = $(standardization) + def effectiveL1RegFun = (index: Int) => { + if (standardizationParam) { + effectiveL1RegParam + } else { + // If `standardization` is false, we still standardize the data + // to improve the rate of convergence; as a result, we have to + // perform this reverse standardization by penalizing each component + // differently to get effectively the same objective function when + // the training dataset is not standardized. + if (featuresStd(index) != 0.0) effectiveL1RegParam / featuresStd(index) else 0.0 + } + } + new BreezeOWLQN[Int, BDV[Double]]($(maxIter), 10, effectiveL1RegFun, $(tol)) + } + case Huber => + val dim = if ($(fitIntercept)) numFeatures + 2 else numFeatures + 1 + val lowerBounds = BDV[Double](Array.fill(dim)(Double.MinValue)) + // Optimize huber loss in space "\sigma > 0" + lowerBounds(dim - 1) = Double.MinPositiveValue + val upperBounds = BDV[Double](Array.fill(dim)(Double.MaxValue)) + new BreezeLBFGSB(lowerBounds, upperBounds, $(maxIter), 10, $(tol)) + } + } + + private def trainImpl( + instances: RDD[Instance], + actualBlockSizeInMB: Double, + yMean: Double, + yStd: Double, + featuresMean: Array[Double], + featuresStd: Array[Double], + initialValues: Vector, + regularization: Option[L2Regularization], + optimizer: FirstOrderMinimizer[BDV[Double], DiffFunction[BDV[Double]]]) = { + val bcFeaturesMean = instances.context.broadcast(featuresMean) + val bcFeaturesStd = instances.context.broadcast(featuresStd) + + val standardized = instances.mapPartitions { iter => + val inverseStd = bcFeaturesStd.value.map { std => if (std != 0) 1.0 / std else 0.0 } + val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) + iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } + } + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) + .persist(StorageLevel.MEMORY_AND_DISK) + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") + + val costFun = $(loss) match { + case SquaredError => + val getAggregatorFunc = new BlockLeastSquaresAggregator(yStd, yMean, $(fitIntercept), + bcFeaturesStd, bcFeaturesMean)(_) + new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth)) + case Huber => + val getAggregatorFunc = new BlockHuberAggregator($(fitIntercept), $(epsilon))(_) + new RDDLossFunction(blocks, getAggregatorFunc, regularization, $(aggregationDepth)) + } + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + initialValues.asBreeze.toDenseVector) + + /* + Note that in Linear Regression, the objective history (loss + regularization) returned + from optimizer is computed in the scaled space given by the following formula. +
+ $$ + L &= 1/2n||\sum_i w_i(x_i - \bar{x_i}) / \hat{x_i} - (y - \bar{y}) / \hat{y}||^2 + + regTerms \\ + $$ +
+ */ + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + + blocks.unpersist() + bcFeaturesMean.destroy() + bcFeaturesStd.destroy() + + (if (state == null) null else state.x.toArray, arrayBuilder.result) + } + + private def createModel( + parameters: Array[Double], + yMean: Double, + yStd: Double, + featuresMean: Array[Double], + featuresStd: Array[Double]): LinearRegressionModel = { + val numFeatures = featuresStd.length + /* + The coefficients are trained in the scaled space; we're converting them back to + the original space. + */ + val rawCoefficients = $(loss) match { + case SquaredError => parameters.clone() + case Huber => parameters.take(numFeatures) + } + + var i = 0 + val len = rawCoefficients.length + val multiplier = $(loss) match { + case SquaredError => yStd + case Huber => 1.0 + } + while (i < len) { + rawCoefficients(i) *= { if (featuresStd(i) != 0.0) multiplier / featuresStd(i) else 0.0 } + i += 1 + } + + val intercept = if ($(fitIntercept)) { + $(loss) match { + case SquaredError => + /* + The intercept of squared error in R's GLMNET is computed using closed form + after the coefficients are converged. See the following discussion for detail. + http://stats.stackexchange.com/questions/13617/how-is-the-intercept-computed-in-glmnet + */ + yMean - dot(Vectors.dense(rawCoefficients), Vectors.dense(featuresMean)) + case Huber => parameters(numFeatures) + } + } else 0.0 + + val scale = $(loss) match { + case SquaredError => 1.0 + case Huber => parameters.last + } + + val coefficients = Vectors.dense(rawCoefficients).compressed + + copyValues(new LinearRegressionModel(uid, coefficients, intercept, scale)) + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegression = defaultCopy(extra) +} + +@Since("1.6.0") +object LinearRegression extends DefaultParamsReadable[LinearRegression] { + + @Since("1.6.0") + override def load(path: String): LinearRegression = super.load(path) + + /** + * When using `LinearRegression.solver` == "normal", the solver must limit the number of + * features to at most this number. The entire covariance matrix X^T^X will be collected + * to the driver. This limit helps prevent memory overflow errors. + */ + @Since("2.1.0") + val MAX_FEATURES_FOR_NORMAL_SOLVER: Int = WeightedLeastSquares.MAX_NUM_FEATURES + + /** String name for "auto". */ + private[regression] val Auto = "auto" + + /** String name for "normal". */ + private[regression] val Normal = "normal" + + /** String name for "l-bfgs". */ + private[regression] val LBFGS = "l-bfgs" + + /** Set of solvers that LinearRegression supports. */ + private[regression] val supportedSolvers = Array(Auto, Normal, LBFGS) + + /** String name for "squaredError". */ + private[regression] val SquaredError = "squaredError" + + /** String name for "huber". */ + private[regression] val Huber = "huber" + + /** Set of loss function names that LinearRegression supports. */ + private[regression] val supportedLosses = Array(SquaredError, Huber) +} + +/** + * Model produced by [[LinearRegression]]. + */ +@Since("1.3.0") +class LinearRegressionModel private[ml] ( + @Since("1.4.0") override val uid: String, + @Since("2.0.0") val coefficients: Vector, + @Since("1.3.0") val intercept: Double, + @Since("2.3.0") val scale: Double) + extends RegressionModel[Vector, LinearRegressionModel] + with LinearRegressionParams with GeneralMLWritable + with HasTrainingSummary[LinearRegressionTrainingSummary] { + + private[ml] def this(uid: String, coefficients: Vector, intercept: Double) = + this(uid, coefficients, intercept, 1.0) + + override val numFeatures: Int = coefficients.size + + /** + * Gets summary (e.g. residuals, mse, r-squared ) of model on training set. An exception is + * thrown if `hasSummary` is false. + */ + @Since("1.5.0") + override def summary: LinearRegressionTrainingSummary = super.summary + + /** + * Evaluates the model on a test dataset. + * + * @param dataset Test dataset to evaluate model on. + */ + @Since("2.0.0") + def evaluate(dataset: Dataset[_]): LinearRegressionSummary = { + // Handle possible missing or invalid prediction columns + val (summaryModel, predictionColName) = findSummaryModelAndPredictionCol() + new LinearRegressionSummary(summaryModel.transform(dataset), predictionColName, + $(labelCol), $(featuresCol), summaryModel, Array(0.0)) + } + + /** + * If the prediction column is set returns the current model and prediction column, + * otherwise generates a new column and sets it as the prediction column on a new copy + * of the current model. + */ + private[regression] def findSummaryModelAndPredictionCol(): (LinearRegressionModel, String) = { + $(predictionCol) match { + case "" => + val predictionColName = "prediction_" + java.util.UUID.randomUUID.toString + (copy(ParamMap.empty).setPredictionCol(predictionColName), predictionColName) + case p => (this, p) + } + } + + + override def predict(features: Vector): Double = { + dot(features, coefficients) + intercept + } + + @Since("1.4.0") + override def copy(extra: ParamMap): LinearRegressionModel = { + val newModel = copyValues(new LinearRegressionModel(uid, coefficients, intercept), extra) + newModel.setSummary(trainingSummary).setParent(parent) + } + + /** + * Returns a [[org.apache.spark.ml.util.GeneralMLWriter]] instance for this ML instance. + * + * For [[LinearRegressionModel]], this does NOT currently save the training [[summary]]. + * An option to save [[summary]] may be added in the future. + * + * This also does not save the [[parent]] currently. + */ + @Since("1.6.0") + override def write: GeneralMLWriter = new GeneralMLWriter(this) + + @Since("3.0.0") + override def toString: String = { + s"LinearRegressionModel: uid=$uid, numFeatures=$numFeatures" + } +} + +/** A writer for LinearRegression that handles the "internal" (or default) format */ +private class InternalLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "internal" + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector, scale: Double) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val instance = stage.asInstanceOf[LinearRegressionModel] + val sc = sparkSession.sparkContext + // Save metadata and Params + DefaultParamsWriter.saveMetadata(instance, path, sc) + // Save model data: intercept, coefficients, scale + val data = Data(instance.intercept, instance.coefficients, instance.scale) + val dataPath = new Path(path, "data").toString + sparkSession.createDataFrame(Seq(data)).repartition(1).write.parquet(dataPath) + } +} + +/** A writer for LinearRegression that handles the "pmml" format */ +private class PMMLLinearRegressionModelWriter + extends MLWriterFormat with MLFormatRegister { + + override def format(): String = "pmml" + + override def stageName(): String = "org.apache.spark.ml.regression.LinearRegressionModel" + + private case class Data(intercept: Double, coefficients: Vector) + + override def write(path: String, sparkSession: SparkSession, + optionMap: mutable.Map[String, String], stage: PipelineStage): Unit = { + val sc = sparkSession.sparkContext + // Construct the MLLib model which knows how to write to PMML. + val instance = stage.asInstanceOf[LinearRegressionModel] + val oldModel = new OldLinearRegressionModel(instance.coefficients, instance.intercept) + // Save PMML + oldModel.toPMML(sc, path) + } +} + +@Since("1.6.0") +object LinearRegressionModel extends MLReadable[LinearRegressionModel] { + + @Since("1.6.0") + override def read: MLReader[LinearRegressionModel] = new LinearRegressionModelReader + + @Since("1.6.0") + override def load(path: String): LinearRegressionModel = super.load(path) + + private class LinearRegressionModelReader extends MLReader[LinearRegressionModel] { + + /** Checked against metadata when loading model */ + private val className = classOf[LinearRegressionModel].getName + + override def load(path: String): LinearRegressionModel = { + val metadata = DefaultParamsReader.loadMetadata(path, sc, className) + + val dataPath = new Path(path, "data").toString + val data = sparkSession.read.format("parquet").load(dataPath) + val (majorVersion, minorVersion) = majorMinorVersion(metadata.sparkVersion) + val model = if (majorVersion < 2 || (majorVersion == 2 && minorVersion <= 2)) { + // Spark 2.2 and before + val Row(intercept: Double, coefficients: Vector) = + MLUtils.convertVectorColumnsToML(data, "coefficients") + .select("intercept", "coefficients") + .head() + new LinearRegressionModel(metadata.uid, coefficients, intercept) + } else { + // Spark 2.3 and later + val Row(intercept: Double, coefficients: Vector, scale: Double) = + data.select("intercept", "coefficients", "scale").head() + new LinearRegressionModel(metadata.uid, coefficients, intercept, scale) + } + + metadata.getAndSetParams(model) + model + } + } +} + +/** + * Linear regression training results. Currently, the training summary ignores the + * training weights except for the objective trace. + * + * @param predictions predictions output by the model's `transform` method. + * @param objectiveHistory objective function (scaled loss + regularization) at each iteration. + */ +@Since("1.5.0") +class LinearRegressionTrainingSummary private[regression] ( + predictions: DataFrame, + predictionCol: String, + labelCol: String, + featuresCol: String, + model: LinearRegressionModel, + diagInvAtWA: Array[Double], + val objectiveHistory: Array[Double]) + extends LinearRegressionSummary( + predictions, + predictionCol, + labelCol, + featuresCol, + model, + diagInvAtWA) { + + /** + * Number of training iterations until termination + * + * This value is only available when using the "l-bfgs" solver. + * + * @see `LinearRegression.solver` + */ + @Since("1.5.0") + val totalIterations = { + assert(objectiveHistory.length > 0, s"objectiveHistory length should be greater than 1.") + objectiveHistory.length - 1 + } +} + +/** + * Linear regression results evaluated on a dataset. + * + * @param predictions predictions output by the model's `transform` method. + * @param predictionCol Field in "predictions" which gives the predicted value of the label at + * each instance. + * @param labelCol Field in "predictions" which gives the true label of each instance. + * @param featuresCol Field in "predictions" which gives the features of each instance as a vector. + */ +@Since("1.5.0") +class LinearRegressionSummary private[regression] ( + @transient val predictions: DataFrame, + val predictionCol: String, + val labelCol: String, + val featuresCol: String, + private val privateModel: LinearRegressionModel, + private val diagInvAtWA: Array[Double]) extends Serializable { + + @transient private val metrics = { + val weightCol = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + lit(1.0) + } else { + col(privateModel.getWeightCol).cast(DoubleType) + } + + new RegressionMetrics( + predictions + .select(col(predictionCol), col(labelCol).cast(DoubleType), weightCol) + .rdd + .map { case Row(pred: Double, label: Double, weight: Double) => (pred, label, weight) }, + !privateModel.getFitIntercept) + } + + /** + * Returns the explained variance regression score. + * explainedVariance = 1 - variance(y - \hat{y}) / variance(y) + * Reference: + * Wikipedia explain variation + */ + @Since("1.5.0") + val explainedVariance: Double = metrics.explainedVariance + + /** + * Returns the mean absolute error, which is a risk function corresponding to the + * expected value of the absolute error loss or l1-norm loss. + */ + @Since("1.5.0") + val meanAbsoluteError: Double = metrics.meanAbsoluteError + + /** + * Returns the mean squared error, which is a risk function corresponding to the + * expected value of the squared error loss or quadratic loss. + */ + @Since("1.5.0") + val meanSquaredError: Double = metrics.meanSquaredError + + /** + * Returns the root mean squared error, which is defined as the square root of + * the mean squared error. + */ + @Since("1.5.0") + val rootMeanSquaredError: Double = metrics.rootMeanSquaredError + + /** + * Returns R^2^, the coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + */ + @Since("1.5.0") + val r2: Double = metrics.r2 + + /** + * Returns Adjusted R^2^, the adjusted coefficient of determination. + * Reference: + * Wikipedia coefficient of determination + */ + @Since("2.3.0") + val r2adj: Double = { + val interceptDOF = if (privateModel.getFitIntercept) 1 else 0 + 1 - (1 - r2) * (numInstances - interceptDOF) / + (numInstances - privateModel.coefficients.size - interceptDOF) + } + + /** Residuals (label - predicted value) */ + @Since("1.5.0") + @transient lazy val residuals: DataFrame = { + val t = udf { (pred: Double, label: Double) => label - pred } + predictions.select(t(col(predictionCol), col(labelCol)).as("residuals")) + } + + /** Number of instances in DataFrame predictions */ + lazy val numInstances: Long = metrics.count + + /** Degrees of freedom */ + @Since("2.2.0") + val degreesOfFreedom: Long = if (privateModel.getFitIntercept) { + numInstances - privateModel.coefficients.size - 1 + } else { + numInstances - privateModel.coefficients.size + } + + /** + * The weighted residuals, the usual residuals rescaled by + * the square root of the instance weights. + */ + lazy val devianceResiduals: Array[Double] = { + val weighted = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + lit(1.0) + } else { + sqrt(col(privateModel.getWeightCol)) + } + val dr = predictions + .select(col(privateModel.getLabelCol).minus(col(privateModel.getPredictionCol)) + .multiply(weighted).as("weightedResiduals")) + .select(min(col("weightedResiduals")).as("min"), max(col("weightedResiduals")).as("max")) + .first() + Array(dr.getDouble(0), dr.getDouble(1)) + } + + /** + * Standard error of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val coefficientStandardErrors: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No Std. Error of coefficients available for this LinearRegressionModel") + } else { + val rss = + if (!privateModel.isDefined(privateModel.weightCol) || privateModel.getWeightCol.isEmpty) { + meanSquaredError * numInstances + } else { + val t = udf { (pred: Double, label: Double, weight: Double) => + math.pow(label - pred, 2.0) * weight } + predictions.select(t(col(privateModel.getPredictionCol), col(privateModel.getLabelCol), + col(privateModel.getWeightCol)).as("wse")).agg(sum(col("wse"))).first().getDouble(0) + } + val sigma2 = rss / degreesOfFreedom + diagInvAtWA.map(_ * sigma2).map(math.sqrt) + } + } + + /** + * T-statistic of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val tValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No t-statistic available for this LinearRegressionModel") + } else { + val estimate = if (privateModel.getFitIntercept) { + Array.concat(privateModel.coefficients.toArray, Array(privateModel.intercept)) + } else { + privateModel.coefficients.toArray + } + estimate.zip(coefficientStandardErrors).map { x => x._1 / x._2 } + } + } + + /** + * Two-sided p-value of estimated coefficients and intercept. + * This value is only available when using the "normal" solver. + * + * If `LinearRegression.fitIntercept` is set to true, + * then the last element returned corresponds to the intercept. + * + * @see `LinearRegression.solver` + */ + lazy val pValues: Array[Double] = { + if (diagInvAtWA.length == 1 && diagInvAtWA(0) == 0) { + throw new UnsupportedOperationException( + "No p-value available for this LinearRegressionModel") + } else { + tValues.map { x => 2.0 * (1.0 - StudentsT(degreesOfFreedom.toDouble).cdf(math.abs(x))) } + } + } + +} + diff --git a/mllib-dal/src/test/scala/org/apache/spark/ml/regression/IntelLinearRegressionSuite.scala b/mllib-dal/src/test/scala/org/apache/spark/ml/regression/IntelLinearRegressionSuite.scala new file mode 100755 index 000000000..60c8d0a63 --- /dev/null +++ b/mllib-dal/src/test/scala/org/apache/spark/ml/regression/IntelLinearRegressionSuite.scala @@ -0,0 +1,1314 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ml.regression + +import scala.collection.JavaConverters._ +import scala.util.Random + +import org.dmg.pmml.{OpType, PMML} +import org.dmg.pmml.regression.{RegressionModel => PMMLRegressionModel} + +import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.feature.LabeledPoint +import org.apache.spark.ml.linalg.{DenseVector, Vector, Vectors} +import org.apache.spark.ml.param.{ParamMap, ParamsSuite} +import org.apache.spark.ml.util._ +import org.apache.spark.ml.util.TestingUtils._ +import org.apache.spark.mllib.util.LinearDataGenerator +import org.apache.spark.sql.{DataFrame, Row} + + +class IntelLinearRegressionSuite extends MLTest with DefaultReadWriteTest with PMMLReadWriteTest { + + import testImplicits._ + + private val seed: Int = 42 + @transient var datasetWithDenseFeature: DataFrame = _ + @transient var datasetWithStrongNoise: DataFrame = _ + @transient var datasetWithDenseFeatureWithoutIntercept: DataFrame = _ + @transient var datasetWithSparseFeature: DataFrame = _ + @transient var datasetWithWeight: DataFrame = _ + @transient var datasetWithWeightConstantLabel: DataFrame = _ + @transient var datasetWithWeightZeroLabel: DataFrame = _ + @transient var datasetWithOutlier: DataFrame = _ + + override def beforeAll(): Unit = { + super.beforeAll() + datasetWithDenseFeature = sc.parallelize(LinearDataGenerator.generateLinearInput( + intercept = 6.3, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3), + xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2).map(_.asML).toDF() + + datasetWithStrongNoise = sc.parallelize(LinearDataGenerator.generateLinearInput( + intercept = 6.3, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3), + xVariance = Array(0.7, 1.2), nPoints = 100, seed, eps = 5.0), 2).map(_.asML).toDF() + + /* + datasetWithDenseFeatureWithoutIntercept is not needed for correctness testing + but is useful for illustrating training model without intercept + */ + datasetWithDenseFeatureWithoutIntercept = sc.parallelize( + LinearDataGenerator.generateLinearInput( + intercept = 0.0, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3), + xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2).map(_.asML).toDF() + + val r = new Random(seed) + // When feature size is larger than 4096, normal optimizer is chosen + // as the solver of linear regression in the case of "auto" mode. + val featureSize = 4100 + datasetWithSparseFeature = sc.parallelize(LinearDataGenerator.generateLinearInput( + intercept = 0.0, weights = Seq.fill(featureSize)(r.nextDouble()).toArray, + xMean = Seq.fill(featureSize)(r.nextDouble()).toArray, + xVariance = Seq.fill(featureSize)(r.nextDouble()).toArray, nPoints = 200, + seed, eps = 0.1, sparsity = 0.7), 2).map(_.asML).toDF() + + /* + R code: + + A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) + b <- c(17, 19, 23, 29) + w <- c(1, 2, 3, 4) + df <- as.data.frame(cbind(A, b)) + */ + datasetWithWeight = sc.parallelize(Seq( + Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), + Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)), + Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)), + Instance(29.0, 4.0, Vectors.dense(3.0, 13.0)) + ), 2).toDF() + + /* + R code: + + A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2) + b.const <- c(17, 17, 17, 17) + w <- c(1, 2, 3, 4) + df.const.label <- as.data.frame(cbind(A, b.const)) + */ + datasetWithWeightConstantLabel = sc.parallelize(Seq( + Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), + Instance(17.0, 2.0, Vectors.dense(1.0, 7.0)), + Instance(17.0, 3.0, Vectors.dense(2.0, 11.0)), + Instance(17.0, 4.0, Vectors.dense(3.0, 13.0)) + ), 2).toDF() + + datasetWithWeightZeroLabel = sc.parallelize(Seq( + Instance(0.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), + Instance(0.0, 2.0, Vectors.dense(1.0, 7.0)), + Instance(0.0, 3.0, Vectors.dense(2.0, 11.0)), + Instance(0.0, 4.0, Vectors.dense(3.0, 13.0)) + ), 2).toDF() + + datasetWithOutlier = { + val inlierData = LinearDataGenerator.generateLinearInput( + intercept = 6.3, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3), + xVariance = Array(0.7, 1.2), nPoints = 900, seed, eps = 0.1) + val outlierData = LinearDataGenerator.generateLinearInput( + intercept = -2.1, weights = Array(0.6, -1.2), xMean = Array(0.9, -1.3), + xVariance = Array(1.5, 0.8), nPoints = 100, seed, eps = 0.1) + sc.parallelize(inlierData ++ outlierData, 2).map(_.asML).toDF() + } + } + + /** + * Enable the ignored test to export the dataset into CSV format, + * so we can validate the training accuracy compared with R's glmnet package. + */ + ignore("export test data into CSV format") { + datasetWithDenseFeature.rdd.map { case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile("target/tmp/LinearRegressionSuite/datasetWithDenseFeature") + + datasetWithDenseFeatureWithoutIntercept.rdd.map { + case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile( + "target/tmp/LinearRegressionSuite/datasetWithDenseFeatureWithoutIntercept") + + datasetWithSparseFeature.rdd.map { case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile("target/tmp/LinearRegressionSuite/datasetWithSparseFeature") + + datasetWithOutlier.rdd.map { case Row(label: Double, features: Vector) => + label + "," + features.toArray.mkString(",") + }.repartition(1).saveAsTextFile("target/tmp/LinearRegressionSuite/datasetWithOutlier") + } + + test("params") { + ParamsSuite.checkParams(new LinearRegression) + val model = new LinearRegressionModel("linearReg", Vectors.dense(0.0), 0.0) + ParamsSuite.checkParams(model) + } + + test("linear regression: default params") { + val lir = new LinearRegression + assert(lir.getLabelCol === "label") + assert(lir.getFeaturesCol === "features") + assert(lir.getPredictionCol === "prediction") + assert(lir.getRegParam === 0.0) + assert(lir.getElasticNetParam === 0.0) + assert(lir.getFitIntercept) + assert(lir.getStandardization) + assert(lir.getSolver === "auto") + assert(lir.getLoss === "squaredError") + assert(lir.getEpsilon === 1.35) + val model = lir.fit(datasetWithDenseFeature) + + MLTestingUtils.checkCopyAndUids(lir, model) + assert(model.hasSummary) + val copiedModel = model.copy(ParamMap.empty) + assert(copiedModel.hasSummary) + model.setSummary(None) + assert(!model.hasSummary) + + model.transform(datasetWithDenseFeature) + .select("label", "prediction") + .collect() + assert(model.getFeaturesCol === "features") + assert(model.getPredictionCol === "prediction") + assert(model.intercept !== 0.0) + assert(model.scale === 1.0) + assert(model.hasParent) + val numFeatures = datasetWithDenseFeature.select("features").first().getAs[Vector](0).size + assert(model.numFeatures === numFeatures) + } + + test("linear regression: can transform data with LinearRegressionModel") { + withClue("training related params like loss are only validated during fitting phase") { + val original = new LinearRegression().fit(datasetWithDenseFeature) + + val deserialized = new LinearRegressionModel(uid = original.uid, + coefficients = original.coefficients, + intercept = original.intercept) + val output = deserialized.transform(datasetWithDenseFeature) + assert(output.collect().size > 0) // simple assertion to ensure no exception thrown + } + } + + test("linear regression: illegal params") { + withClue("LinearRegression with huber loss only supports L2 regularization") { + intercept[IllegalArgumentException] { + new LinearRegression().setLoss("huber").setElasticNetParam(0.5) + .fit(datasetWithDenseFeature) + } + } + + withClue("LinearRegression with huber loss doesn't support normal solver") { + intercept[IllegalArgumentException] { + new LinearRegression().setLoss("huber").setSolver("normal").fit(datasetWithDenseFeature) + } + } + } + +// test("linear regression handles singular matrices") { +// // check for both constant columns with intercept (zero std) and collinear +// val singularDataConstantColumn = sc.parallelize(Seq( +// Instance(17.0, 1.0, Vectors.dense(1.0, 5.0).toSparse), +// Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)), +// Instance(23.0, 3.0, Vectors.dense(1.0, 11.0)), +// Instance(29.0, 4.0, Vectors.dense(1.0, 13.0)) +// ), 2).toDF() +// +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer = new LinearRegression().setSolver(solver).setFitIntercept(true) +// val model = trainer.fit(singularDataConstantColumn) +// // to make it clear that WLS did not solve analytically +// intercept[UnsupportedOperationException] { +// model.summary.coefficientStandardErrors +// } +// assert(model.summary.objectiveHistory !== Array(0.0)) +// } +// +// val singularDataCollinearFeatures = sc.parallelize(Seq( +// Instance(17.0, 1.0, Vectors.dense(10.0, 5.0).toSparse), +// Instance(19.0, 2.0, Vectors.dense(14.0, 7.0)), +// Instance(23.0, 3.0, Vectors.dense(22.0, 11.0)), +// Instance(29.0, 4.0, Vectors.dense(26.0, 13.0)) +// ), 2).toDF() +// +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer = new LinearRegression().setSolver(solver).setFitIntercept(true) +// val model = trainer.fit(singularDataCollinearFeatures) +// intercept[UnsupportedOperationException] { +// model.summary.coefficientStandardErrors +// } +// assert(model.summary.objectiveHistory !== Array(0.0)) +// } +// } + + test("linear regression with intercept without regularization") { + Seq("auto", "l-bfgs", "normal").foreach { solver => + val trainer1 = new LinearRegression().setSolver(solver) + // The result should be the same regardless of standardization without regularization + val trainer2 = (new LinearRegression).setStandardization(false).setSolver(solver) + val model1 = trainer1.fit(datasetWithDenseFeature) + val model2 = trainer2.fit(datasetWithDenseFeature) + + /* + Using the following R code to load the data and train the model using glmnet package. + + library("glmnet") + data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE) + features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3))) + label <- as.numeric(data$V1) + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0)) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.298698 + as.numeric.data.V2. 4.700706 + as.numeric.data.V3. 7.199082 + */ + val interceptR = 6.298698 + val coefficientsR = Vectors.dense(4.700706, 7.199082) + + assert(model1.intercept ~== interceptR relTol 1E-3) + assert(model1.coefficients ~= coefficientsR relTol 1E-3) + assert(model2.intercept ~== interceptR relTol 1E-3) + assert(model2.coefficients ~= coefficientsR relTol 1E-3) + + testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, + "features", "prediction") { + case Row(features: DenseVector, prediction1: Double) => + val prediction2 = + features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + + model1.intercept + assert(prediction1 ~== prediction2 relTol 1E-5) + } + } + } + +// test("linear regression without intercept without regularization") { +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer1 = (new LinearRegression).setFitIntercept(false).setSolver(solver) +// // Without regularization the results should be the same +// val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false) +// .setSolver(solver) +// val model1 = trainer1.fit(datasetWithDenseFeature) +// val modelWithoutIntercept1 = trainer1.fit(datasetWithDenseFeatureWithoutIntercept) +// val model2 = trainer2.fit(datasetWithDenseFeature) +// val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept) +// +// /* +// coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0, +// intercept = FALSE)) +// > coefficients +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) . +// as.numeric.data.V2. 6.973403 +// as.numeric.data.V3. 5.284370 +// */ +// val coefficientsR = Vectors.dense(6.973403, 5.284370) +// +// assert(model1.intercept ~== 0 absTol 1E-2) +// assert(model1.coefficients ~= coefficientsR relTol 1E-2) +// assert(model2.intercept ~== 0 absTol 1E-2) +// assert(model2.coefficients ~= coefficientsR relTol 1E-2) +// +// /* +// Then again with the data with no intercept: +// > coefficientsWithoutIntercept +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) . +// as.numeric.data3.V2. 4.70011 +// as.numeric.data3.V3. 7.19943 +// */ +// val coefficientsWithoutInterceptR = Vectors.dense(4.70011, 7.19943) +// +// assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3) +// assert(modelWithoutIntercept1.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3) +// assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3) +// assert(modelWithoutIntercept2.coefficients ~= coefficientsWithoutInterceptR relTol 1E-3) +// } +// } + + test("linear regression with intercept with L1 regularization") { + Seq("auto", "l-bfgs", "normal").foreach { solver => + val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setSolver(solver) + val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setSolver(solver).setStandardization(false) + + val model1 = trainer1.fit(datasetWithDenseFeature) + val model2 = trainer2.fit(datasetWithDenseFeature) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", + alpha = 1.0, lambda = 0.57 )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.242284 + as.numeric.d1.V2. 4.019605 + as.numeric.d1.V3. 6.679538 + */ + val interceptR1 = 6.242284 + val coefficientsR1 = Vectors.dense(4.019605, 6.679538) + assert(model1.intercept ~== interceptR1 relTol 1E-2) + assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, + lambda = 0.57, standardize=FALSE )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.416948 + as.numeric.data.V2. 3.893869 + as.numeric.data.V3. 6.724286 + */ + val interceptR2 = 6.416948 + val coefficientsR2 = Vectors.dense(3.893869, 6.724286) + + assert(model2.intercept ~== interceptR2 relTol 1E-3) + assert(model2.coefficients ~= coefficientsR2 relTol 1E-3) + + testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, + "features", "prediction") { + case Row(features: DenseVector, prediction1: Double) => + val prediction2 = + features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + + model1.intercept + assert(prediction1 ~== prediction2 relTol 1E-5) + } + } + } + + test("linear regression without intercept with L1 regularization") { + Seq("auto", "l-bfgs", "normal").foreach { solver => + val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setFitIntercept(false).setSolver(solver) + val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57) + .setFitIntercept(false).setStandardization(false).setSolver(solver) + + val model1 = trainer1.fit(datasetWithDenseFeature) + val model2 = trainer2.fit(datasetWithDenseFeature) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, + lambda = 0.57, intercept=FALSE )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.data.V2. 6.272927 + as.numeric.data.V3. 4.782604 + */ + val interceptR1 = 0.0 + val coefficientsR1 = Vectors.dense(6.272927, 4.782604) + + assert(model1.intercept ~== interceptR1 absTol 1E-2) + assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0, + lambda = 0.57, intercept=FALSE, standardize=FALSE )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.data.V2. 6.207817 + as.numeric.data.V3. 4.775780 + */ + val interceptR2 = 0.0 + val coefficientsR2 = Vectors.dense(6.207817, 4.775780) + + assert(model2.intercept ~== interceptR2 absTol 1E-2) + assert(model2.coefficients ~= coefficientsR2 relTol 1E-2) + + testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, + "features", "prediction") { + case Row(features: DenseVector, prediction1: Double) => + val prediction2 = + features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + + model1.intercept + assert(prediction1 ~== prediction2 relTol 1E-5) + } + } + } +// +// test("linear regression with intercept with L2 regularization") { +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) +// .setSolver(solver) +// val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) +// .setStandardization(false).setSolver(solver) +// val model1 = trainer1.fit(datasetWithDenseFeature) +// val model2 = trainer2.fit(datasetWithDenseFeature) +// +// /* +// coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3)) +// > coefficients +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) 5.260103 +// as.numeric.d1.V2. 3.725522 +// as.numeric.d1.V3. 5.711203 +// */ +// val interceptR1 = 5.260103 +// val coefficientsR1 = Vectors.dense(3.725522, 5.711203) +// +// assert(model1.intercept ~== interceptR1 relTol 1E-2) +// assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) +// +// /* +// coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, +// standardize=FALSE)) +// > coefficients +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) 5.790885 +// as.numeric.d1.V2. 3.432373 +// as.numeric.d1.V3. 5.919196 +// */ +// val interceptR2 = 5.790885 +// val coefficientsR2 = Vectors.dense(3.432373, 5.919196) +// +// assert(model2.intercept ~== interceptR2 relTol 1E-2) +// assert(model2.coefficients ~= coefficientsR2 relTol 1E-2) +// +// testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, +// "features", "prediction") { +// case Row(features: DenseVector, prediction1: Double) => +// val prediction2 = +// features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + +// model1.intercept +// assert(prediction1 ~== prediction2 relTol 1E-5) +// } +// } +// } +// +// test("linear regression without intercept with L2 regularization") { +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) +// .setFitIntercept(false).setSolver(solver) +// val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3) +// .setFitIntercept(false).setStandardization(false).setSolver(solver) +// val model1 = trainer1.fit(datasetWithDenseFeature) +// val model2 = trainer2.fit(datasetWithDenseFeature) +// +// /* +// coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, +// intercept = FALSE)) +// > coefficients +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) . +// as.numeric.d1.V2. 5.493430 +// as.numeric.d1.V3. 4.223082 +// */ +// val interceptR1 = 0.0 +// val coefficientsR1 = Vectors.dense(5.493430, 4.223082) +// +// assert(model1.intercept ~== interceptR1 absTol 1E-2) +// assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) +// +// /* +// coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3, +// intercept = FALSE, standardize=FALSE)) +// > coefficients +// 3 x 1 sparse Matrix of class "dgCMatrix" +// s0 +// (Intercept) . +// as.numeric.d1.V2. 5.244324 +// as.numeric.d1.V3. 4.203106 +// */ +// val interceptR2 = 0.0 +// val coefficientsR2 = Vectors.dense(5.244324, 4.203106) +// +// assert(model2.intercept ~== interceptR2 absTol 1E-2) +// assert(model2.coefficients ~= coefficientsR2 relTol 1E-2) +// +// testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, +// "features", "prediction") { +// case Row(features: DenseVector, prediction1: Double) => +// val prediction2 = +// features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + +// model1.intercept +// assert(prediction1 ~== prediction2 relTol 1E-5) +// } +// } +// } + + test("linear regression with intercept with ElasticNet regularization") { + Seq("auto", "l-bfgs", "normal").foreach { solver => + val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setSolver(solver) + val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setStandardization(false).setSolver(solver) + + val model1 = trainer1.fit(datasetWithDenseFeature) + val model2 = trainer2.fit(datasetWithDenseFeature) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, + lambda = 1.6 )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 5.689855 + as.numeric.d1.V2. 3.661181 + as.numeric.d1.V3. 6.000274 + */ + val interceptR1 = 5.689855 + val coefficientsR1 = Vectors.dense(3.661181, 6.000274) + + assert(model1.intercept ~== interceptR1 relTol 1E-2) + assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6 + standardize=FALSE)) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) 6.113890 + as.numeric.d1.V2. 3.407021 + as.numeric.d1.V3. 6.152512 + */ + val interceptR2 = 6.113890 + val coefficientsR2 = Vectors.dense(3.407021, 6.152512) + + assert(model2.intercept ~== interceptR2 relTol 1E-2) + assert(model2.coefficients ~= coefficientsR2 relTol 1E-2) + + testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, + "features", "prediction") { + case Row(features: DenseVector, prediction1: Double) => + val prediction2 = + features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + + model1.intercept + assert(prediction1 ~== prediction2 relTol 1E-5) + } + } + } + + test("linear regression without intercept with ElasticNet regularization") { + Seq("auto", "l-bfgs", "normal").foreach { solver => + val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setFitIntercept(false).setSolver(solver) + val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6) + .setFitIntercept(false).setStandardization(false).setSolver(solver) + + val model1 = trainer1.fit(datasetWithDenseFeature) + val model2 = trainer2.fit(datasetWithDenseFeature) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, + lambda = 1.6, intercept=FALSE )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.d1.V2. 5.643748 + as.numeric.d1.V3. 4.331519 + */ + val interceptR1 = 0.0 + val coefficientsR1 = Vectors.dense(5.643748, 4.331519) + + assert(model1.intercept ~== interceptR1 absTol 1E-2) + assert(model1.coefficients ~= coefficientsR1 relTol 1E-2) + + /* + coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, + lambda = 1.6, intercept=FALSE, standardize=FALSE )) + > coefficients + 3 x 1 sparse Matrix of class "dgCMatrix" + s0 + (Intercept) . + as.numeric.d1.V2. 5.455902 + as.numeric.d1.V3. 4.312266 + + */ + val interceptR2 = 0.0 + val coefficientsR2 = Vectors.dense(5.455902, 4.312266) + + assert(model2.intercept ~== interceptR2 absTol 1E-2) + assert(model2.coefficients ~= coefficientsR2 relTol 1E-2) + + testTransformer[(Double, Vector)](datasetWithDenseFeature, model1, + "features", "prediction") { + case Row(features: DenseVector, prediction1: Double) => + val prediction2 = + features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) + + model1.intercept + assert(prediction1 ~== prediction2 relTol 1E-5) + } + } + } + + test("prediction on single instance") { + val trainer = new LinearRegression + val model = trainer.fit(datasetWithDenseFeature) + + testPredictionModelSinglePrediction(model, datasetWithDenseFeature) + } + +// test("linear regression model with constant label") { +// /* +// R code: +// for (formula in c(b.const ~ . -1, b.const ~ .)) { +// model <- lm(formula, data=df.const.label, weights=w) +// print(as.vector(coef(model))) +// } +// [1] -9.221298 3.394343 +// [1] 17 0 0 +// */ +// val expected = Seq( +// Vectors.dense(0.0, -9.221298, 3.394343), +// Vectors.dense(17.0, 0.0, 0.0)) +// +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// var idx = 0 +// for (fitIntercept <- Seq(false, true)) { +// val model1 = new LinearRegression() +// .setFitIntercept(fitIntercept) +// .setWeightCol("weight") +// .setPredictionCol("myPrediction") +// .setSolver(solver) +// .fit(datasetWithWeightConstantLabel) +// val actual1 = Vectors.dense(model1.intercept, model1.coefficients(0), +// model1.coefficients(1)) +// assert(actual1 ~== expected(idx) absTol 1e-4) +// +// // Schema of summary.predictions should be a superset of the input dataset +// assert((datasetWithWeightConstantLabel.schema.fieldNames.toSet + model1.getPredictionCol) +// .subsetOf(model1.summary.predictions.schema.fieldNames.toSet)) +// +// val model2 = new LinearRegression() +// .setFitIntercept(fitIntercept) +// .setWeightCol("weight") +// .setPredictionCol("myPrediction") +// .setSolver(solver) +// .fit(datasetWithWeightZeroLabel) +// val actual2 = Vectors.dense(model2.intercept, model2.coefficients(0), +// model2.coefficients(1)) +// assert(actual2 ~== Vectors.dense(0.0, 0.0, 0.0) absTol 1e-4) +// +// // Schema of summary.predictions should be a superset of the input dataset +// assert((datasetWithWeightZeroLabel.schema.fieldNames.toSet + model2.getPredictionCol) +// .subsetOf(model2.summary.predictions.schema.fieldNames.toSet)) +// +// idx += 1 +// } +// } +// } + +// test("regularized linear regression through origin with constant label") { +// // The problem is ill-defined if fitIntercept=false, regParam is non-zero. +// // An exception is thrown in this case. +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// for (standardization <- Seq(false, true)) { +// val model = new LinearRegression().setFitIntercept(false) +// .setRegParam(0.1).setStandardization(standardization).setSolver(solver) +// intercept[IllegalArgumentException] { +// model.fit(datasetWithWeightConstantLabel) +// } +// } +// } +// } + + test("linear regression with l-bfgs when training is not needed") { + // When label is constant, l-bfgs solver returns results without training. + // There are two possibilities: If the label is non-zero but constant, + // and fitIntercept is true, then the model return yMean as intercept without training. + // If label is all zeros, then all coefficients are zero regardless of fitIntercept, so + // no training is needed. + for (fitIntercept <- Seq(false, true)) { + for (standardization <- Seq(false, true)) { + val model1 = new LinearRegression() + .setFitIntercept(fitIntercept) + .setStandardization(standardization) + .setWeightCol("weight") + .setSolver("l-bfgs") + .fit(datasetWithWeightConstantLabel) + if (fitIntercept) { + assert(model1.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) + } + val model2 = new LinearRegression() + .setFitIntercept(fitIntercept) + .setWeightCol("weight") + .setSolver("l-bfgs") + .fit(datasetWithWeightZeroLabel) + assert(model2.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4) + } + } + } + +// test("linear regression model training summary") { +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer = new LinearRegression().setSolver(solver).setPredictionCol("myPrediction") +// val model = trainer.fit(datasetWithDenseFeature) +// val trainerNoPredictionCol = trainer.setPredictionCol("") +// val modelNoPredictionCol = trainerNoPredictionCol.fit(datasetWithDenseFeature) +// +// // Training results for the model should be available +// assert(model.hasSummary) +// assert(modelNoPredictionCol.hasSummary) +// +// // Schema should be a superset of the input dataset +// assert((datasetWithDenseFeature.schema.fieldNames.toSet + model.getPredictionCol).subsetOf( +// model.summary.predictions.schema.fieldNames.toSet)) +// // Validate that we re-insert a prediction column for evaluation +// val modelNoPredictionColFieldNames +// = modelNoPredictionCol.summary.predictions.schema.fieldNames +// assert(datasetWithDenseFeature.schema.fieldNames.toSet.subsetOf( +// modelNoPredictionColFieldNames.toSet)) +// assert(modelNoPredictionColFieldNames.exists(s => s.startsWith("prediction_"))) +// +// // Residuals in [[LinearRegressionResults]] should equal those manually computed +// datasetWithDenseFeature.select("features", "label") +// .rdd +// .map { case Row(features: DenseVector, label: Double) => +// val prediction = +// features(0) * model.coefficients(0) + features(1) * model.coefficients(1) + +// model.intercept +// label - prediction +// } +// .zip(model.summary.residuals.rdd.map(_.getDouble(0))) +// .collect() +// .foreach { case (manualResidual: Double, resultResidual: Double) => +// assert(manualResidual ~== resultResidual relTol 1E-5) +// } +// +// /* +// # Use the following R code to generate model training results. +// +// # path/part-00000 is the file generated by running LinearDataGenerator.generateLinearInput +// # as described before the beforeAll() method. +// d1 <- read.csv("path/part-00000", header=FALSE, stringsAsFactors=FALSE) +// fit <- glm(V1 ~ V2 + V3, data = d1, family = "gaussian") +// names(f1)[1] = c("V2") +// names(f1)[2] = c("V3") +// f1 <- data.frame(as.numeric(d1$V2), as.numeric(d1$V3)) +// predictions <- predict(fit, newdata=f1) +// l1 <- as.numeric(d1$V1) +// +// residuals <- l1 - predictions +// > mean(residuals^2) # MSE +// [1] 0.00985449 +// > mean(abs(residuals)) # MAD +// [1] 0.07961668 +// > cor(predictions, l1)^2 # r^2 +// [1] 0.9998737 +// +// > summary(fit) +// +// Call: +// glm(formula = V1 ~ V2 + V3, family = "gaussian", data = d1) +// +// Deviance Residuals: +// Min 1Q Median 3Q Max +// -0.47082 -0.06797 0.00002 0.06725 0.34635 +// +// Coefficients: +// Estimate Std. Error t value Pr(>|t|) +// (Intercept) 6.3022157 0.0018600 3388 <2e-16 *** +// V2 4.6982442 0.0011805 3980 <2e-16 *** +// V3 7.1994344 0.0009044 7961 <2e-16 *** +// +// # R code for r2adj +// lm_fit <- lm(V1 ~ V2 + V3, data = d1) +// summary(lm_fit)$adj.r.squared +// [1] 0.9998736 +// --- +// +// .... +// */ +// assert(model.summary.meanSquaredError ~== 0.00985449 relTol 1E-4) +// assert(model.summary.meanAbsoluteError ~== 0.07961668 relTol 1E-4) +// assert(model.summary.r2 ~== 0.9998737 relTol 1E-4) +// assert(model.summary.r2adj ~== 0.9998736 relTol 1E-4) +// +// // Normal solver uses "WeightedLeastSquares". If no regularization is applied or only L2 +// // regularization is applied, this algorithm uses a direct solver and does not generate an +// // objective history because it does not run through iterations. +// if (solver == "l-bfgs") { +// // Objective function should be monotonically decreasing for linear regression +// assert( +// model.summary +// .objectiveHistory +// .sliding(2) +// .forall(x => x(0) >= x(1))) +// } else { +// // To clarify that the normal solver is used here. +// assert(model.summary.objectiveHistory.length == 1) +// assert(model.summary.objectiveHistory(0) == 0.0) +// val devianceResidualsR = Array(-0.47082, 0.34635) +// val seCoefR = Array(0.0011805, 0.0009044, 0.0018600) +// val tValsR = Array(3980, 7961, 3388) +// val pValsR = Array(0, 0, 0) +// model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x => +// assert(x._1 ~== x._2 absTol 1E-4) } +// model.summary.coefficientStandardErrors.zip(seCoefR).foreach { x => +// assert(x._1 ~== x._2 absTol 1E-4) } +// model.summary.tValues.map(_.round).zip(tValsR).foreach{ x => assert(x._1 === x._2) } +// model.summary.pValues.map(_.round).zip(pValsR).foreach{ x => assert(x._1 === x._2) } +// } +// } +// } +// +// test("linear regression model testset evaluation summary") { +// Seq("auto", "l-bfgs", "normal").foreach { solver => +// val trainer = new LinearRegression().setSolver(solver) +// val model = trainer.fit(datasetWithDenseFeature) +// +// // Evaluating on training dataset should yield results summary equal to training summary +// val testSummary = model.evaluate(datasetWithDenseFeature) +// assert(model.summary.meanSquaredError ~== testSummary.meanSquaredError relTol 1E-5) +// assert(model.summary.r2 ~== testSummary.r2 relTol 1E-5) +// model.summary.residuals.select("residuals").collect() +// .zip(testSummary.residuals.select("residuals").collect()) +// .forall { case (Row(r1: Double), Row(r2: Double)) => r1 ~== r2 relTol 1E-5 } +// } +// } +// +// test("linear regression with weighted samples") { +// val sqlContext = spark.sqlContext +// import sqlContext.implicits._ +// val numClasses = 0 +// def modelEquals(m1: LinearRegressionModel, m2: LinearRegressionModel): Unit = { +// assert(m1.coefficients ~== m2.coefficients relTol 0.01) +// assert(m1.intercept ~== m2.intercept relTol 0.01) +// } +// val testParams = Seq( +// // (elasticNetParam, regParam, fitIntercept, standardization) +// (0.0, 0.21, true, true), +// (0.0, 0.21, true, false), +// (0.0, 0.21, false, false), +// (1.0, 0.21, true, true) +// ) +// +// // For squaredError loss +// for (solver <- Seq("auto", "l-bfgs", "normal"); +// (elasticNetParam, regParam, fitIntercept, standardization) <- testParams) { +// val estimator = new LinearRegression() +// .setFitIntercept(fitIntercept) +// .setStandardization(standardization) +// .setRegParam(regParam) +// .setElasticNetParam(elasticNetParam) +// .setSolver(solver) +// .setMaxIter(1) +// MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression]( +// datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals) +// MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression]( +// datasetWithStrongNoise.as[LabeledPoint], estimator, numClasses, modelEquals, +// outlierRatio = 3) +// MLTestingUtils.testOversamplingVsWeighting[LinearRegressionModel, LinearRegression]( +// datasetWithStrongNoise.as[LabeledPoint], estimator, modelEquals, seed) +// } +// +// // For huber loss +// for ((_, regParam, fitIntercept, standardization) <- testParams) { +// val estimator = new LinearRegression() +// .setLoss("huber") +// .setFitIntercept(fitIntercept) +// .setStandardization(standardization) +// .setRegParam(regParam) +// .setMaxIter(1) +// MLTestingUtils.testArbitrarilyScaledWeights[LinearRegressionModel, LinearRegression]( +// datasetWithOutlier.as[LabeledPoint], estimator, modelEquals) +// MLTestingUtils.testOutliersWithSmallWeights[LinearRegressionModel, LinearRegression]( +// datasetWithOutlier.as[LabeledPoint], estimator, numClasses, modelEquals, +// outlierRatio = 3) +// MLTestingUtils.testOversamplingVsWeighting[LinearRegressionModel, LinearRegression]( +// datasetWithOutlier.as[LabeledPoint], estimator, modelEquals, seed) +// } +// } + + test("linear regression model with l-bfgs with big feature datasets") { + val trainer = new LinearRegression().setSolver("auto") + val model = trainer.fit(datasetWithSparseFeature) + + // Training results for the model should be available + assert(model.hasSummary) + // When LBFGS is used as optimizer, objective history can be restored. + assert( + model.summary + .objectiveHistory + .sliding(2) + .forall(x => x(0) >= x(1))) + } + +// test("linear regression summary with weighted samples and intercept by normal solver") { +// /* +// R code: +// +// model <- glm(formula = "b ~ .", data = df, weights = w) +// summary(model) +// +// Call: +// glm(formula = "b ~ .", data = df, weights = w) +// +// Deviance Residuals: +// 1 2 3 4 +// 1.920 -1.358 -1.109 0.960 +// +// Coefficients: +// Estimate Std. Error t value Pr(>|t|) +// (Intercept) 18.080 9.608 1.882 0.311 +// V1 6.080 5.556 1.094 0.471 +// V2 -0.600 1.960 -0.306 0.811 +// +// (Dispersion parameter for gaussian family taken to be 7.68) +// +// Null deviance: 202.00 on 3 degrees of freedom +// Residual deviance: 7.68 on 1 degrees of freedom +// AIC: 18.783 +// +// Number of Fisher Scoring iterations: 2 +// */ +// +// val model = new LinearRegression() +// .setWeightCol("weight") +// .setSolver("normal") +// .fit(datasetWithWeight) +// val coefficientsR = Vectors.dense(Array(6.080, -0.600)) +// val interceptR = 18.080 +// val devianceResidualsR = Array(-1.358, 1.920) +// val seCoefR = Array(5.556, 1.960, 9.608) +// val tValsR = Array(1.094, -0.306, 1.882) +// val pValsR = Array(0.471, 0.811, 0.311) +// +// assert(model.coefficients ~== coefficientsR absTol 1E-3) +// assert(model.intercept ~== interceptR absTol 1E-3) +// model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x => +// assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x => +// assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } +// +// val modelWithL1 = new LinearRegression() +// .setWeightCol("weight") +// .setSolver("normal") +// .setRegParam(0.5) +// .setElasticNetParam(1.0) +// .fit(datasetWithWeight) +// +// assert(modelWithL1.summary.objectiveHistory !== Array(0.0)) +// assert( +// modelWithL1.summary +// .objectiveHistory +// .sliding(2) +// .forall(x => x(0) >= x(1))) +// } + +// test("linear regression summary with weighted samples and w/o intercept by normal solver") { +// /* +// R code: +// +// model <- glm(formula = "b ~ . -1", data = df, weights = w) +// summary(model) +// +// Call: +// glm(formula = "b ~ . -1", data = df, weights = w) +// +// Deviance Residuals: +// 1 2 3 4 +// 1.950 2.344 -4.600 2.103 +// +// Coefficients: +// Estimate Std. Error t value Pr(>|t|) +// V1 -3.7271 2.9032 -1.284 0.3279 +// V2 3.0100 0.6022 4.998 0.0378 * +// --- +// Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +// +// (Dispersion parameter for gaussian family taken to be 17.4376) +// +// Null deviance: 5962.000 on 4 degrees of freedom +// Residual deviance: 34.875 on 2 degrees of freedom +// AIC: 22.835 +// +// Number of Fisher Scoring iterations: 2 +// */ +// +// val model = new LinearRegression() +// .setWeightCol("weight") +// .setSolver("normal") +// .setFitIntercept(false) +// .fit(datasetWithWeight) +// val coefficientsR = Vectors.dense(Array(-3.7271, 3.0100)) +// val interceptR = 0.0 +// val devianceResidualsR = Array(-4.600, 2.344) +// val seCoefR = Array(2.9032, 0.6022) +// val tValsR = Array(-1.284, 4.998) +// val pValsR = Array(0.3279, 0.0378) +// +// assert(model.coefficients ~== coefficientsR absTol 1E-3) +// assert(model.intercept === interceptR) +// model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x => +// assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x => +// assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } +// model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) } +// } + + test("read/write") { + def checkModelData(model: LinearRegressionModel, model2: LinearRegressionModel): Unit = { + assert(model.intercept === model2.intercept) + assert(model.coefficients === model2.coefficients) + } + val lr = new LinearRegression() + testEstimatorAndModelReadWrite(lr, datasetWithWeight, LinearRegressionSuite.allParamSettings, + LinearRegressionSuite.allParamSettings, checkModelData) + } + +// test("pmml export") { +// val lr = new LinearRegression() +// val model = lr.fit(datasetWithWeight) +// def checkModel(pmml: PMML): Unit = { +// val dd = pmml.getDataDictionary +// assert(dd.getNumberOfFields === 3) +// val fields = dd.getDataFields.asScala +// assert(fields(0).getName().toString === "field_0") +// assert(fields(0).getOpType() == OpType.CONTINUOUS) +// val pmmlRegressionModel = pmml.getModels().get(0).asInstanceOf[PMMLRegressionModel] +// val pmmlPredictors = pmmlRegressionModel.getRegressionTables.get(0).getNumericPredictors +// val pmmlWeights = pmmlPredictors.asScala.map(_.getCoefficient()).toList +// assert(pmmlWeights(0) ~== model.coefficients(0) relTol 1E-3) +// assert(pmmlWeights(1) ~== model.coefficients(1) relTol 1E-3) +// } +// testPMMLWrite(sc, model, checkModel) +// } + +// test("should support all NumericType labels and weights, and not support other types") { +// for (solver <- Seq("auto", "l-bfgs", "normal")) { +// val lr = new LinearRegression().setMaxIter(1).setSolver(solver) +// MLTestingUtils.checkNumericTypes[LinearRegressionModel, LinearRegression]( +// lr, spark, isClassification = false) { (expected, actual) => +// assert(expected.intercept === actual.intercept) +// assert(expected.coefficients === actual.coefficients) +// } +// } +// } + + test("linear regression (huber loss) with intercept without regularization") { + val trainer1 = (new LinearRegression).setLoss("huber") + .setFitIntercept(true).setStandardization(true) + val trainer2 = (new LinearRegression).setLoss("huber") + .setFitIntercept(true).setStandardization(false) + + val model1 = trainer1.fit(datasetWithOutlier) + val model2 = trainer2.fit(datasetWithOutlier) + + /* + Using the following Python code to load the data and train the model using + scikit-learn package. + + import pandas as pd + import numpy as np + from sklearn.linear_model import HuberRegressor + df = pd.read_csv("path", header = None) + X = df[df.columns[1:3]] + y = np.array(df[df.columns[0]]) + huber = HuberRegressor(fit_intercept=True, alpha=0.0, max_iter=100, epsilon=1.35) + huber.fit(X, y) + + >>> huber.coef_ + array([ 4.68998007, 7.19429011]) + >>> huber.intercept_ + 6.3002404351083037 + >>> huber.scale_ + 0.077810159205220747 + */ + val coefficientsPy = Vectors.dense(4.68998007, 7.19429011) + val interceptPy = 6.30024044 + val scalePy = 0.07781016 + + assert(model1.coefficients ~= coefficientsPy relTol 1E-3) + assert(model1.intercept ~== interceptPy relTol 1E-3) + assert(model1.scale ~== scalePy relTol 1E-3) + + // Without regularization, with or without standardization will converge to the same solution. + assert(model2.coefficients ~= coefficientsPy relTol 1E-3) + assert(model2.intercept ~== interceptPy relTol 1E-3) + assert(model2.scale ~== scalePy relTol 1E-3) + } + + test("linear regression (huber loss) without intercept without regularization") { + val trainer1 = (new LinearRegression).setLoss("huber") + .setFitIntercept(false).setStandardization(true) + val trainer2 = (new LinearRegression).setLoss("huber") + .setFitIntercept(false).setStandardization(false) + + val model1 = trainer1.fit(datasetWithOutlier) + val model2 = trainer2.fit(datasetWithOutlier) + + /* + huber = HuberRegressor(fit_intercept=False, alpha=0.0, max_iter=100, epsilon=1.35) + huber.fit(X, y) + + >>> huber.coef_ + array([ 6.71756703, 5.08873222]) + >>> huber.intercept_ + 0.0 + >>> huber.scale_ + 2.5560209922722317 + */ + val coefficientsPy = Vectors.dense(6.71756703, 5.08873222) + val interceptPy = 0.0 + val scalePy = 2.55602099 + + assert(model1.coefficients ~= coefficientsPy relTol 1E-3) + assert(model1.intercept === interceptPy) + assert(model1.scale ~== scalePy relTol 1E-3) + + // Without regularization, with or without standardization will converge to the same solution. + assert(model2.coefficients ~= coefficientsPy relTol 1E-3) + assert(model2.intercept === interceptPy) + assert(model2.scale ~== scalePy relTol 1E-3) + } + + test("linear regression (huber loss) with intercept with L2 regularization") { + val trainer1 = (new LinearRegression).setLoss("huber") + .setFitIntercept(true).setRegParam(0.21).setStandardization(true) + val trainer2 = (new LinearRegression).setLoss("huber") + .setFitIntercept(true).setRegParam(0.21).setStandardization(false) + + val model1 = trainer1.fit(datasetWithOutlier) + val model2 = trainer2.fit(datasetWithOutlier) + + /* + Since scikit-learn HuberRegressor does not support standardization, + we do it manually out of the estimator. + + xStd = np.std(X, axis=0) + scaledX = X / xStd + huber = HuberRegressor(fit_intercept=True, alpha=210, max_iter=100, epsilon=1.35) + huber.fit(scaledX, y) + + >>> np.array(huber.coef_ / xStd) + array([ 1.97732633, 3.38816722]) + >>> huber.intercept_ + 3.7527581430531227 + >>> huber.scale_ + 3.787363673371801 + */ + val coefficientsPy1 = Vectors.dense(1.97732633, 3.38816722) + val interceptPy1 = 3.75275814 + val scalePy1 = 3.78736367 + + assert(model1.coefficients ~= coefficientsPy1 relTol 1E-2) + assert(model1.intercept ~== interceptPy1 relTol 1E-2) + assert(model1.scale ~== scalePy1 relTol 1E-2) + + /* + huber = HuberRegressor(fit_intercept=True, alpha=210, max_iter=100, epsilon=1.35) + huber.fit(X, y) + + >>> huber.coef_ + array([ 1.73346444, 3.63746999]) + >>> huber.intercept_ + 4.3017134790781739 + >>> huber.scale_ + 3.6472742809286793 + */ + val coefficientsPy2 = Vectors.dense(1.73346444, 3.63746999) + val interceptPy2 = 4.30171347 + val scalePy2 = 3.64727428 + + assert(model2.coefficients ~= coefficientsPy2 relTol 1E-3) + assert(model2.intercept ~== interceptPy2 relTol 1E-3) + assert(model2.scale ~== scalePy2 relTol 1E-3) + } + + test("linear regression (huber loss) without intercept with L2 regularization") { + val trainer1 = (new LinearRegression).setLoss("huber") + .setFitIntercept(false).setRegParam(0.21).setStandardization(true) + val trainer2 = (new LinearRegression).setLoss("huber") + .setFitIntercept(false).setRegParam(0.21).setStandardization(false) + + val model1 = trainer1.fit(datasetWithOutlier) + val model2 = trainer2.fit(datasetWithOutlier) + + /* + Since scikit-learn HuberRegressor does not support standardization, + we do it manually out of the estimator. + + xStd = np.std(X, axis=0) + scaledX = X / xStd + huber = HuberRegressor(fit_intercept=False, alpha=210, max_iter=100, epsilon=1.35) + huber.fit(scaledX, y) + + >>> np.array(huber.coef_ / xStd) + array([ 2.59679008, 2.26973102]) + >>> huber.intercept_ + 0.0 + >>> huber.scale_ + 4.5766311924091791 + */ + val coefficientsPy1 = Vectors.dense(2.59679008, 2.26973102) + val interceptPy1 = 0.0 + val scalePy1 = 4.57663119 + + assert(model1.coefficients ~= coefficientsPy1 relTol 1E-2) + assert(model1.intercept === interceptPy1) + assert(model1.scale ~== scalePy1 relTol 1E-2) + + /* + huber = HuberRegressor(fit_intercept=False, alpha=210, max_iter=100, epsilon=1.35) + huber.fit(X, y) + + >>> huber.coef_ + array([ 2.28423908, 2.25196887]) + >>> huber.intercept_ + 0.0 + >>> huber.scale_ + 4.5979643506051753 + */ + val coefficientsPy2 = Vectors.dense(2.28423908, 2.25196887) + val interceptPy2 = 0.0 + val scalePy2 = 4.59796435 + + assert(model2.coefficients ~= coefficientsPy2 relTol 1E-3) + assert(model2.intercept === interceptPy2) + assert(model2.scale ~== scalePy2 relTol 1E-3) + } + + test("huber loss model match squared error for large epsilon") { + val trainer1 = new LinearRegression().setLoss("huber").setEpsilon(1E5) + val model1 = trainer1.fit(datasetWithOutlier) + val trainer2 = new LinearRegression() + val model2 = trainer2.fit(datasetWithOutlier) + assert(model1.coefficients ~== model2.coefficients relTol 1E-3) + assert(model1.intercept ~== model2.intercept relTol 1E-3) + } +} + +object LinearRegressionSuite { + + /** + * Mapping from all Params to valid settings which differ from the defaults. + * This is useful for tests which need to exercise all Params, such as save/load. + * This excludes input columns to simplify some tests. + */ + val allParamSettings: Map[String, Any] = Map( + "predictionCol" -> "myPrediction", + "regParam" -> 0.01, + "elasticNetParam" -> 0.1, + "maxIter" -> 2, // intentionally small + "fitIntercept" -> true, + "tol" -> 0.8, + "standardization" -> false, + "solver" -> "l-bfgs" + ) +} diff --git a/mllib-dal/test.sh b/mllib-dal/test.sh index d7df508fe..1cd5f836a 100755 --- a/mllib-dal/test.sh +++ b/mllib-dal/test.sh @@ -47,11 +47,13 @@ mvn clean # Individual test if [[ -z $SPARK_VER ]]; then - mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test - mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test +# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test +# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test + mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.regression.IntelLinearRegressionSuite test # mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test else - mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test - mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test +# mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test +# mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test + mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.regression.IntelLinearRegressionSuite test # mvn -P$SPARK_VER -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test fi