From ee66641417fd23914d288092a8382a4fb8dcf20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 14:19:34 +0200 Subject: [PATCH 01/13] Remove HHVM from Travis CI builds, HHVM has announced that they no longer intend to maintain compatibility with PHP, https://hhvm.com/blog/2017/09/18/the-future-of-hhvm.html --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 16ba0c7..5557094 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,6 @@ language: php php: - 5.6 - 7.0 - - hhvm before_script: - composer install From 2595325dc90b7e08de7989df27278d32e0a60ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 14:08:49 +0200 Subject: [PATCH 02/13] Add test for mac line breaks in text field --- tests/CsvReadTest.php | 18 ++++++++++++++++++ tests/data/test-input.lineBreaks.csv | 2 ++ 2 files changed, 20 insertions(+) create mode 100644 tests/data/test-input.lineBreaks.csv diff --git a/tests/CsvReadTest.php b/tests/CsvReadTest.php index c6bff66..ae9e77b 100644 --- a/tests/CsvReadTest.php +++ b/tests/CsvReadTest.php @@ -137,6 +137,24 @@ public function testParseEscapedBy() self::assertEquals($expected, iterator_to_array($csvFile)); } + public function testParseMacLineEndsInField() + { + $csvFile = new CsvReader(__DIR__ . '/data/test-input.lineBreaks.csv', ",", '"', '\\'); + + $expected = [ + [ + 'test', + "some text\rwith\r\\r line breaks\rinside\rbut\rrows\rare\rusing \\n \\\"line\\\" break\r", + ], + [ + 'name', 'data' + ] + ]; + + self::assertEquals($expected, iterator_to_array($csvFile)); + } + + public function testEmptyHeader() { $csvFile = new CsvReader(__DIR__ . '/data/test-input.empty.csv', ',', '"'); diff --git a/tests/data/test-input.lineBreaks.csv b/tests/data/test-input.lineBreaks.csv new file mode 100644 index 0000000..ed4ce9d --- /dev/null +++ b/tests/data/test-input.lineBreaks.csv @@ -0,0 +1,2 @@ +"test","some text with \r line breaks inside but rows are using \n \"line\" break " +"name","data" From c25316ec36b613cdad3dfa1f185c3e2e27b87a8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 14:20:14 +0200 Subject: [PATCH 03/13] Add LineBreaksHelper --- composer.json | 3 +- src/LineBreaksHelper.php | 96 ++++++++++++++++++++++ tests/LineBreaksHelperTest.php | 142 +++++++++++++++++++++++++++++++++ 3 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 src/LineBreaksHelper.php create mode 100644 tests/LineBreaksHelperTest.php diff --git a/composer.json b/composer.json index f18e95e..812d5d3 100644 --- a/composer.json +++ b/composer.json @@ -29,6 +29,7 @@ }, "require-dev": { "phpunit/phpunit": "^5.7", - "squizlabs/php_codesniffer": "^3.2" + "squizlabs/php_codesniffer": "^3.2", + "ext-json": "*" } } diff --git a/src/LineBreaksHelper.php b/src/LineBreaksHelper.php new file mode 100644 index 0000000..4c1d1e5 --- /dev/null +++ b/src/LineBreaksHelper.php @@ -0,0 +1,96 @@ +(?>"")|[^"])*"~ + * enclosure: |"|, escapedBy: |\|, regexp: ~"(?>(?>\\"|\\\\)|[^"])*"~ + */ + $regexpDelimiter = '~'; + $regexp = + // regexp start + $regexpDelimiter . + // enclosure start + preg_quote($enclosure, $regexpDelimiter) . + /* + * Once-only group => if there is a match, do not try other alternatives + * See: https://www.php.net/manual/en/regexp.reference.onlyonce.php + * Without once-only group will be |"abc\"| false positive, + * because |\| is matched by group and |"| at the end of regexp. + */ + // repeated once-only group start + '(?>' . + // once-only group start + '(?>' . + // escaped enclosure + preg_quote($escapedEnclosure, $regexpDelimiter) . + // OR escaped escape char + ($escapedEscape ? '|' . preg_quote($escapedEscape, $regexpDelimiter) : '') . + // group end + ')' . + // OR not enclosure + '|[^' . preg_quote($enclosure, $regexpDelimiter) . ']' . + // group end + ')*' . + // enclosure end + preg_quote($enclosure, $regexpDelimiter) . + // regexp end + $regexpDelimiter; + + return preg_replace($regexp, $doubleEnclosure, $sample); + } +} diff --git a/tests/LineBreaksHelperTest.php b/tests/LineBreaksHelperTest.php new file mode 100644 index 0000000..584cb8b --- /dev/null +++ b/tests/LineBreaksHelperTest.php @@ -0,0 +1,142 @@ + "\n", + 'r' => "\r", + "r-n" => "\r\n", + ]; + + yield "empty" => [ + '"', + '', + '', + '', + "\n" + ]; + + yield "empty-escaped-by" => [ + '"', + '\\', + '', + '', + "\n" + ]; + + foreach ($lineEnds as $prefix => $lineEnd) { + yield "$prefix-simple" => [ + '"', + '', + implode($lineEnd, [ + 'col1,col2', + 'line without enclosure,second column', + '"enclosure "" in column","hello \"', + '"line with enclosure","second column"', + '"column with enclosure "", and comma inside text","second column enclosure in text """', + ]), + implode($lineEnd, [ + 'col1,col2', + 'line without enclosure,second column', + '"",""', + '"",""', + '"",""', + ]), + $lineEnd + ]; + + yield "$prefix-simple-escaped-by" => [ + '"', + '\\', + implode($lineEnd, [ + 'col1,col2', + 'line without enclosure,second column', + '"enclosure \" in column","hello \\\\"', + '"line with enclosure","second column"', + '"column with enclosure \", and comma inside text","second column enclosure in text \""' + ]), + implode($lineEnd, [ + 'col1,col2', + 'line without enclosure,second column', + '"",""', + '"",""', + '"",""', + ]), + $lineEnd + ]; + + yield "$prefix-multiline-n" => [ + '"', + '', + implode($lineEnd, [ + "\"xyz\",\"\n\n\nabc\n\n\n\"\"\n\n\nxyz\n\n\n\"", + '"abc","def"', + ]), + implode($lineEnd, [ + '"",""', + '"",""', + ]), + $lineEnd + ]; + + yield "$prefix-multiline-r" => [ + '"', + '', + implode($lineEnd, [ + "\"xyz\",\"\r\r\rabc\r\r\r\"\"\r\r\rxyz\r\r\r\"", + '"abc","def"', + ]), + implode($lineEnd, [ + '"",""', + '"",""', + ]), + $lineEnd + ]; + + yield "$prefix-multiline-r-n" => [ + '"', + '', + implode($lineEnd, [ + "\"xyz\",\"\r\n\r\n\r\nabc\r\n\r\n\r\n\"\"\r\n\r\n\r\nxyz\r\n\r\n\r\n\"", + '"abc","def"', + ]), + implode($lineEnd, [ + '"",""', + '"",""', + ]), + $lineEnd + ]; + } + + + } +} From f9eaab3de72fa5ecff3ef086323d9a42e20a225e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 14:20:17 +0200 Subject: [PATCH 04/13] Update CsvReader to use LineBreaksHelper --- src/CsvReader.php | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/CsvReader.php b/src/CsvReader.php index a3dc7aa..a0d382b 100644 --- a/src/CsvReader.php +++ b/src/CsvReader.php @@ -114,26 +114,7 @@ protected function detectLineBreak() rewind($this->getFilePointer()); $sample = fread($this->getFilePointer(), 10000); - $possibleLineBreaks = [ - "\r\n", // win - "\r", // mac - "\n", // unix - ]; - - $lineBreaksPositions = []; - foreach ($possibleLineBreaks as $lineBreak) { - $position = strpos($sample, $lineBreak); - if ($position === false) { - continue; - } - $lineBreaksPositions[$lineBreak] = $position; - } - - - asort($lineBreaksPositions); - reset($lineBreaksPositions); - - return empty($lineBreaksPositions) ? "\n" : key($lineBreaksPositions); + return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy()); } /** From e78714dbf7eb42e2231e7001c54503365709a639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 15:16:04 +0200 Subject: [PATCH 05/13] Remove testInvalidNewLines, it is now working --- tests/CsvReadTest.php | 8 -------- tests/data/binary | Bin 21767 -> 0 bytes 2 files changed, 8 deletions(-) delete mode 100644 tests/data/binary diff --git a/tests/CsvReadTest.php b/tests/CsvReadTest.php index ae9e77b..027ca69 100644 --- a/tests/CsvReadTest.php +++ b/tests/CsvReadTest.php @@ -382,14 +382,6 @@ public function invalidSkipLinesProvider() ]; } - public function testInvalidNewLines() - { - self::expectException(Exception::class); - self::expectExceptionMessage('Invalid line break. Please use unix \n or win \r\n line breaks.'); - new CsvReader(__DIR__ . DIRECTORY_SEPARATOR . 'data/binary'); - } - - public function testValidWithoutRewind() { $fileName = __DIR__ . '/data/simple.csv'; diff --git a/tests/data/binary b/tests/data/binary deleted file mode 100644 index 421f1a7bf01390b1b8b558d9086ef1ce188d104c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 21767 zcmeFYgOeuBwmsaoZJX1!ZQHhO+qP}no}RYtX-?bjU%&6U=YHoq_xuOP0!T~9&eqw))>%*4!`{S6 zhtA!`nxGH_h$0^V=zIMC9si5}f#H-1%b_0#B2OWoL36FEl8O{1!{dc>UI9#R4Bych^tYBV8+fFQKllI+ox8{(%=2nW!a-K9m#5MD#sf}@`Qrr)4hTewS@De=g zx(?>CK!O-j@vND>1EpykqjOj8J0$lB;b}apv7}Ug1v_=50#!_>iGCCgvk(#HNlTgEmt@_k3PLd7 z5y@pr5;m_L@}(2+l7(G=QBv)=auX!xM**}rSp}nd_qwI-hk01VPiiuwbvCgL#ubuAaCBS<#YzC)8QeLAaL|_9{o_o9#Trx(FJJ)q|6kbRC1AB(efP+I zM|F>pePO0$QdlKgy-<|sea@`D&fb&GH44T%9%wYlBS|u2bGnl zbYG_Y42Gp;OK~kP@Jb$$-ca1mX-r=zQ@(~YCffM8!R$_E-O>mT%_ z*$IZ#P*~U9b7!7KePtsNAvBN$kP-B6-lp0}nxDs!N0@x9SA-hPdV6-***XGu-{SQ@ z<6~P?H7r~cyi z;H!Y%Z@{EpG)rg^9Vw@}rH8 zgeH89$zyC{whVivqk!e{&VM$-SpS=4y~e+N1MK=nKT0OB`%{>ItABJ7-J!uCMY z^$p+-9uzp~9w1ftOhBS{p*NoNrS;7KJ9UHYlmH|7XQN1zvBR^YRF$@I(#SnkR)-S< zKlaa3^cY>^Esf@C{PDEk$_dFLS%s9EttM^V_0Q#;D#t#RXUaEme7!3yzY!-?D{j1H zCZVmuF&I^p4}QO_Zwy^%+rX~Dv7%Hf1aa$OBY0hmP3?RP;m@$5P<3%K;D8XocpXe8 z=hZ#njBQe&Sa*myTOYfqw3f@n<#bCw0P*munkcdtr7BWD753LjNBtUZE$AU*<+iQ^ z-TnP^noo(w8}tiCzk}eZ{xJTD5az=(u3Q4f-9wrdm&@KN2gTX?L5B(ahs=4oj*3za zpg(dk0*p`Zk>;o<01?|MSXJYm8}T!wmAb|HLsc^n)h!w{+FU;TV!c--2VqDqDpvP- z2!A&T3AYaPL{5g`i>tTw3r+_}dg&V=w#(F&2cxip_y8i9p(3#idZA4y&s>A0HfweS zv(hjs^&KFEE-=BGF)u<2fbam`Nm`|YY3^q11fRwt3w=RFTTrXfJ$Ze4Mh37d0qp{k z^-e8Q4j!vOii?NQ@o+xcrR6N~E)UQn;WXfIv#||ww0;^D@Z%rnmR++QC^}zJGwJIy z7r%FR(v@TXq@Zk6F^Vmh z*@!ES1-c=4QG)Y(7A#jb`y}Yc!O=ce)2WgWB}l9 z&|XtPghHVie4qq(liP37v0=rU@;eUmW_$qKx4LrvhF?JCQG^J((BC?^^D4W8iGd_4ACv#JP!*c6k7T%)&bw2tb9k3J589|P3%}`pK!wg^#KLPERG0c zCP!V}Elo~vVU|ePi_~_4!j}GFjy#Acdh({OXr|25v6?l>{!mgvQh=TJww+w)S{RZ`5E1!mU3B&mojI{rFmzft?*E7 z*%DLa-v2QnfR=O8yma+m1}EoAfM~?CCc9wF+wzn%E)R$akn}e!?xvFhK5h)3X})Za zu5=Z^T@ihA3EH#&%W8LD9waO;(=~_(Or_<4E?DB^p3kvZr$%z+!&!DVEwo$I=z@N` zpn~2wl5lv5CJSiAv-dO4>FuR8^J<(P6%ryyE}^^g7ovbj{Jb#8K=TvOA#J34bQ8?> zdD%l=8^XYlRG;Cl9sY%Vub7FfDegnSaFNsRb)OYNbo|k;(_ADB+v6bvSFkl5e{4;5 zz&eS$-1(AJhS$jYnsqpaZ0Ydrh9XTe4o~qNmMpk{tIig7ZYsA{X~Ua*JX=;mAb2;c zj@6tP%(ojU+LVPP5K20(Ju00bl{=HvU(P_QgX_=U_m~c2p*FzT862O63v}2hMM|4x z5Kj$)&x>Db_Ezepc-@9VX3mv?(D7cFV_HN~;^LHpa?1eq4%Eb!yy1+DvktkKO;^i( zyns$lSE)ufmfgh^G=|eJzVhl3H zeQ&auu93~Hr*QMIi{yB)vIataV7(f-Fb|XQ91^>R6e>AZ4(4xS`?$OXx!@;fh)MpK zk4x^!ZZF+F$>zzXS34IF4g+)-D1Dj@FI4YBF33!Yv?*T~_9r7%j9lO)7b{U%Lu)-S zu5Kh%-ADdQ0&rWFcY>Ds#``qbXwzFXNk6VPIc$EHLGk_xFGY4mt>m$!JQ_81Vh6=b8ct+a zZcl0UIN5LQ$w(z4Wic51iv&fTpb34jjhPazp;~feQl?dmXIx*Jg)ff1UPhR5U^>vg zO4mkL4rG#GvF&_l-y(i?)z$2n?%DnR@p@UgyEV_$R`N4m-KT5Qob5==W+$$mHL>Fy z7#{!SW+;Z%))faDhzE_-5DiFI&Pf{%>D;BF#lh;cw=ncV-S%rAnCTTGqirbJ&eJSj z+33BGzr8N?!X1+puNEk1cP@wyP6i10tgOa+z6_nCBG7)NG!q>z04a?LKX?wNf62ap zcP+V>A2EGo%C$!smCPv7!d7(0-WDtlsly}0VcIkXmqTG6+Yi-JaYN-@rvJj`zO-k< zvd_0GR@AQv_dG!M^*E{A+C7U5`4oy#;*l9_=p^fW9D!)4-W7J}sJ~Ggn*~L#rGjO5 zCiZ%6QPE3MiFJaRmy2bpop4GeWUJPejf?~TNi>D1NHWEU_H{+#1T05d@X6ApFC!xv z&*$NU()@G9i~QNOp?NEEsp14%c4K!g8;SgS$dSEpN$R6hp$sl!lWCDR>+7a0Vc*o> zHb|y4`?Qe2o}+`*ZC{wCg1c-r$|sys*AU*#Sc(N#6cr(B?&VqSesY;5EdsVtrm~}y zg)>b;5q~6+#F})X{^O#E<&zPlskG{+YgbwEm2lHAnX*RfI20JcS!HBO*R`^o!Qhk5 z4sITDP@}VXANEf*m}BxW^25AI*X_eHI%( z&m?^5J*-Jw;kdBaVDOc+EozhrUg>6k4Yus*%XY`e(^983Q0`rU80zJ2;|=i?STR9^ zi@-a(6lcyA*YBg}x&qOMH3MxhSXJf5YbAsQ_806_-Q%Ok9@q_F z<4zse!0D@ri|wvX+HG`NdMJ&lr+w+`#Q9L%QjZ*FL@aM|Q|g?MSC6`+fj#jDMNM2! zlGcyg%}I&D1z1;KS6tEQSZSG+Y2o#;Ru4An2m<#J*;FC1s|9jK`?-!Z%U=?0FQMge zJBil@#9W!to;33rcuc19K`)`vRSjE?px{+E8o}f@0ufK4>f|Kn*&^1d?wCWVAtTgx z$n83a6Ec@)O&&`YnWaUZ&YaAo9>)=)riwmm48QelI82*%u0_m<8YtuxwubDpaRy)g|Js4s~USpaR!2vORMv1pzJx`)x|Sdvxil z@JvZVKJKw>pN2Z#{#8-%m3!m?7{yZLpwdxzZxTP#KsSGcJUIEigTk&+y?7zv;3A)= zHIBE%eN0@H^#4dJL2R(`|2!JJLHA+kGAMPrLBma}2e!T9r!kXQbrmXC>1b)EGN@5( zX=m{`DBFJtAy3@5zFdfc9~Kyz<1pg>F-m-5HZO9I;bNR}(XhdPh%c|r;0I(2tm4NH zP9VFQ=*3^2vkc~$RF(4QaI?MK z+==#beON1;XkWJ#%Wb)YixTk_qVh5rYXc^%$`ElX@RlKxmIxVRcF)l+$aI@R(l zBttn|2<<-NB+A?=;*wyct>VOhcO0?)i2=pA-%rN8<{FVhq+-^K@=H703g<*JN-=Vo zq>8}L6s%0kc)y9e^C51_iwkQ4g`ZJ^J<2Dd{b-%jZbl_FeskuE{ zo;Axry5Vdv9TK5^QUuY~8T4uga{1c?@rXeloSwzk8c)X1Xiw0^>f=YnrkaEPlvpJskM5;-OKq za%z6Aeh+gYQI&egR;N`fwMnHwmA%`lB|X+qyA|Z24^QdWhYTb@{j<+Ewn5#W4`(MJWTWUN@ytE(ocr|lq4 zS|V07FNeL35*ZfvW5VEVpS}(&pVBK7=w_3${FlPph}Csg7*C)kzd%gp`@zC4>i~Em zSn&ETR)tj&u24ZfE;vfzF1P2}D%Mvc+h^Q#RgR1EkNU7xMSx4fr3TpT_77V#bmuIJ z4CR8oE?m$aLPvNmG|-4Pg)qxY+Tq?cqc;++Qcw_6LL}@OOzk-SfyWH7dar9so>}-u zKJrM8g63HM0@8})q+-q7i%)%@gU%U!^LG>wa2K#vA?K_rI%vUX35W$ndS&PbOIo#G zDZ`$%>XqnH95-?gNJ(!sIw(G*N9jp|u&7YE3VCWxC)Lwi${A>$i> z)7mPDuZbrGr8pJ>_(7nCOV+y^+%ebvKG)WWjh2UJ|Y|qwdM+xd6Z3>i99g-#N zCA>?93N+zJJMqfXEWVtp7Oh!cT?H!$QV)_U>3b~G)6=)^g5-{pI`ZCA(-T8LAON7B zkpQQ+JYru7H$vnyqSV@}Z^)&^8$kv$@ z5Ud-@WaFsT_Nzi+E_x32em1LszZw-$9uh)`F5{})(cwSuEGZLOlbBC9!WynxKE_ui z%lOsCU?tv{K~*Zzq)~G`tK});xppH)Kj55WRg))DC~Gv>N2`Odsa5flxaxRW^NKs$ z>mCo=XH1J57HH}}Ktm&YJsL-#wOpS3C_l`YpJeyRK(Rm9vkiZmMR=mIF1L1dB;3q> zHT*T$K}&np!4YQ-yIY#v=(wKU2>zD3d0U4fKLRqwqE%r!?~I5);=PNyZb=IZCP%h_ z07Swz0q%daMs`=9J#AiMntt4gkFsN&dyt5opZ22{I*I~i@NL|aR}VGAh=Z?X*QP~N zvFpSB$eh!r14ERX4WoR`B|V+hiHpnIwM9rru0^80kX@rfJ8YoQ<|qMNNNJD{G&W%Z zF8SVW(B#(|W1fQkAe`=vSjnQpws+yf$w7BkYQ_KQiD;B{H*40d4!MTGM>A)s9V16HnIO0`3AgDWLVf3Q!+IeyP$fs7(1^H5|>{dK9 z8FnmbppNOm98VdoWp@4RUdQ5G(kQQT^t10TyZ7tsTyx5CpB=#$HDyK#8T&3vQ!yoE zd=kerHPd=2H-wgdpjA8ZJeYex_ZC2@BlrNP+#|4O_*!=%ks*M@IGx6NFqNI4q9nDm z?{ggpYl1j|WRsKq_2NC=t=)2%l#@@|VYXS)gvH@UUH~hd<_8^5OSk(>O34$tc^+Gh z=2B}uxfp|m<|pWM+=#hN4dbrJU2z+}kdT!$E}9Z%psKRU^ur7*b(1(CljX8O@)* zhs!U0L=kj@yYj=Uz#xXQAbXSzvAkvnQi+A9s3Hte6%>jL0udNwiPCkZeoZLHr^$&W zFu~S~H~t{rqQa809!V1isA9hpu*C>J@4SvJdcWo#G}yANKQN7XwQB7xsd<;RWQSVP zd^Rij`Vxuh1>EjZ%=s+-w5z;5g%;HO*Lm?7x)%TM=Sxex#||{t*R7az--LUz549va zak8!?xA=5pM4C)Ifw{XE@xq!LZ_h6>I|Onf->CW17P;;MMIj0H9=O|q`$$nl&vE^Oj~#Nc2SfzeUwt=5l=T?8}wyj+KwOhw}ukTF#j&u=>O& zPOKGKRc=&;+9vWGhK}c&Xsay1;$bM*sckPY13bm3=gf&_CvPUX0)pHan~^G3L6!Ze zGg5KH>8gdjVgS4U6!Z*Ef+$z^msLd42*+3Kv$%=tm7RI0`q&&M4e`*h%MpxoZ0Oeh z$^gdcFJR9y@e&27{R>{Ur zAz0X@a}?(cuAGQ~M=i$pH!1vky>)L8j(6$1LgWYr0D$yQpWoEZ&e_(^*~ICuQc`j9 zfNjwagwQMM2i*9p2$>~i4V3vIQJsg*EgyiDbbVAWnvLM!7j6Xnt2U?!jtr~0FGpj1 zxl;=YX>$Ucb^XO8x{OE?!~{yFd5L^|dFWP1Fa0v3Qa>0;$xp`@9xpdaWgY$X3B>UU zhn|aDiAY5tSPK@T1T8PqG$Fz9ZtGFy5>_;;SF&e1Cj_^`yPrS7Tibdw z#EtJ}CPvp!dCP_E;-TuBgV6*T%N$N<6dCMi^(e`#cBJ?$CC0?L>F$EP`w6@|H?e_G z1t;(!p;s$v^K_^Ed79sdo|!4;#c%~&b;`t6-`QvK4*uq0s0(d-LuHlFeGbe1sai;M z&nx&=xExU4L2Ar(7O0RDoHW8tPwU3*PuyPIm`thZhHILN5U#LSN7ZLOcc5A1NuJ|d#+n} z(>}gIF!2ongntEri6hhB5J-`+`v(Lr{P=spg<1&_sZI&kYCWS`Hzu~VjRIBt##!Tk zuhc~ZH2J7f_@|gY@e-F#(ldIJzeHr6kSr*gmE?i3vm|cT==L?fWf+xw5RXYvCVhb*gt7ula-gn+ z!)#6HY=*b+sCs9m{Q0t#x7J5Ywx8j)i`DTc#ZNZ62ocu;QD_(g65r`jlD%+0`MGkw zo7$GU3+4w8FiEI(Xu1o8`Qy7Y3sy@{%|u^w^6A4@JjFQYCwJuss8Cv9#rPy)YQ8hSfP1C?nidl-dYB0CkyXq zI%(C5zIGCj%4DwHPIIX_*4W%&65DG#_Xdv>G?Qcg;Q8o>CS3KvE%UMZciX9@l9vnte0?SFWSauAfp!oXu3BC{rPGTE$26nHi;G2yolwcuPk&edN z4aK6GMY}p&{A-cSq9mMej0~M@)j@aFPdNAR_ugnp*I#^OWH*4Qkl_sb2#{|a;G~f% zo@w8Fad%{&bn4(g&edOnJVsc`6WFPGQ7dYsR;Ka-IJtglzGTrX_QBaW7|ap41BkY{|DKJZ`#mRo{j2QV z{BH9=!EsOl4h;LEYJo$RvlDq0YOG7FkpO>vGy4G-qjQ7-eM8cy$84R=bwLj@u8J!k zIC!CV#?FCu>}cEZ@_s;XI6?i>J5u|+xzA`$*kIW-W2`##AY?gc2JICdngYtZ*cfu+ zhY`B0I@n}XPfYhk#HmZ8y0ghAw@#8}6Nph{&!R6Yq~4c6Pd_VGNEj0S&V1hE)hq{Y zuI(4&cc7B{JyV3Dv6J*Y*L zJ8eh_t7gm|GX^WK(ZcxY5N|$qoOWVdS)8&t#)&@&n*jLb6zvbUt4#M|BIYE)BRDlj zyf7EJmbN?8O%T#UJ)TM>w`KlDaXes3iW=ga!-ny5!6aswTV-+eO1HT}t`tb{+DGsDF#MhOW)3c9fYNU_*0bB{cTNUVWE=|wp-c2W)Z1@qzR?Q@C7e& zXu(g@KE=UR&#~nlwW(NFOL8aOluyHwhV~Nj!Zu3daFrPpzT)?H6!T=mmpzg z^C(%teCc`pE(IjqNgqpN(K60MiCvoQ^lh=6!{U(Ao*MkjA?BrA?d6{L5`qk0FM2>5JU^LOP=*jO9+fFA23%_ zb)^Y;rGiDD095pAjnEt)sB0Ks^6=4i#$5xBOW0{s7?*uH9wy-}BnD&<#Ub7^nihyK zOxY&?RJ7PeWy{C~Yv%^g=5oL?TqiEKTKcqPcJs3&sEJ4^IEnerj|}eR{!`* z%eKB}Gm@q^RMfe=buEC8R0@cXyn~M-U2wvCsqvW8eAq0M(^KziqWjOT3_jfhduv%} zh5Fd1@M@_8$(z!CBx$(DkX>U@cBG99nd8y#0~W&O_2~V{>OgeN{nxAvG(FS6)wx!> zSlO)X_ufB}g8;+UIdL}0wx&&5MjNIbXdoa`(agNXLg^U1#m|c!@b0y+z0I$L#ZtCr z<^m}6j^NqTif(L{G-*z%Ab`AP=mV%@mQgRcnR#7O9fma#Wcg`-T66e$Mi=h#apoqR z<98vfhC9F)h?n6)a>7UFQk#j1^__)=M2i(+{DI&{_pGG z;5*llvO^Zb=~Fv%!??hG564Y}03V5kr3EP*mUqCpigg2@xvVxFh3r+auU4+0s_~>y zwBu2>N#H*V@y%>jrcHT<`j|QQ>@kWPh*vHF!tcI&{@97jX`t8b#Ab394ld@l@MyVH zflnAd29z00b!*z;YTF~Mut(A#MV`MU6ZTAN<$K>H+Ezr2fCPFKw%tobr%bU`p_&gf zp_#c$CaGupO*9HsN>mL3gb%!vu5z|?=1Drrd?O!dr0-?KUd$`tGc~`F%Xr23ESbCr z&5InC$; z!6VP1QEd5Pb4E@6KkjG=lmtT4wd{M?06Xbq{V*z&CTgO2;f)?vR{+1ZMA7Q!nbydHE|XDfscu9cl{i788#UPk92KKm z%jtM_9i#kx%S>xyE={f+kq1|8 zor$PZz%XpGuZXa3^J031X6bZf*e4ZhO`;5HN|zd}VcQ)&1~n!d<1qQ_rgn{sxT)>bdfE8vGd#}07?@L z5Aj$65r*~SUlky2)WS1_aO@gbms-i{d%i9Wezl;n;r}S%3?s$_7PNZ z#@Is{UW^WwKdKgrP^8<#(ofClegNx6Rc>9*)&&5JS;B*FJZ!lgsn<6$E$Hx;1+B3F ztAf(%vgb5tfoU&n&Q!2b&@^aN%JlfX9c;<3uJ>m3=Rum;7{-)j)^`R&ddTsJFcA?> z9FN%MsgRK6pbd4il-{FCYGct_j?gBBAZ3IlH9Sl7idi9(FsAylXX)O)cle|m_-#GQ)mFX||dZGAV*V`GuwLlo?yz^rKC828H+P>&$W zgIq(uGjk!z<(q(+x^C~N(!YGj>fk2lA&z+@(oPl!VQfc3;7nB&>(ZxAKSN8?6Zwdh zep$T7W)w1^$JST{;JwZ-kXl4rgk z>mY`)1eu3IQ-*?$DysGK{F?LN@QdT{l$4RfE(Wfjb6f_G`}i#IjGXa z^w9_KY>WhZ`uLx%7N?^NDE9BUsr0wc^564QClhC93tO|lotA9%4aW^u1fMJEFM*@L#YCWr=)Rp(HrWM2F;Loq(TGZTmArMuxHry#6nr+?chyoXYr zT=eUH)#-Q)CVdCf9`%g@dD`5t=F-ZQ4O4N+iUKqL&XK+X2g>$$(KOq)IYY9gwhMRN z=s%0r<2KuY7k1(Uwl%fs}z!kO_-*nxdzOQ~vgEcVN??20-S(nFVu z2l+{)fk_vjsNi*V*^7*DGojTNu{{Z&YCfo>8IEE40kw+K+}l$!{)Xl9IRn3>kH>isq=y6KORqn+gX@_bnTvXg*tX4m z%VOD9zZm?ZW!yWc>fM~xFi^ciTc4MHQq?VR5{`RsF~WkpJxZ(yViZ*&>fwvN$S*RxNsd3I-P(~198J*hF&?-Fv>G6Ll4tn$B$bzGKX^-!KV0rg?_u$VKzhPnm>y_ zWT0z?7weU}k@nUbjF|Z>0MtNx<$jki{1V7S+7ar*urSRe?36Rr)Cjyx(ul<_>NcT+ zBxZKfFn`9&1YI?L2dV{!@C@`^kWtm~;@~!!MWH_vATE8-O?7sdo6`MbJEE&AMw)lc z6NKr{BkFqL3#3*BC_h`QipjiI9^Q?Sq96*xrFHqqgf(H(`1gZEqM@PrCnED!BsXM*NcENFd+5z%(GizKya@F z?}5h>8cbyY0!K7B7Y5uYqAeC|slcDcNf+v|2L2rLp+tjULF*_%`Z#Zk`=&#e`l7}n z6=~2*$s#gW1OuUKekNl}Kd{49`7|~Rw)T%ILIN$J&U|w7D4vcFGU&JNGt##DUG%Pw z3vSW~&!9?^aIN&WX9Zv%+QAUOG`I|tn2zj8=4&iX+c9f`Sg*HUe21o`gN666HQ zW4lRi5Bx@rI6q+TFZKRo3QXzekF@-q0%uSG0HFLc6>&2$RQeaCIoH&Z+xUUvo7en> zaEQDAR#Hg7Fw7jG)7;Qg7O|ljbwI(IeEa8^j5av{#3z3$D9;doOfR-C=?Z7;Vtg&9 zB2jCJ)8>x@RGdI&d1rk|t=Yl-)5BqcIP&pb=OndOl|!d89S_&X^PVw-X8Ab%p-=k) zDPV~!u~PIx$I}$Wgf@d3ncG;af(u6W`tomH=M4?h+s7*zCuQ}8-@0;$w{Tdg&4-1L*JUU(G6)Mol;#!n4xBQz@~Pu|6FW( zfj+X@;AYF!x~h4Z!P{h~-%y$0o30r0N)L_g(%WjYWcDanz8F+x?dE-nY2y)YIb5@4 zBA4S{x3TT&xz#B(-H6f^IattEJ6wq8mxe&(hgt+Bo&Tb{f0~+dK@N*?;lhIuBnvTS zv^4&KRe+&Ci7=)>6ci5u3gX1Cp8V7P7X(BJt`v~R;!gx-L4?G3rP;FT@&sUzQweqm zGI4Q0Lw2yFM)uzzOaS|%Zt#ZgJVp$FfFg?X6bXCEF5(oZF!Q{2BCqh(hyoG9(L@+5 zj2JQE;T6Kk_hFg|AHY%|Ky)N%>5nEnmc|G!!usPS@gD`M#$U5o0uTl;=_gN`40u;q zap)OWGqpr_Z)UuB^0<*6h#7nccMX@9Ssp+HVd1mS*dzX~1=@}Q?wb$iTCl+mgB&Ia z&pZfUuaEeEAfO0)LL6jegg=1+BNz^GMhzLV4Fd?{frQZb&>!;X! zb=DudTcFFxpoCqKU*Hn$v^fao(^4thfX~}{GBkky7$sbOC%3Fl=L0@WdCIUb+Of?x~9q=zbq~8AR$2p(?)HvjC||E(AQ3e>29hjtwBs? zOzyua!{^s|G1|g{$GB;yU;Pryyxn%d4ZaOAwL9NBsNc{k%M=%9n`3q4YrPI_>oU=; zElK)8M^A~HfOoJh*x*9j@oe}a3Vm?!xd|B}Y?Yrtdv39%WuD_wwX8u0a6PF>nrk|b z^Fzc1WImA#y7e*5J-xRmeDDG^#!a&aAoLjU7jZwtut&H-+)snD5vFlwd_NuRKdXv_ zka5WdY8oI(7GGwjTdO}}4#i4;&yTB{XEYE%Roi4_B52NY;O?_{Q*%P7C5DA`8^8(s z4~H~qL|T=WtY0XdknmGA&)=Em*<~aZv?v30j1GdOqHrY7-}>^BQ&QG<#Vf19`zRVJ zojNU$2MGxZ4HA>{qD2f5AJ>EVG0B0&N^crzodt)1a|!~F3pd|<;+gh&wRhHn;qMs` zw)me*IIpo)WpEf23ljrp$5P4u+@&F=bNmWR*b9>$4P;q$Kp9B(8?@eW(3243U|<|YH%R=LPT@pr3Jd5v^mg0We zDLXu3LH>Z1_WO@sGXDkQc*wVxj0F7GTpa@Nd*_q9qn*7Iy^)>c-`M%yjQW3j>%P^+ zJ8{7FfZ!k1VVX<0UF$3qj@m_44x19mtR+=1BZI`o28KWjoj#uiTrL@uiABgLQpLT$KRu@cIIAE+D+}QEUw_nYTY*)o2S@wk}C2IA6w^8!KHFQ)IkrtBQ zW{)1ip3bor?o0;E+h=YAyZ6ybw(<9=xqGi2WldcEf+jJ*p;L`s16*H0S za}~{vymq_D4iy@j`?3Hn;HMHb@YZo4OyJ>oQeV;Kk84Jqw6%=o;2WtwnAgi$QwWN5xFAtQ61Hd!4>ZQj;A87{){R{G<4xVnjuby5H`%4r*p;JYr ze2^oshDRZo(j(t{F;_sDSn=}mXy1jL(ECLyw-n}-L0Ta6 z?~Wx)Mew+LO+7Hij*B3XZ)l>#&Tyx1_le>(4yHy#GTGRm@9ySA(3lw37x)L}xgIWo zk&lah_%Xa}3)k#qjwI~YpO4%NVqdNys`yzQXxHkgXxZQ3C5tjCb-eR-b~W9}z9sR+7ekb@!Y++s-`_#w{>Z#Dzx%XPBkMcI zE}&6*_}V+|b!h%sxqo24NOMVhoB-FLQ`t%Tv|Y&YE^DaMs_!|q%?{fcLfIWLKoC4)l2*G z_f98+RPI zU2T>DX3gCD5TKD$)n3I%hgj1nX>miB7A zmfLwgpJ4{>mhW+Rm3D4M7dDH5u|`6c=0%kd6((ufk6uVOwq@z=yFJb@)({rz8h3XI z6B~#KS4yR+3V{r0RqpXtJ`B?zF?RPe(x{e0SBN-E1ySY7N>6L*yR&z%cq0K$L>A4P z7|c#(o-ih>9XN0fyH>OK2l%CJN4Tp`p_J@sob%#_My@r@x#N5_vWG8|S4Nwpn z>@OeG*H>ov@naqUrmv+04BOXF3YPVqEDcJu8Kium{5IP8mAvy=DM;ZkBjEpQYkM31 zGsl*Q(rhD(@yZ-(Lh~}r76VBolj@Js!QSN0xWw!Znsc^{94l>~6V07pnE(|tM$m&J zt|3KGw`{z`*qNpZu~ECyl*odTg-%vkkYZY%I#N{#ZP2+eb`^FY&@Y0K;byT1P8*fm z-ykmFNzuda!4$1i%E#aD5f_1x^3M+Mthjx!w&Q;4+brN=BAve`Nug+5wYl@X-hLdKe@w}G> zQMT*WZ0m|6@v50RYI(G#>q4@)#Y77#*VRNph-yFtU#LF*McUK@|7h%J<; ztgWP-P6wh6(AyxsA;L#NhiE! zN4Ph=VM}GqHhix%Y_>wTEeB#xbON?QHB!gSBFV(8`7YKD#Ga^R?0{NTd(nOsYV)`% z)!FSbO@ThI6}v;QAo zN22=ig!KQ~Iq$EgvNa3`r1z#k6p;}SX(GsgQU_2_E*NRafHb*)f`CY90}7+`BBCNn zHPS*rs!{@xu0xkgLXb8JQX>u!CXhhjMkhm(Gp_p=-2CvZm2=kf?!C`Cd+oKq{hm*` z@NX4I+*@WvJA3*oZmE!m^dPi_~;%I77*vTU*UEI$WVG#ao6i>?z3 zx15ZF(Y)$~8wL_v7@iyeEt zb`$8C5uOr%6R2Z5>_yK>iF3a{YvjFEFt4~SSdbl^R3d&?xcp%3y{dz=l0HgW*OAoN zB}F$=L0%UGm((uA^ie2MuIB`{O5+Oc+9+g8(p#rZ1q+&y^+{M0(l#Tb*H4lGuM zh~ub)>`RHFHKd0X!R_}6I#49)V;AE)*KiirFQOxd7YOWX^ja8F!<8E}6@lAoo$gdS zb{)d2;qJ~UuBYZ@ztc(aXIIX%7BH;xmpIez|Hu^?IBDZVA4@9XaSq~5ux0LE_yU~_;y`;w^L+_Pr~+hf;EuSU&we~c>)uW>w3)T^Ja*~3o~aY&m%MRcta zE9N$JR)6;Mo53_%JNe-o6rE(EJwC(^#OGGqI%Q1foi>3fWcVg{J18{0L7qD$ZXb;^ z!j}&GYQ9x)3kNGW=77M?i>r5GaruEATMPWCcDBCmancUkiBWsFTKJ02bL=au9JTlnxG-23Vqwo#BN~Kcs zRpuaiy?oeAN})8mbCJ441;d|n#ON1$%_Uo1mtChhnf@C5pNW z>m3+yC8s6M;ipU=Csp;?Q=U$vbN7u8yeFZ_LzkLytZoRd$WrQ{HC_07V6x3sIAN3#}Vhf(9&c8`}|_!eYvH*K~GiCuRTi_Z;BQ7bDR2 z<l2G9R-YFUDEqo^QV0|ql*-bH%1YWB;~_oDHymN>iJt+O7nU`b9nVey9~Q) z9alV0h|Sdr2!EIY+}JIOx8dy2y-*c9->SqiF3*>S{7V^J`u(+k{NOg;5In_G8Iu_9 zI>Ie8qHZ?$$8Bn47uTpUT;qjhbr$hF)c8Yl75^FWn8D@!%02=c`<>Nae++qmUe;fC z5!WlN9hTlj`d?p;K?v2dU5by2;f9W8hZyagTFvI zLO=XTk8!J-a~pvKqToMQ>*KcbuVyY)N7Y2uNTn_k>J~bOlm!A3854w!U5%FM|9;`a zzg`DM{7xA}h$X84YY8?63If&BW*t*OHXz7an~E94(mq%ah6XUfIQF-&)`*$GVyeGJdg%jq!~0+frb8Nm5Co2l9M*CzAl6!APQ}2Za}{N`!5ni&Qt&Z From 5bee503cba08750293ecce61c797c2d893e9bd8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 15:18:49 +0200 Subject: [PATCH 06/13] Disable code formatter around regexp def --- src/LineBreaksHelper.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/LineBreaksHelper.php b/src/LineBreaksHelper.php index 4c1d1e5..d31755e 100644 --- a/src/LineBreaksHelper.php +++ b/src/LineBreaksHelper.php @@ -61,6 +61,7 @@ public static function clearCsvValues($sample, $enclosure, $escapedBy) * enclosure: |"|, escapedBy: |\|, regexp: ~"(?>(?>\\"|\\\\)|[^"])*"~ */ $regexpDelimiter = '~'; + // @formatter:off $regexp = // regexp start $regexpDelimiter . @@ -90,6 +91,7 @@ public static function clearCsvValues($sample, $enclosure, $escapedBy) preg_quote($enclosure, $regexpDelimiter) . // regexp end $regexpDelimiter; + // @formatter:on return preg_replace($regexp, $doubleEnclosure, $sample); } From bba5f7581916781ccfa7b7c0449298f99bbd7d25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Fri, 24 Jul 2020 15:37:09 +0200 Subject: [PATCH 07/13] Fix codeclimate issues --- .codeclimate.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.codeclimate.yml b/.codeclimate.yml index 641e74d..99a2ccf 100644 --- a/.codeclimate.yml +++ b/.codeclimate.yml @@ -2,6 +2,9 @@ version: "2" plugins: phpmd: enabled: true + checks: + CleanCode/StaticAccess: + enabled: false phpcodesniffer: enabled: true sonar-php: From a6d0826b871506676ecf9479802bbb88d4446411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Mon, 27 Jul 2020 08:48:04 +0200 Subject: [PATCH 08/13] Test PHP versions 7.1 - 7.4 in CI --- .travis.yml | 4 ++++ tests/LineBreaksHelperTest.php | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5557094..74ff91c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,10 @@ language: php php: - 5.6 - 7.0 + - 7.1 + - 7.2 + - 7.3 + - 7.4 before_script: - composer install diff --git a/tests/LineBreaksHelperTest.php b/tests/LineBreaksHelperTest.php index 584cb8b..5f22caa 100644 --- a/tests/LineBreaksHelperTest.php +++ b/tests/LineBreaksHelperTest.php @@ -136,7 +136,5 @@ public function getDataSet() $lineEnd ]; } - - } } From 86f24a07ab30b7ed95b0991af735b8f6e3a0cd4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Mon, 27 Jul 2020 08:55:42 +0200 Subject: [PATCH 09/13] Fix test compatibility with PHP 7.3+ --- src/CsvReader.php | 6 +++--- src/CsvWriter.php | 2 +- tests/CsvWriteTest.php | 39 +++++++++++++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/src/CsvReader.php b/src/CsvReader.php index a0d382b..ee1fa9a 100644 --- a/src/CsvReader.php +++ b/src/CsvReader.php @@ -111,8 +111,8 @@ protected function openCsvFile($fileName) */ protected function detectLineBreak() { - rewind($this->getFilePointer()); - $sample = fread($this->getFilePointer(), 10000); + @rewind($this->getFilePointer()); + $sample = @fread($this->getFilePointer(), 10000); return LineBreaksHelper::detectLineBreaks($sample, $this->getEnclosure(), $this->getEscapedBy()); } @@ -129,7 +129,7 @@ protected function readLine() // allow empty enclosure hack $enclosure = !$this->getEnclosure() ? chr(0) : $this->getEnclosure(); $escapedBy = !$this->getEscapedBy() ? chr(0) : $this->getEscapedBy(); - return fgetcsv($this->getFilePointer(), null, $this->getDelimiter(), $enclosure, $escapedBy); + return @fgetcsv($this->getFilePointer(), null, $this->getDelimiter(), $enclosure, $escapedBy); } /** diff --git a/src/CsvWriter.php b/src/CsvWriter.php index 45c683d..454a571 100644 --- a/src/CsvWriter.php +++ b/src/CsvWriter.php @@ -90,7 +90,7 @@ public function writeRow(array $row) "Cannot write to CSV file " . $this->fileName . ($ret === false && error_get_last() ? 'Error: ' . error_get_last()['message'] : '') . ' Return: ' . json_encode($ret) . - ' To write: ' . strlen($str) . ' Written: ' . $ret, + ' To write: ' . strlen($str) . ' Written: ' . (int) $ret, Exception::WRITE_ERROR ); } diff --git a/tests/CsvWriteTest.php b/tests/CsvWriteTest.php index a3323a4..24f77f8 100644 --- a/tests/CsvWriteTest.php +++ b/tests/CsvWriteTest.php @@ -6,6 +6,8 @@ use Keboola\Csv\CsvWriter; use Keboola\Csv\Exception; use PHPUnit\Framework\TestCase; +use PHPUnit_Framework_Constraint_Or; +use PHPUnit_Framework_Constraint_StringContains; class CsvWriteTest extends TestCase { @@ -87,9 +89,19 @@ public function testWriteInvalidObject() ]; $csvFile->writeRow($rows[0]); - self::expectException(Exception::class); - self::expectExceptionMessage("Cannot write data into column: stdClass::"); - $csvFile->writeRow($rows[1]); + + try { + $csvFile->writeRow($rows[1]); + self::fail('Expected exception was not thrown.'); + } catch (Exception $e) { + // Exception message differs between PHP versions. + $or = new PHPUnit_Framework_Constraint_Or(); + $or->setConstraints([ + new PHPUnit_Framework_Constraint_StringContains("Cannot write data into column: stdClass::"), + new PHPUnit_Framework_Constraint_StringContains("Cannot write data into column: (object) array(\n)") + ]); + self::assertThat($e->getMessage(), $or); + } } public function testWriteValidObject() @@ -182,9 +194,24 @@ public function testInvalidPointer() $pointer = fopen($fileName, 'r'); $csvFile = new CsvWriter($pointer); $rows = [['col1', 'col2']]; - self::expectException(Exception::class); - self::expectExceptionMessage('Cannot write to CSV file Return: 0 To write: 14 Written: 0'); - $csvFile->writeRow($rows[0]); + + try { + $csvFile->writeRow($rows[0]); + self::fail('Expected exception was not thrown.'); + } catch (Exception $e) { + // Exception message differs between PHP versions. + $or = new PHPUnit_Framework_Constraint_Or(); + $or->setConstraints([ + new PHPUnit_Framework_Constraint_StringContains( + 'Cannot write to CSV file Return: 0 To write: 14 Written: 0' + ), + new PHPUnit_Framework_Constraint_StringContains( + 'Cannot write to CSV file Error: fwrite(): ' . + 'write of 14 bytes failed with errno=9 Bad file descriptor Return: false To write: 14 Written: 0' + ) + ]); + self::assertThat($e->getMessage(), $or); + } } public function testInvalidPointer2() From 602fa6637c97db88269255695eb9f8fe0831da38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Mon, 27 Jul 2020 10:55:22 +0200 Subject: [PATCH 10/13] Add performance tests --- tests/CsvReadTest.php | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/CsvReadTest.php b/tests/CsvReadTest.php index 027ca69..46a8e47 100644 --- a/tests/CsvReadTest.php +++ b/tests/CsvReadTest.php @@ -7,7 +7,9 @@ use Keboola\Csv\CsvWriter; use Keboola\Csv\Exception; use Keboola\Csv\InvalidArgumentException; +use phpDocumentor\Reflection\Types\Void_; use PHPUnit\Framework\TestCase; +use Webmozart\Assert\Assert; class CsvReadTest extends TestCase { @@ -489,4 +491,50 @@ public function testInvalidFile() self::expectExceptionMessage('Invalid file: array'); new CsvReader(['bad']); } + + /** + * @dataProvider getPerformanceTestInputs + * @param string $fileContent + * @param int $expectedRows + * @param float $maxDuration + */ + public function testPerformance($fileContent, $expectedRows, $maxDuration) + { + try { + $fileName = sys_get_temp_dir() . DIRECTORY_SEPARATOR . uniqid('perf-test'); + file_put_contents($fileName, $fileContent); + $startTime = microtime(true); + $reader = new CsvReader($fileName); + $rows = 0; + foreach ($reader as $line){ + $rows++; + } + $duration = microtime(true) - $startTime; + self::assertSame($expectedRows, $rows); + self::assertLessThanOrEqual($maxDuration, $duration); + } finally { + @unlink($fileName); + } + } + + public function getPerformanceTestInputs() + { + yield '1M-simple-rows' => [ + str_repeat("abc,def,\"xyz\"\n", 1000000), + 1000000, + 8.0 + ]; + + yield '1M-empty-lines-n' => [ + str_repeat("\n", 1000000), + 1000000, + 8.0 + ]; + + yield '1M-no-separators' => [ + str_repeat(md5('abc') . "\n", 1000000), + 1000000, + 8.0 + ]; + } } From ff9aed82715821ccb97b9c1f805ad8758aee1515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Mon, 27 Jul 2020 11:42:23 +0200 Subject: [PATCH 11/13] Dont run performance test in CI --- tests/CsvReadTest.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/CsvReadTest.php b/tests/CsvReadTest.php index 46a8e47..b235346 100644 --- a/tests/CsvReadTest.php +++ b/tests/CsvReadTest.php @@ -500,6 +500,10 @@ public function testInvalidFile() */ public function testPerformance($fileContent, $expectedRows, $maxDuration) { + self::markTestSkipped( + 'Run this test only manually. Because the duration is very different in local CI environment.' + ); + try { $fileName = sys_get_temp_dir() . DIRECTORY_SEPARATOR . uniqid('perf-test'); file_put_contents($fileName, $fileContent); From 40d85d0550f997776baa38b674e5c6d54abec7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Tue, 28 Jul 2020 08:17:28 +0200 Subject: [PATCH 12/13] Replace variable with constant --- src/LineBreaksHelper.php | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/LineBreaksHelper.php b/src/LineBreaksHelper.php index d31755e..a6e18d6 100644 --- a/src/LineBreaksHelper.php +++ b/src/LineBreaksHelper.php @@ -4,6 +4,8 @@ class LineBreaksHelper { + const REGEXP_DELIMITER = '~'; + /** * Detect line-breaks style in CSV file * @param string $sample @@ -60,13 +62,12 @@ public static function clearCsvValues($sample, $enclosure, $escapedBy) * enclosure: |"|, escapedBy: none, regexp: ~"(?>(?>"")|[^"])*"~ * enclosure: |"|, escapedBy: |\|, regexp: ~"(?>(?>\\"|\\\\)|[^"])*"~ */ - $regexpDelimiter = '~'; // @formatter:off $regexp = // regexp start - $regexpDelimiter . + self::REGEXP_DELIMITER . // enclosure start - preg_quote($enclosure, $regexpDelimiter) . + preg_quote($enclosure, self::REGEXP_DELIMITER) . /* * Once-only group => if there is a match, do not try other alternatives * See: https://www.php.net/manual/en/regexp.reference.onlyonce.php @@ -78,19 +79,19 @@ public static function clearCsvValues($sample, $enclosure, $escapedBy) // once-only group start '(?>' . // escaped enclosure - preg_quote($escapedEnclosure, $regexpDelimiter) . + preg_quote($escapedEnclosure, self::REGEXP_DELIMITER) . // OR escaped escape char - ($escapedEscape ? '|' . preg_quote($escapedEscape, $regexpDelimiter) : '') . + ($escapedEscape ? '|' . preg_quote($escapedEscape, self::REGEXP_DELIMITER) : '') . // group end ')' . // OR not enclosure - '|[^' . preg_quote($enclosure, $regexpDelimiter) . ']' . + '|[^' . preg_quote($enclosure, self::REGEXP_DELIMITER) . ']' . // group end ')*' . // enclosure end - preg_quote($enclosure, $regexpDelimiter) . + preg_quote($enclosure, self::REGEXP_DELIMITER) . // regexp end - $regexpDelimiter; + self::REGEXP_DELIMITER; // @formatter:on return preg_replace($regexp, $doubleEnclosure, $sample); From 2811a3520fbf46b9a62c74c2511cb0f24880d6d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Jure=C4=8Dko?= Date: Tue, 28 Jul 2020 08:18:37 +0200 Subject: [PATCH 13/13] Remove unused imports --- tests/CsvReadTest.php | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/CsvReadTest.php b/tests/CsvReadTest.php index b235346..dec42c5 100644 --- a/tests/CsvReadTest.php +++ b/tests/CsvReadTest.php @@ -7,9 +7,7 @@ use Keboola\Csv\CsvWriter; use Keboola\Csv\Exception; use Keboola\Csv\InvalidArgumentException; -use phpDocumentor\Reflection\Types\Void_; use PHPUnit\Framework\TestCase; -use Webmozart\Assert\Assert; class CsvReadTest extends TestCase {