From 7f55816ebaa2777cefd759f6628e7753c91e0faf Mon Sep 17 00:00:00 2001 From: Martin Hanzl Date: Thu, 11 Oct 2018 08:55:38 +0200 Subject: [PATCH 1/8] detect actual filename of document xml (prevent mismatching document22.xml as in #1253) --- src/PhpWord/TemplateProcessor.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index 7a5eaf55..86c9e1c9 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -507,7 +507,13 @@ class TemplateProcessor */ protected function getMainPartName() { - return 'word/document.xml'; + $contentTypes = $this->zipClass->getFromName('[Content_Types].xml'); + + $pattern = '~PartName="\/(word\/document.*?\.xml)" ContentType="application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document\.main\+xml"~'; + + preg_match($pattern, $contentTypes, $m); + + return (array_key_exists(1, $m) ? $m[1] : 'word/document.xml'); } /** From 7eb19c8f76c6a0a4ba56bc779d3f6024dc172822 Mon Sep 17 00:00:00 2001 From: Martin Hanzl Date: Thu, 11 Oct 2018 09:40:12 +0200 Subject: [PATCH 2/8] add test case for issue #1253 --- tests/PhpWord/TemplateProcessorTest.php | 10 ++++++++++ .../_files/templates/document22-xml.docx | Bin 0 -> 11126 bytes 2 files changed, 10 insertions(+) create mode 100644 tests/PhpWord/_files/templates/document22-xml.docx diff --git a/tests/PhpWord/TemplateProcessorTest.php b/tests/PhpWord/TemplateProcessorTest.php index ea739561..2b3a9fd1 100644 --- a/tests/PhpWord/TemplateProcessorTest.php +++ b/tests/PhpWord/TemplateProcessorTest.php @@ -276,4 +276,14 @@ final class TemplateProcessorTest extends \PHPUnit\Framework\TestCase ); } } + + public function testMainPartNameDetection() + { + $templatePath = 'templates/document22-xml.docx'; + $templateProcessor = new TemplateProcessor($templatePath); + + $variables = array('test'); + + $this->assertEquals($variables, $templateProcessor->getVariables()); + } } diff --git a/tests/PhpWord/_files/templates/document22-xml.docx b/tests/PhpWord/_files/templates/document22-xml.docx new file mode 100644 index 0000000000000000000000000000000000000000..206d80f46063d4f9ec4594b191bbd6f493879b8e GIT binary patch literal 11126 zcmeHtWmH_t)@|eNjT1s}cW(&p65L$^1b27WK+_N)I01rNaF+zPAi*IJJOp=paLzq9 z$@$K^@BP2u9%Jv(yQ=1@+Er^-RnJ;Vaxkz!06YK@z``e|a06?grT_&15W@igH~>Uw zT`_w*7jruoLp4tab7y^K4_h0uTv%w@YydRif3N@Q9;k&VD0Z`AhE|w(>QdP?3nOyv zLC=U%c|m9oMQKfR zq$5XW(r&b>RGJMez#$-*?JG8&Wy1rg1pYgU&ZS0BH~a?6J>FWhr#m>RgM*L9;g&>d z_Xkk=E#jWDgJb-s4aUTG6I)`rQ*r(THzt4ypdP2@ z;suVLg;{qY%KIYk&Y)UXO{ z&*ejxcuY}+=IsJdh!+&Y;gC2d1{E`;rfNl6FEAjU44cK(>iWEJRM{XiQ}b=2(0lKZ zj~}y*XHTA5g z;ZU=?bF!iP`bSm`RDXOgIZny$OG$qrmA4m@o#81=bFc5l z5Sf-kD=(3EHGfiqD=tW$Wk}*<06YNTOAP?vKJi>0}(`JYd=$E9gcXViY4 z8`y?1b1%I0LkE1?&AfwGXH&16SZ7&{F8G<SG(Uw>*niJ5*Vw&!% zSlh!T$RL;ak*P?QX{WI@Oj%ytNIfs4*tFOGqgt#$lDa4tfd?EhS+uUoER~vD{nTDr z<-mw4AC+49(zdo8TT)^HGGd&YY=q0al}BTC32h{ys#AVYk-09+mmzlZ0LKaT7q3;_ zObP1A?z#VEnlEI#}uKyfqv^D(|oa zQ+phCzquEE38f}V{$*7I(7O(^TR-`yU*|LsL3$*i$M zilp5drlSMeUf^5Km|!J-CZcLhczXoQ!p;oKE@N0z557tkXgCjBbR@=bmxLyg`V__; z+?2r@P?WPS)LSIIxJIH7_UTLA^0QY`rS z;;@^F{)#PN%xVP}6ZN7Bu@0IyDVb2mEg4CZNryX_Ze8VTt^D$<2`3nQiwNxo8L4Q; zums=uf@)Dm0X0$v*{}iUNG(*C1>8083dwA$YbB|7!<;p!>e$uG;78n!r>WrRqWaoh z^k$e}ae4i;bE{Pg=@)#3S=fjt@tXz&LK}5u-Qn#E;jpo*A$|EP%HKQ;ce$*U28sA|*DK@7o>AZyM!JN#qi9NPSe1C$FTz z%{LxI^5M0$HuXec^bb?zqkE0P8!w+*w}Fy8xN1UZsM@C~=PAl+u6II)fp=l1h5j^4 z1J)#9MQOS>$T#)InugwKhE@Oe$#Fw|JBoEn1LFXIla#^`XdzO}0=mtWBX_F73swwe zYUg#K&a1 z2oXYh456LbMfS5!TFF}3^4O8;Qj3sXR2H4o0*TY`nr1FG*F(e@P$rSk<197>w6rF` z!6qqw1vvHJmRX`)%{-(u21$v!39)V*Mzp-n;DVRN8PIxz>b7JRlJtTJyKVQ%?c5nh zKeVPTeYoZ-G2X5%ShSktuUlljs>?ZV+Gw5Rpu`FZYpt{fh{78bfU$77lrL;^ic#wIqGjEya`Ut&gL|CU8k-D44nw9Zn zqCtpWPj@tD-20v=)<%P#+7hrfiFP=ph@MPS#QEf>iV9};#{7{$aBN55hsKU*kG1p> z1ejl~4waM;xEFc!;v{5@>}GmALHnnus%-%dDgB;Ictopc4RZ??P( zCDpQupM5GMwsPaglq5ohpxEV1-bZ`(;pQDA!mY6YUd%pVoQ~CN1xu7c&G|fHpobX${;OSr2Ai!r6uKK$7<%6= zjIo~H)&h56BN6>+b1v6n!DcBu;yG~BTBE`HjS?0V$9AKl5 zhX6uK^beT=`^==(z4Xm@YYg_{f%pq6EEAd2C$>kAY1tt!1Diy3>RXw6 zPKS_=1FzL|y*I3)kchj`=N))$Ssm8F$gXqBqry+G_@BgC@YH-aA~}$W_E#KaqjFZG zR!!583F80{Ib70r9={6NI7S^E`<}(jaj_x zi!qDK4}@Ex@)G;s2|Q0%hST&QhTOse0N8&K_~nBjqGoJjWB$nCR`A}dc~)SUy~LKk z>8#?F@lG;hNsiK)>34XbM4iN`u?xhbvF>(@>J-oIJFS5>t+kOF{ImA`af^+d_oUB`|Ki9xH@8phFtOAfbQA9SESG8tJmsNX9srTyi95wIE9Nstz7Q0i7+q=TM9g>D$}y{q7bGzBI`( z`X(eOsOAwgLL7KP;L+Dqx^1!Mp#xK=0z#4N8_QK{o1d_oLj;Nn(Du({@A_M-Nxav; zRv7fpOub1|nz`RfY6xgv{yEYBz%ziZ?QJ5`!$==K+`zv^+TGkl^_STiGTtc*!-fgJ z(!2s6ozEA~H{%un(_>2akOmZ0YHkne^YmmdysG%%0rXeC82k%*+Pf(k0Q$5S^51zX20l zf2@GYK&2D=+#5%rKMvN!d(v)q)PeI-vx~e(sm@LBi{{JG13F|^{R0O%zu@?R@?~~$ zT)KXKv&-=-)eo5-si^`HTh9sF1zny!Ng4l$71GIpF>TrM)62WsLMW60Tl*-#CH#U{~3x~qcTb#)g901_{9tQBU z>mFCRfE~GXu7_^Reb6c}0ftUIqef~iX^`NNwVc$S_fe`L>CZyY@>gYaD^ON#XtQcjrnr4*L~%iPn*BIg?HXK zx~SU7w1=K@Ob0Se(&D7Y+q7{sNj67@MmM}y3nD!n-0OZvBp> zh~d(c+Dd)K(6c}|Y8&1cXzvL~W!k)C|=8FK7HzUVnl1u-|LXpz@YjXcXhI-omu}h03scw@hmfu|sVLRm2qd5KY z!PT$;^gmtA)ZXcj>6k2+`hR`x|BGfx;Bm!ZR)SE&&=b*!l8D$7~_H;Op%>`IF|O`t}s1?Q+_Uz&w#(Lkn&b>TlT7lDDY?}hX71XduZIK)f-rs+$g<@W2y^k4Dka>lUF$~(V$eGfW&-$aFC$`Dxo;(e%IG?j~t zsSXq}mC{&wsv*XdbS8bOQ6QDaho{ZMYdF#mTlN{QPxSRP$mt&>V)ur9%T6S)twhLZ zE11A5Xt*!8qOJiASzJ+|sn{sVq?D?`q*;h-WY{sR!*!h3Y~hnAeY44x2)g2^n@`T zw2uq`Y&Ze{7!QN-hf?M2;%Q_4$ELqEW7ci00qbENkkafA)lF`>jJkJ_T|6KL3=<}5%dIs|OvVP3t#7?h^6VXjSSqRtHS4E4zq zofN)w_vv&>&Hp&~jjqT*qhcuQ1^s}Ey>GvYVT6T#3w5Zd*nUNxTFePe$!ga03tqXJ zUA4+-j9X6Y7GDbJ+aEJAwf@^L{8>G@(iH3M7YkMt`g6^1sb8bF4W~(PFQr+ZOhr&3 z)L7&2l@sy!&40VtV^DP&@-ap>iH_NQ#?2?l_+AAFpPH#R$PqLRE}IB~ zUn~#LB#o%~Y;>jCc2RnRPddbY+d0(-ELgrdJAfc2ZL#@tH8s3nN@IzpXqTu$t$p0w-A(Clk`-fm8EIkTh6 zczDIXKL?8g559a(@U0YrQ796YQp+}_A49_3f*`v{!tobEpOR*}DHZrM2$Nt1b{31k z!AHa55|kpT8CAk61$M@~fP=>oz$F;kR5N1rRSg)lO*zj-!#D4CM9Hs~HeVRLV1SB~ zAbMF%LvBfotgkDI7eO=vM||6eH1F`}^6=NcrklikvXMbpQx;wfnu_A9?_AJ{# zHN|=8qLw;1?1nw%{rqUO1wrKvj=@dsl0dB4?wJ0Tg%fOo*XFPkwicl;$Yr)s$4BxB z1`MN^_64U595i1974I3rqI-j6#x*KUR3=sF??Ww3+Ln6+#`zWo9VTh0@^y{_F!=_t z)#Dlq?h7aG?(PXbF(gL;Sux)RTv0Zk_pN@l^#aR_i|_afAg0EXe%)BTyF9EpFI^>P zV&V^v@W`u{4|N&a(ONZZJH9lGEcPgy*sv6vTCpw`lM8cWpw-C4&WEOlEJZ{~kCm3e zUx1c}aOI%M7gW0jt@YRyL({8Rlg+p%aMpERNCCa6U~WKi;*Ro>Lx!WAA3i1Ya%SBJ zU&E>{AgQ*uJ(Zm5 z*9oo$cyR5*&67%|UK$`-1D&M_3UST`qw|vnsS~@mRrp??d4S1Ya}+N1sY@t=yTr(Hg8RhC z%tFuiID4PS8R$s$Xm3i-QHLfwdQ9_W){U3JxuX}^TFzSv?2^3j{&ZOG+s+FGC9$(j z2Ffl>`VN2;ekXu5eowh$uHoR{J4?F9w7Pz_pOyY<*`NXnFJ$fRcsr|cpabcUZuR*G zgx1C=l^?Z1ahMh#&o-WTTbyMDe2=-~NXQuXm#yZw8djM0SfDiTJ;^G2NS;W}2+V3< zkqu05Cj;>Liv&#Ez_BSFe~H%*dMMv)WU*q4Lg+D%J}War$U7h8!80$4x>Oe*SkGkk zkmGJ8JueFQT!*=0rW@f{&D8T}X;Jup7uiii(pp=zL1FWl|0}ft4#!#-QFsmyI{hJC zu`ZOL#r7GDy>4H}JRbW&T`20~tA#G+{!876&VMI$qK#<%Kk7pHhZd4P4X9;;C9_3= zdaQ`%x7I&=*Pm^wVTy3C`FGJx)BIfLnQg}%uYL^TGcqKH{cdd`PPg{el5jFH# zK&X!UJ+Ue*2D?lUDJjIL6uF$CzAKOm$xgh>?nnAN;shE4Hq&-Aq3wC?!ub+Is9a|=ZcGa|@lC`~u=We$lP@E2 zTTHo6#&s1y&@?4UYuObUq4;wnPHH4x=w%mx`8h1hkI%Xk96JcJdCeLb(6!^Dx*M^} zc*V1EpR$_HW7$Q)i~v498`O8~d%%=CK9q+3Gq*`gmk$OQ&glM-tL)eSBhG;SXzMYz zfexMbXI8xa8g4k^hgHl#4HEJNAVNA+lvr}}YW!4D10OO6S?jAFigT-2>2D^B5#l2= zvC?l&im`sA8E|2iZ{tFS*jit`+DtLvl5ml)NPER!ped};ds3`!`1r8wJsBwHWp90@ z>Jwo5WTJ%oW5RZlx}%xbRMA7~jv7g-!7Q#+fu?N~#bYMI&`jKa%1_umfoQ{iDzjy0P&+x?Q&^tDb)XZFgZP7_T) zQFVd_(`x2l&`!VX`YmRdm$fyzgKL|AXpbLzpK|Mvc@_JqB;|wg@msBnb~)dyzbM%D zC(RY+n4YQ#O{zsRn6l=7Y6TWF{nDXXzUkwg6hxZKD+AVTy zJyVst&*l5utE$XqES=@;nc4lR+xYHgAIjM)E_3kU$T*^e5`|Ga87=I!QLQE!ukXRLR1?KpPu8f&+@!?I_u zvuCQA)RJLkWYW;Cs|8OI&rtGwA`{(c7vQ}k#bpHqC?!7m`w1B?3gDL3Q^~4!Qhd{I zdN1X{h&@n)Xe;l7Vuou({BoRTxmX}Tzr-*APOeH}-R%8OX; zOh3>1JPJPC6QBDdpXNaEt!E?YpRlv{)!$<};@oet+@z{^cHK@U}Q&8m&`P1-%QHq*@st|O> z)r042qF=dSB$cX3%~~z8xTL42Goy=8!(HYo#tRe`^5DE${aLd*c`Mr8}WFiky8sI}`C~ zkj)6%`^7?n`Zpa{6lkbgbc%O^8t)`OG&+4Bxhp=>oG$V(*DXn|P{(ivucC3#&`1@X zEPS0bl*db|i=53Gwp-jT(*=X&)R`G-EMSf5QzW)ZY$Y_p)+ROYFhtyeG1Q4)S0F1! z>}TPo?sv>V8Ao^9n=;d&*_1c6J7%Z5S?g1j+^zz%I|$hwoF_i*`*R`dl}}?zT9trz z{N$9O?8vIBLQJW?ozsn2to^NiuA1YFJY@}dpCWRT2xVR?ItSrE$Fd&DvEg=+M9zMK zm>fe)J2~Ii_OKPP@SWCoW#pcHfOWr?*3Ob?HOAS zel7lRD)i(p_y04v=Qk4kE7IpbFkwREG4={>tVFAZMp0G-acVD2p`HrgA;S?nVJ6u^ zc6(OKfwDXRmr`wlysG~bc+&{CzMODTk6Y^j*|l5iDT+nA4SNYvBfn|rdXc_Mtv$bAlX z^CGXlj_2aru(5crX4E>?`DkDI4Q%dom4pV_>*w5?$_ZW~HpFm_^9w=oHJ)1-sQQVF zFOv?eI^=pehk;Y+$coJu#B6Boqx5+T!!lJ%j%DZFaGNxJjD!8;HSLvm4+m;c&`b}C z>|=Q1V_?+p;fa5rA^ss)@$UwHkF5IDK-j~W{1RvNEAXGxM&Kd(DKp$|h& r`7iKq1pGVxcjEqvH=y|!{{Kc}B{{eU2YHO+LjiO@EPWZYk8l46F;eAv literal 0 HcmV?d00001 From e19de8e8a481ce3fb1aa84ff236952c22debb277 Mon Sep 17 00:00:00 2001 From: Martin Hanzl Date: Thu, 11 Oct 2018 11:28:44 +0200 Subject: [PATCH 3/8] #1253 - add explanatory comment --- src/PhpWord/TemplateProcessor.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index 86c9e1c9..ced3880e 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -503,6 +503,8 @@ class TemplateProcessor } /** + * Usually, the name of main part document will be 'document.xml'. However, some .docx files (possibly those from Office 365, experienced also on documents from Word Online created from blank templates) have file 'document22.xml' in their zip archive instead of 'document.xml'. This method searches content types file to correctly determine the file name. + * * @return string */ protected function getMainPartName() From 9f28ece4e9b4d8c917135af05d58370c62a40287 Mon Sep 17 00:00:00 2001 From: troosan Date: Fri, 16 Nov 2018 22:40:37 +0100 Subject: [PATCH 4/8] Fix path to test document --- tests/PhpWord/TemplateProcessorTest.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/PhpWord/TemplateProcessorTest.php b/tests/PhpWord/TemplateProcessorTest.php index 2b3a9fd1..1513486e 100644 --- a/tests/PhpWord/TemplateProcessorTest.php +++ b/tests/PhpWord/TemplateProcessorTest.php @@ -279,8 +279,7 @@ final class TemplateProcessorTest extends \PHPUnit\Framework\TestCase public function testMainPartNameDetection() { - $templatePath = 'templates/document22-xml.docx'; - $templateProcessor = new TemplateProcessor($templatePath); + $templateProcessor = new TemplateProcessor(__DIR__ . '/_files/templates/document22-xml.docx'); $variables = array('test'); From c51b6febc0feb8841202e38b817422ab0bcb09c5 Mon Sep 17 00:00:00 2001 From: troosan Date: Fri, 16 Nov 2018 23:00:23 +0100 Subject: [PATCH 5/8] rename variable to comply with rules --- src/PhpWord/TemplateProcessor.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index ced3880e..b4102bcd 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -513,9 +513,9 @@ class TemplateProcessor $pattern = '~PartName="\/(word\/document.*?\.xml)" ContentType="application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document\.main\+xml"~'; - preg_match($pattern, $contentTypes, $m); + preg_match($pattern, $contentTypes, $matches); - return (array_key_exists(1, $m) ? $m[1] : 'word/document.xml'); + return (array_key_exists(1, $matches) ? $matches[1] : 'word/document.xml'); } /** From 925e9e091910bf90290dcfcfeb3a36fe94aa6855 Mon Sep 17 00:00:00 2001 From: troosan Date: Fri, 16 Nov 2018 23:33:38 +0100 Subject: [PATCH 6/8] remove trailing spaces --- src/PhpWord/TemplateProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index b4102bcd..f9a8ceb6 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -514,7 +514,7 @@ class TemplateProcessor $pattern = '~PartName="\/(word\/document.*?\.xml)" ContentType="application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.document\.main\+xml"~'; preg_match($pattern, $contentTypes, $matches); - + return (array_key_exists(1, $matches) ? $matches[1] : 'word/document.xml'); } From 1c20a4ed22c791e3cc574291c1f40e53b328568d Mon Sep 17 00:00:00 2001 From: troosan Date: Tue, 20 Nov 2018 21:22:50 +0100 Subject: [PATCH 7/8] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ce722c4..ce553f05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ v0.16.0 (xx xxx 2018) - Fix regex in `cloneBlock` function @nicoder #1269 - HTML Title Writer loses text when Title contains a TextRun instead a string. @begnini #1436 - RTF writer: Round getPageSizeW and getPageSizeH to avoid decimals @Patrick64 #1493 +- Fix parsing of Office 365 documents @Timanx #1485 v0.15.0 (14 Jul 2018) ---------------------- From c12f98f69a201502f3f994d261c106c8926ef62b Mon Sep 17 00:00:00 2001 From: troosan Date: Tue, 20 Nov 2018 22:40:54 +0100 Subject: [PATCH 8/8] fix check style warning --- src/PhpWord/TemplateProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index f9a8ceb6..95468878 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -515,7 +515,7 @@ class TemplateProcessor preg_match($pattern, $contentTypes, $matches); - return (array_key_exists(1, $matches) ? $matches[1] : 'word/document.xml'); + return array_key_exists(1, $matches) ? $matches[1] : 'word/document.xml'; } /**