From f47ee2bc8a9e4e7b890e7cff1501d2d235b4ee1f Mon Sep 17 00:00:00 2001 From: DarkSky <25152247+darkskygit@users.noreply.github.com> Date: Sun, 22 Mar 2026 02:50:59 +0800 Subject: [PATCH] feat(server): improve indexer (#14698) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix #13862 #### PR Dependency Tree * **PR #14698** 👈 This tree was auto-generated by [Charcoal](https://github.com/danerwilliams/charcoal) ## Summary by CodeRabbit * **New Features** * Enhanced search support for Chinese, Japanese, and Korean languages with improved text segmentation and character matching. * Added index management capabilities with table recreation functionality. * **Bug Fixes** * Improved search accuracy for non-Latin scripts through updated morphology and n-gram configuration. * **Chores** * Added database migration for search index optimization. --- ...-rebuild-manticore-mixed-script-indexes.ts | 12 ++ .../server/src/data/migrations/index.ts | 1 + .../__snapshots__/manticoresearch.spec.ts.md | 73 ++++++- .../manticoresearch.spec.ts.snap | Bin 6207 -> 6687 bytes .../providers/manticoresearch.spec.ts | 197 +++++++++++++++--- .../plugins/indexer/__tests__/service.spec.ts | 33 ++- .../indexer/providers/manticoresearch.ts | 70 +++++-- .../server/src/plugins/indexer/service.ts | 58 ++++++ .../src/plugins/indexer/tables/block.ts | 4 +- .../server/src/plugins/indexer/tables/doc.ts | 4 +- 10 files changed, 402 insertions(+), 50 deletions(-) create mode 100644 packages/backend/server/src/data/migrations/1763800000000-rebuild-manticore-mixed-script-indexes.ts diff --git a/packages/backend/server/src/data/migrations/1763800000000-rebuild-manticore-mixed-script-indexes.ts b/packages/backend/server/src/data/migrations/1763800000000-rebuild-manticore-mixed-script-indexes.ts new file mode 100644 index 0000000000..627aa18100 --- /dev/null +++ b/packages/backend/server/src/data/migrations/1763800000000-rebuild-manticore-mixed-script-indexes.ts @@ -0,0 +1,12 @@ +import { ModuleRef } from '@nestjs/core'; +import { PrismaClient } from '@prisma/client'; + +import { IndexerService } from '../../plugins/indexer'; + +export class RebuildManticoreMixedScriptIndexes1763800000000 { + static async up(_db: PrismaClient, ref: ModuleRef) { + await ref.get(IndexerService, { strict: false }).rebuildManticoreIndexes(); + } + + static async down(_db: PrismaClient) {} +} diff --git a/packages/backend/server/src/data/migrations/index.ts b/packages/backend/server/src/data/migrations/index.ts index 98e629bf51..d3b11f2175 100644 --- a/packages/backend/server/src/data/migrations/index.ts +++ b/packages/backend/server/src/data/migrations/index.ts @@ -3,3 +3,4 @@ export * from './1703756315970-unamed-account'; export * from './1721299086340-refresh-unnamed-user'; export * from './1745211351719-create-indexer-tables'; export * from './1751966744168-correct-session-update-time'; +export * from './1763800000000-rebuild-manticore-mixed-script-indexes'; diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md index 29990dcf01..fe7c7e98ba 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.md @@ -4,6 +4,75 @@ The actual snapshot is saved in `manticoresearch.spec.ts.snap`. Generated by [AVA](https://avajs.dev). +## should search doc title match chinese word segmentation + +> Snapshot 1 + + [ + { + _id: '5373363211628325828', + _source: { + doc_id: 'doc-chinese', + workspace_id: 'workspace-test-doc-title-chinese', + }, + fields: { + doc_id: [ + 'doc-chinese', + ], + title: [ + 'AFFiNE 是一个基于云端的笔记应用', + ], + }, + highlights: undefined, + }, + ] + +## should search block content match korean ngram + +> Snapshot 1 + + [ + { + _id: '1227635764506850985', + _source: { + doc_id: 'doc-korean', + workspace_id: 'workspace-test-block-content-korean', + }, + fields: { + block_id: [ + 'block-korean', + ], + content: [ + '다람쥐 헌 쳇바퀴에 타고파', + ], + }, + highlights: undefined, + }, + ] + +## should search block content match japanese kana ngram + +> Snapshot 1 + + [ + { + _id: '381498385699454292', + _source: { + doc_id: 'doc-japanese', + workspace_id: 'workspace-test-block-content-japanese', + }, + fields: { + block_id: [ + 'block-japanese', + ], + content: [ + 'いろはにほへと ちりぬるを', + ], + }, + highlights: undefined, + }, + ] + ## should write document work > Snapshot 1 @@ -889,7 +958,7 @@ Generated by [AVA](https://avajs.dev). > Snapshot 1 { - term: { + equals: { workspace_id: 'workspaceId1', }, } @@ -897,7 +966,7 @@ Generated by [AVA](https://avajs.dev). > Snapshot 2 { - term: { + equals: { workspace_id: 'workspaceId1', }, } diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap b/packages/backend/server/src/plugins/indexer/__tests__/providers/__snapshots__/manticoresearch.spec.ts.snap index d9d8b475ec051a3951e093c1c990f50934c39553..5e308ddd22210d4e9c423f38bee5704364edb4d6 100644 GIT binary patch literal 6687 zcmV+)8sOzYRzV}?wOvMj=U1cDi1{@ z^2XfRo!;FsyEDtoEJ+Bc33yQfFDM!jjYOV8G$^1LA}HSQ@Phn6xFBAyLV!dd2m&hJ z3*2;1_iRsf*VMie*If96%vPVOt~zy2ednAyRlTmSXE5KpwD{%k=%s9FP+y)b4&+A$ zGohlMF7ytBGWp(+`5-iuF7*zCdIz#Oy{LzlOGQgPL~ zOeS4QpCMc?A^_t*5+uSL01AM1BF!NF1%*O-1tIpYmX;QfdJqEAK*-^CC4U~k2!QJW z{1m{Gb_3E$J)hEhOGK#rY;A1?X#(+qw4j{`9Yh52&miKGY{vfGuEvtFSR$s#azaVP z6g8!!K$;2p3jmuau%wtDDfH^*g9FIu1F0{o4`zx~`}G6a{(-@4|3Il|76qD&o{J|E z;Nt|ifKWXat9u+I;?D{200DkallokP6R4L&q@)*15u>^}*`tk=Uuv+($%*o7&GNm4(Q?08*_`g91)y7~o^F5DbO(vhBLD*w zk>nv0jmAjeJxVW%!xnURXFs|q^y1eyJb%|`pTFyt7rwpm`Hf$Ce&dx}Z{4u<8y9a~ zf7Q0{-}b`BtF~Tsvn}^?tL5I*icoJpSJHDOOZJ!M3wk;i%Jmo0L;n=wE-Okh5mS?i zxEf8Q)TowHce1#zw>kUCDPs&)N{Xk0E%_)vpQ(S4MBJls~;6>1+ zwMG5_hsX!4`sSn?0)HY;=(7TlAGgpqmV0aW(u-vF%a>mN^4iN@x#5b?_BEGTj7cslN`uCIC-N zPU_oYDLJmCVktGDX>m2KXo@*&Z?S|vG*O{HfdD6uZH>R501p%3#Yqc&qapP6)Qm;! z!!^Zzw55vRzsD5(*(||7P5@5g34TrhO5+y%=5l{W`{_j#-m>cAEvqlzvTDPYRkv0`8y3!pVa1qWI4a;5*#dr@ zL%;(I0el!h4}g_B1nMdPcY3YV#5J#y$-;NQVs01lVs0}bA96nE8%&>h*s7Ye{d6j6XDa$uMp1Nz`#-jI^P41*%w*O$%d z^M}*@3_0afv{)2|?DeEs@=jA(vPy1u$f|!nfTIDN37`nzGQN!ZOtH9`DWi{oA)|i= zLq_*-h**6=?^|MSz?qvSlbf8mnMVM%=G^Qo zw%hCdY-_zANar$xdT3M%L+N6uSSn<5{b!vsy7I5KW+ZT!V@AX%fP|${HO(8rG;dBQ zS8O2U)5gEV)W{zI_%VPN0lWub584_KXv4vuBEXRZ7$U&G5@0P|rEYvKK|GO6sEQhw zH8rlqld+^$_JHpt4D0C66S9uR){TTJYte)(N0n$)Gv5{#7~MEsn3$2~zf=IO6o5Mg z;P(O$6nVy)|1=ReM+D9jfqxT$>**>!SCyZM!2Kfdk_gQ70Z00%D)p`^m+v6?iVs-t z12+4BXXq*o+?l0JL%(4fMrpPe^uCG>u(GQ!pYNI<>gq`sx-O&{II0-b5;Ybx^UzA+ zahXU#?=yo5*-WG_Ux*B)bERx=zMz-A4F}Zx#m88f_VzOX*WPAvEl7m@Bw)70vB8d(fNqIngY`(jfKgvU4qZIVuIv-%-p>&^M3_E;bZ115x7JIt`&iwh`_J-m}y|;Ya-C$1J1F)kb{}e z`G9RcfcSw~eqf$|r(@<}XLYSS+fanFyXJ?^HdNv4t_!>7P=)6kh3D|0q5Z6`vc7c9 zDgTJMYE2ifF8?axs3q7 z;chGV?MUfj0T>d1)k1C5o#sAd(zyuRI~3_+q%1Izv@!3T6?v`a7DY25zYrKJ@;1R^ zMQ#$=E3#n?r9{q(yjTRz6gjSu5n#AL6r)=E4b+z?xVH@>gH?Qk<|**EMBut{D{zxTfkP}We@a!@2JZc;fxBlg-{Xzi_7C` zDye7*HI-D=L@Zug>BAFLdf#FKoM|NiT+3B>-`fN*HrQthz+Aqvi&Q6k#%xqn-S|VE zz;fWJyK$%P8ZD1mam%|;A9ARn+0F7EHA|hZ^i<~{5q=~9n|Qmwj|;%lyxm`8VR&`C zyT28#j$>EKp@z0f$nUEJK2L!4e3kGEK!X4nX;t$lsS-2J>N2OVYmWJ^;&T4~Vd`;; zr+Qq#Qjc{E^$_nDfQJR(Ii8BVEdcNFRHVh&uc`@PULK&WB9sGY7JEb0I5g*dF!{%U zNo=}k%WefG-XWbasDy2i?PXbHJ!W2JIc=?Hq`xn_+?|jq-MAa>z@#)aYV3H#WHm0! z@pwXs)!u=5X{YSKl)Dj=V{%kgl1U|HEd=|rc3?g{!5tWJr2wqjfr;yAt#-SRe-nUr z1z=ARkVW7)?#_$YF9Jg%a5+ut-f?-}N^0==fPH;{%Fj~ZB(Re7SQZeaY5_6iK4Qv! zq%8LwC-M|WT0wOt)N)qB!M7X_oaY*L2{miijiLlZS ztnzc#(yRQyH~gHn^iDr;x1ZxSKjH^|?XTrF2Z_)r0sHV$0_I78%1a4YBmpPzQUcDA zfL@A-zw;n4+zC>zL=`m>RU=Bmo4}ACukr~=jCa?@Wv@+Pe{5`vRv$z-cSJ9&2&D%H zL+M;5)UTI9IemF4)H_lr<_lDcwheiKB~t=SWiOtU(9*+gOUUIExF059=j^Ar&{Lj;N;7dN>rb)x5@n=LHlkt75Qya^A6*2WHk{%q4q;r`_zg~(M zdKD?_6$fE9GZE$`257dh@(bc@gc&OfXvx|oEI7l@V!@{wSb80c1rOE)+3kMd4IaoE z*#&L9yt_jrU>+~;&Op4Ad3kp?O2B#^$Zp<&pZ~!F+2_o4tnSMWmh?iXXGO>L@Xhv zltergQ)BUIh|c9sMq{#;R5VSECS`gvZ`Y#pANlBftOy)00vT(E{HuI)zTL_^{2d>i zQ&xul={{iC2Yg}D=sfnwSi-50WkrhgtcW;CIaqWP6OJEd!13!o1{QTr299^-m_NwG z-+y7?@8?+fdrM9Hebf&;$;01Q{lJ?%C%0Y#T6y?8M*k_b*hrf47 zz(yYaJ|h8t7`KyKo|WS=3|H3{mWiR*gaxQpPZ^u)wR$?X)0q?OEse5O9&!0d{zzev zfwFbydL8{Vc0!C(f!B`d=qDRN5^_{a#pGxzmQ)f+HKoSN@z+hP9gwZ?z=&o-6z6f! zQIs$j@G}Uoga8+Fp}gTv0^Ci2rwH&G0U8Bhwsqr3zW`h!0N)gVySPXrJRtzj&}6L> z@y^DWBh%=R4%uwG-ek=i4p^;EXEIsiR-N>q`D{;-5y?BkndZ`4(9+sfeqL5zW>TzoP_$U*!mAeD#~cJ+2h0EBcwR}#+24UeRVg{m zgdFR9HZq*4jH3ybIg<4Z+y;s84Igl=uX?$1o&DY91Mc&2mi22SU>(n`GSaMnz+15& zlz?ARM0GULpCsTdim09@G7cckqMV@yIw@KU07nIYVO}owcLKo8oLp?--T<(flZWkp zGypsq08R@6R|bL2!RiV#_!)|$GM@6J5FQ}rf|v@GQ=umG|2gBXR?H9Sp}}l!X=tES z8ZOSCH*YYT)6<2y>EYpd<@G<3(Tmys+`MeDIHJ#6&>fLuNo`(nFkKvo^yY_#(z#4A z5?+)DFYFGdqUMiyc;Q0hN2)ulDd9zGIHiUcs1f5mk?^8qSX&TI#f|cX$AlMZM#+K& zVJ#V6q=i$-aO#-wqHg1n@go&pxG1c}DvcO4TFfXk+FxXr#EnK$3k@1AY7`nJsc`C8 z^A!uiTHI)7LAObq3~LLG9*$O5jYmc|+%2b!L1_!k>ay7%HJnO?Qwgg<;}r?xrP?t@ z%M0V-1r?xeAmqKEiBaP|&{kQfK2~0*yhX-dj24y=zBW;JdBI9~=QxB`md}rL%`g9J zq?8QhGkVwjF2hdo$eyQbDDE2UVapC4rRr=R`Do&EYYOb)Jx5MTW)Gj&b0jUdu3;Wb zOVQyyM=t0#s#3ya*Q~%{*Qj`n)~S{91TeXOd)~uE^kUh{&#MT*P&+@KHED}vE zjwa?uiSINt<= zH?>Xgn3hb)@tC5hvZ`vyq+B+=FJ^V)_MOy?sG3Nqa#D#UqA4w*CCZ0SUuAV;U;obN zhMJ70;;I%+7(9u}nqoO3r~4V*xMZhvBau?#v1HUZOAwE0iIkEo%f@}IZoEFBZUl+X z4iJ1o0xp$!-b`S-1iT>uE!Ocpdtphsi0Id$l*HPp%U2l4<4tS~#c%u$zst3aLKzBXR zS5H;#biHYHJ@EN@;MRKJS(x+dVzCgAucs_MbqEI!H;^=0zCbWo0R(b&-p zzV~_rb3egEyvQixF(Z2Ai_rR-*zWG7O{{3&ZB5=j?UPL$kM-|OwLRATo7rcacN2~u z+sx*8wwcB8FEx8}{Pt!J$DeMl&GC13e~x#zusOc8g~jnJTD&=aXA6hp&$QI$_{Grj^6-O|7*#{%q@R&++;hY>v;H!Q%Mh8QvUUF@wYLwKHmS z{1-DA91jxV%^ASoXK;dmv)X`A8z%^;wE;&_M6SJ^#cjZuZJbEqiZaGgg>+aFH%G;G?CB_1ly~LT4|zv+kpeyYX$v+L|E7ke3&9?qlg*? z+JRgC(zhg zO>~HB*R&H*D567YqLVv;(D5L(^b~ufG!(%#F&5mnyN@TKHGwIQJ3)`gTuGt!0ETTl@NPHKL(6v2L zr8xBbd8m`@sZMb_r(ym-!%;T=m2+c?=kDU8Y~|>-5zS4;Wm!%n5_0+IDl6;nV-wB# zvo908gPV&H=oEo{tjjq*E&`vja`SKHXa7AQ0*_c13UVTsAGb39N>*19R7c)rJE9j+>LN25a4VP9}S2=}D6rW!l85&9#R)o$=502;_A9#9e+I`?j zNyei}Qqf|mgq%#OiuDn{ssF&!Nd))=0haTx;;_Gd(;@%|^RMEVDllBk1jF)#N6ZPg zZ{9G+9Z823FK zY}NBlMuV+C5leq^`I4Sp zzRqUSUj-cDe;{o;0p1`$)2`E7V66uiY2$e*MEIV zoc8l8PP^~B05$-47{GHJ*$)z*NPs#5e1HIl^PP2i8_wUGAIX)>zwDE6pLEE-Zy;Nu zrC(gCuW+KTae9Q;a#=<$8b`~O6RgJ80|9=>`S6msi2(Om-yLLUg;y3F<17Unb5SqY zb8&=F37`xMzCqeb9!+Cz6G2>DAQaJ~q9R|Jeh z_s`H(I>s4uQBra|9@mmeR5o(8Dl4AITJgMc=$Z*R(Rob4d3=St=E{kvW~{oJ5>;bS zW92m)EV9z))-rnV(ByiMjLWH{oX}#bYNa#2#_GY|UMXuvQiGq-gB`2*kRK@efi?7Z z8T_sVf14jL&d~+wq&A5r6%LSqgJ`lkSBv)7XqM8X%`VbS5^$dc{1;8y;(EDpLgRmE zvNl(X_BoBC=*&4+KAU(&0Qgz}xQ!-t<*x}31%O9svb}cR;nX>mRloeLP{*f3^TQEy z-H!~Vmq*OC-+^`80xYa+3VI#GteommoR2G~dZzR?jERovv>I4MjrBbbaYu**G{6+IH_XFKoQA)>c_Wj^C<7%r*)DmXE2&Ms(iMhWlH}K_dP;fL{W5nQl?!!m+<6pD$X5e}IVc zT>6d>zbS!UYHc$|`iC;-3V1rK;@tp#?&XzDIZv!Za<&R<-YUE-TN()UjucDzp|Uy; zrH6+rkxteJz8faY+v6R4@%7~MgJu)w0$4Uy)?V&xVrZmjqN?~KD^hwtsuW)$;!XF6mB25;U zf)&QIn&$!j%3>6qb}}K{0bnCv(aKFc&RE6Ywh|vWA-4YyxI=%Zidsvo2(Ra{&C|a> pMZ+}TF_~fd&6UmmYY%GQ*#dXJMQeTn#KPI!`dm z4W=4r1|N$E00000000B+U3-{ZMV0^6z1{cr^t>M#AP{IqT3+d-Z@+I0j||Bu57$Q` z1W3&GOy8Ml(=$DE_aqYn`T;5;qOd+9qDTZm1x3XuLIgK%09}v|gavWk6?T<~3xa@( zyTGPy-|o3x_g2@u62^S+2f0&q>UHXz`qepgs(Slie=a|;vG}R47^Q3}XG|n znZV{kwqyh{`GN5fW3&|5oG)z57Dv-##o>IZc**umCS6LOEnP1|1^tish}sL_9RT_P zYz6Qc06PFY3ZRoH?L>K3{|AhL5+T*kwzf8qW)KCWg^*YG7V080h_{{i%zkaiX3U%( zGMS?esaTE@}^W3;rvUcvWm z`Q5eollAzB^eX~9NvsiiR}>2AEkvq(wYIjJL@yBFFAR~)5_L+zAyO@ohb4+jz}pxi zFH3Zm1gvL>6qe{h3AltIQfr9(AOxfdokXgz0EK484g8ya4<#Ol2SSSu2%5&_O90PS2W+?&Ub?0VZvAwe1 zA7!og!|BmX&In9OVIW-$6ibEd=+L?ACs+Q9tr<~|x6Ozg29U5c%03jr4n+cKB;@pR z!A3$pNdG0KMt%pt4*>iQ!0P}GBFxN6wKMA@-V^~&B)|v(K1zUV*&+>YO593$Qela_ zA)Bducg7OQL_89YX?i@S$CA;co-zgDZac_qDCWlt14dc1Z5hg7*2rawO#S}h?9gy7 zJ2YG>nn`N&RJ)OgYkD}LY2ipXtedR`B~CZiN;Bz(k9aSafU6{6hXnkm1o&izfwF_= zXUV{N8Q3HPpOS$a*diWBksr&zJu>i&3@q>fCwiD7en*ij_L6+s1Ki>Pc6)#)*dmR> znN`YST=~#Sk{pol|Uk;8Gd5P6mD`13wpICdJC{-kto=+3ki=m;B(5GPLO;lvmZCqCozj#UMKyg~)ut_lEoKn1dDeL$8! zq5_wzz|CrXP?qmkfrt0j`1}0<%#sk8&1^#4^3S&r(@{wkxS7J`^yr)cx}#wypff$N z6PzEJVgD;u_mQ<$x1}BRO>X&i+x9_P3?L%fIuLWiu)ON^qTZ5m05<^mk(*~$njZM4 z^kgcj#Zoc)5l*J!(L~vD0S9%Doe}u^h<6UY zSzsDb2g3!jz%;r>2DZ!fR^s_!hEx>zjyDzfO$9y>6!=UTSSM282^sjHNP)j31J_Sm zftzg#9N<~`bE?7>;6A(-xchVYes|Qa9FfZY`KR<+A{x_E$%vkar;_n_A{wi&^uZY_ zy=M&p&LO}B1h`J9@Sax)@Hzn&NWc=Yvdc^-Jajgys!spVX0RN%>Tb-gyR_yhD{lGp z>FqW(G`m?oqvomeRj%snBhvRJV5jKx_hAWmTy*+N7l!Aid-_}D>NHNJY-(6h3GGdl zz{d!1i&!PR63{3CM@qo5S*k?)xjp9e^(;33jW~?|H%vWNyQ;?pJoVVlQ4jeZ3AkSZ zo)W3ZD-!U!NJU!dc~xBibF%;|D#93mX0kg}O+$0u1d}%oOrrBeTMj5Nad+uVK_x0X zvO}yLS%11TFl>db^^Xq?W+x^$V;5Qaquh7^p1`E^c$l7eM74NK(_*niBwGIj=9ztR z0#oirREuiicqEyOq^yNt5$^=%9Wy+Ek+({~MSC%EJFC@B6Y{ST@R|f1ECZShoGLtd zk%wepL- zfiHLkYv~RzaHm&bHb3A6e&MZWHv5RwtpJCKLIRd5KwK0Ouu1_=7lj0@SAYSANPO}j zam)$gkAx%fP&ginB-{z8{J5phNMhW(Hm&{I9QLPzZD9-B%Bg|##*M<3KsuKTq(?J> zA)^!+H6}`df$>5yUtmJCE0Cvdm=nmmxC(j74*}c_;5h(I1fUzb6NIp*yq^G>y=0#z zz;_7nGXgv%hCXjS%+XTOxSousw1}=n^++VC#o0AyLy@x8{L@1R&_amJL!>1$LL_k~ z<9a+13CH4EGO9-tT11~hC)2!6eqyGudA9`YmVjr(u(?_W&X$3q3|u~?AU}DR4BWRD zI-eE8=9wN~od?+D0Y2#gZkjc0(mzAmls4Zh?b=w@tB|Qzp>!@6N{?njLq;h?^(s`> zD*?jn<|5299MEj##TUf*2s2d{u#&Z3Sa7zN$AS-Xu=IK!3+}56vd?;f7eydTqYFAj zad*cmz*14%9YwtNh~n;URDfGVAiH}ne*UWnWS>%iJtC0xsDOXk$NS}3F*StewRy_`5{`J|x25&nv*SBK-ZH0^A|O z-zOB{H`6w9%d>K7hGXhhgk@$ZHe&&**Hg}+dcB@b?RMr2d&{D1jYT|Sx_-wCIS$G; zoac7+lb(doTY=Y2>FOsRK@wV6PerwGDw>QWlJQhLTK2#0;S=&sk zvnj^NG})v>Gwb%7ta;G}tNwH*lPzWQqv@QPcd*Ry5`GzkS^!-%kC@V zrMC38k#u2WCck-f!&t%Clr=V+6t~zYTCIEGHH+>;HUhVc<$t#2S3`1+u%S>*NKOkO zEA4sXW0~qWnqh56vaXI>ACbP`0j~4ZE?17bzdJp^E{|YYzeWMJi_9t-X8j$}ihZ8~ z{G1_bV2S>y053B{ewK({Kw8KcLyc@uw5|dtslb>h7W*qI@HIg!wsf}&>=wjfdmmJR zM^)e~A8?fq*zKz=&@PTpoRsmDD}{6~F&D&Cpd1P{qyI0ScCli9#0ccFqZXG28crXBBr8Ih&CeoBtFtyTbVntAo z(Rx<&nZ(JU-b;HpSv*d&Xg8cSr|6*cUbDDn_9q@prGlx1RUvI6K^xUip*8o$f-9;( z+epakJ`Va*uHcY%Sj(=~-6(mxh##R!%ETe{u5X9 z(V~nnRZ3RjuxC=dCd<@HS*46@YpqtqCQH^z8puKDByw9jIkec z3;<_Yugr{y93@^;W+tkRlAA3@Nh2ZuK|jlmly(9$wxW@cmGskNuHu^saG@2YwVMFH z79Z`uws(i-k$|WKoFM@jcJI{aDDn{rpjQg-kbqxEz#rHmU5+A6GSDFdE37d30&CPS zauoTr3|wt_=%18kP=R|@;8m8q$olI13^E~=LZJ;Own#f zOBefrkNJU{{lJqf`QeV1PH6;AZ3K));6shTwnpIgM&R*ArsxrlmR@QE{?-UAY64a_ zG2}-(TH4VB+}Q*?)&#uJ1T-`Q$29||H8Vwz5k~PbmZ&F_A7GtwjETngW{9oV6S(II zCgO!A5s&N9D?fxb*vvO~FK^~~`@Y@mZqq*6EU;Mr)?D9WJ)(tw$N2!^_{tVO$FnUw zj(@Vno#VH)2sr+DOMQ;Ndf;=sua(d7jjcS6U)k!;@g1!Kjz7^_pX0B$9`GDLrj5_> zcee33UTAaY__j6y$9J~X=lGLt2Rz69?R<_eZRc@(O}jhCx3mj5erX6l_ioo0bge=Q5#FNs1tZgXFaE%k4U|pz&jYC4u+_4xDy!d1TN_W zuI>bG?F4pr0>A16Ug!kWE?`j?5bLU~sFQ8&v@YP?4ADU>(O4I-i6J_eCAz8$*v1eY z!V=xm1$>twTF4ST)CD}o5FN_;Z~xK-yxIkHET|C0PJ9hj$%jn=mOwZ3xMYt!XR6l(hW3q*AgA;I5q7C zA`H=SEYW+qfwLH*<5{9yH&9@R7CWS&8`#DWg;9waOE2)Y-HC0Jf?WuYt)mKTShto8Qt(@F2XY=8&NL7f# z&V4c+4yU^{4wvcRRXeTGMMAUHnpuy|UD#$dcP-ZFqM=Ag3&r;12pz{0HHyQ|pNl$a zuId!4#564Xd$`IbesXR~{@ne1m96aErrz9SOw+VPBB7P9uJWS(-aXT(zsh5R-xJ1S zsNFJfnDscvdu8AQR&4&w;^@D7W#9qpK|z7%^1W8%U&)HiztIES>j9o-qX3&7Gb@M! zakJU>t@R~)`PSDl+?cs{a$@ERUV-(s&0F94+F9qNzs=#>+5o@_NUZn!t zRNxjB*rfuGtH5(A;PC+qeL&a;^!b4GO!$byx-I&EEew&vy1m*5e3>C~Shu(PfV&tX zhjsf4AMne%AuT>4{nZD&#t^aAt#o(;a8yGrQRnn*fj4*2^J^C!71?I9-Ggz;Pt7(f z53UlNu*3>ZC>W*j!f3!487pnEo^lG9D88&XJ~EOnYzb^i=f(|ZV5#)-oV~|!JpdXp z`pE3wkjm@O1bPJ#D-eol<`^Q%U?=j#A&1#mxrrv$R^BOaLm4Fq^A z0hWu6b@mv}JCGkAEt!8+Zo<9aCjXw{Y>AbAd84t#j=uEvh^XeWj9fB}nyWWh>CpoL zzAO0QC3z5lS22PfNwXB8i2oUm?WZ--m___?xOZQK(MY^WxbBUz1SS+R|BVmokYE@S}nYZG( z#n817a)$kyg8ll6aLv^cVV$nJdL$f=hUv;{R#@bP&t1#u!B1z`gJeugCAEYejmNEU z#^-rGIMgjOQYy*LrrbN4mBz2nAu zM`a-`%sVP;t2SCl4;9j5oJ+QCt0vMTV>yG~Nika$HOIU6GqvX4vN}6DlrsXw@%~~d zTN&?88O0QebzKd?2*LMZn3=krBt)2|X)Tf)8%;x(nuO{FW$QQlDITnSfWRlXCzKf76E zbIud%f?P#?b$9jMoGlFp2F8n}{76}qN77?sRWB#+$GscpMpOFp`P?)$W$)hT3VRhJ z<3$ruAF7(nf32F#(h=6HIQ304i9VWh3QMMNdV8+j3uK%(p3W6* zxfi?T`iS&(0^G_r;jjjz-w@z;ESWk7D~xL~*US6W#V9=QWJ3BLfIGyBR<6SSo6h#B zc*P2O5V+T7j5^<#G2hPM^qBr?`IdcwJM3>%F>8tC-F3aPx%OLgG)%LN*$mTbu56XR d4r0!nD*$Hodi(tBwe6t${{eBfWJ{-k001|Rxn%$V diff --git a/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts index b68919554e..395082edb9 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts +++ b/packages/backend/server/src/plugins/indexer/__tests__/providers/manticoresearch.spec.ts @@ -33,8 +33,8 @@ const user = await module.create(Mockers.User); const workspace = await module.create(Mockers.Workspace); test.before(async () => { - await searchProvider.createTable(SearchTable.block, blockSQL); - await searchProvider.createTable(SearchTable.doc, docSQL); + await searchProvider.recreateTable(SearchTable.block, blockSQL); + await searchProvider.recreateTable(SearchTable.doc, docSQL); await searchProvider.write( SearchTable.block, @@ -163,6 +163,135 @@ test('should provider is manticoresearch', t => { t.is(searchProvider.type, SearchProviderType.Manticoresearch); }); +test('should search doc title match chinese word segmentation', async t => { + const workspaceId = 'workspace-test-doc-title-chinese'; + const docId = 'doc-chinese'; + const title = 'AFFiNE 是一个基于云端的笔记应用'; + + await searchProvider.write( + SearchTable.doc, + [ + { + workspace_id: workspaceId, + doc_id: docId, + title, + }, + ], + { + refresh: true, + } + ); + + const result = await searchProvider.search(SearchTable.doc, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { term: { workspace_id: { value: workspaceId } } }, + { match: { title: '笔记' } }, + ], + }, + }, + fields: ['doc_id', 'title'], + sort: ['_score'], + }); + + t.true(result.total >= 1); + t.snapshot( + result.nodes + .filter(node => node._source.doc_id === docId) + .map(node => omit(node, ['_score'])) + ); +}); + +test('should search block content match korean ngram', async t => { + const workspaceId = 'workspace-test-block-content-korean'; + const docId = 'doc-korean'; + const blockId = 'block-korean'; + const content = '다람쥐 헌 쳇바퀴에 타고파'; + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspaceId, + doc_id: docId, + block_id: blockId, + content, + flavour: 'affine:paragraph', + }, + ], + { + refresh: true, + } + ); + + const result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { term: { workspace_id: { value: workspaceId } } }, + { match: { content: '쥐' } }, + ], + }, + }, + fields: ['block_id', 'content'], + sort: ['_score'], + }); + + t.true(result.total >= 1); + t.snapshot( + result.nodes + .filter(node => node.fields.block_id?.[0] === blockId) + .map(node => omit(node, ['_score'])) + ); +}); + +test('should search block content match japanese kana ngram', async t => { + const workspaceId = 'workspace-test-block-content-japanese'; + const docId = 'doc-japanese'; + const blockId = 'block-japanese'; + const content = 'いろはにほへと ちりぬるを'; + + await searchProvider.write( + SearchTable.block, + [ + { + workspace_id: workspaceId, + doc_id: docId, + block_id: blockId, + content, + flavour: 'affine:paragraph', + }, + ], + { + refresh: true, + } + ); + + const result = await searchProvider.search(SearchTable.block, { + _source: ['workspace_id', 'doc_id'], + query: { + bool: { + must: [ + { term: { workspace_id: { value: workspaceId } } }, + { match: { content: 'へ' } }, + ], + }, + }, + fields: ['block_id', 'content'], + sort: ['_score'], + }); + + t.true(result.total >= 1); + t.snapshot( + result.nodes + .filter(node => node.fields.block_id?.[0] === blockId) + .map(node => omit(node, ['_score'])) + ); +}); + // #region write test('should write document work', async t => { @@ -189,7 +318,7 @@ test('should write document work', async t => { let result = await searchProvider.search(SearchTable.block, { _source: ['workspace_id', 'doc_id'], - query: { match: { doc_id: docId } }, + query: { term: { doc_id: { value: docId } } }, fields: [ 'flavour', 'flavour_indexed', @@ -232,7 +361,7 @@ test('should write document work', async t => { result = await searchProvider.search(SearchTable.block, { _source: ['workspace_id', 'doc_id'], - query: { match: { doc_id: docId } }, + query: { term: { doc_id: { value: docId } } }, fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], sort: ['_score'], }); @@ -263,7 +392,7 @@ test('should write document work', async t => { result = await searchProvider.search(SearchTable.block, { _source: ['workspace_id', 'doc_id'], - query: { match: { doc_id: docId } }, + query: { term: { doc_id: { value: docId } } }, fields: ['flavour', 'block_id', 'content', 'ref_doc_id'], sort: ['_score'], }); @@ -319,8 +448,8 @@ test('should handle ref_doc_id as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -371,8 +500,8 @@ test('should handle ref_doc_id as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -416,8 +545,8 @@ test('should handle content as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -455,8 +584,8 @@ test('should handle content as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -497,8 +626,8 @@ test('should handle blob as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -534,8 +663,8 @@ test('should handle blob as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -571,8 +700,8 @@ test('should handle blob as string[]', async t => { query: { bool: { must: [ - { match: { workspace_id: workspaceId } }, - { match: { doc_id: docId } }, + { term: { workspace_id: { value: workspaceId } } }, + { term: { doc_id: { value: docId } } }, ], }, }, @@ -682,8 +811,10 @@ test('should search query all and get next cursor work', async t => { 'id', ], query: { - match: { - workspace_id: workspaceId, + term: { + workspace_id: { + value: workspaceId, + }, }, }, fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'], @@ -708,8 +839,10 @@ test('should search query all and get next cursor work', async t => { 'id', ], query: { - match: { - workspace_id: workspaceId, + term: { + workspace_id: { + value: workspaceId, + }, }, }, fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'], @@ -734,8 +867,10 @@ test('should search query all and get next cursor work', async t => { 'id', ], query: { - match: { - workspace_id: workspaceId, + term: { + workspace_id: { + value: workspaceId, + }, }, }, fields: ['flavour', 'workspace_id', 'doc_id', 'block_id'], @@ -780,16 +915,20 @@ test('should filter by workspace_id work', async t => { bool: { must: [ { - match: { - workspace_id: workspaceId, + term: { + workspace_id: { + value: workspaceId, + }, }, }, { bool: { must: [ { - match: { - doc_id: docId, + term: { + doc_id: { + value: docId, + }, }, }, ], diff --git a/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts b/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts index cd8ff47d1f..e90a781e02 100644 --- a/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts +++ b/packages/backend/server/src/plugins/indexer/__tests__/service.spec.ts @@ -8,11 +8,12 @@ import { createModule } from '../../../__tests__/create-module'; import { Mockers } from '../../../__tests__/mocks'; import { ConfigModule } from '../../../base/config'; import { ServerConfigModule } from '../../../core/config'; +import { Models } from '../../../models'; import { SearchProviderFactory } from '../factory'; import { IndexerModule, IndexerService } from '../index'; import { ManticoresearchProvider } from '../providers'; import { UpsertDoc } from '../service'; -import { SearchTable } from '../tables'; +import { blockSQL, docSQL, SearchTable } from '../tables'; import { AggregateInput, SearchInput, @@ -35,6 +36,7 @@ const module = await createModule({ const indexerService = module.get(IndexerService); const searchProviderFactory = module.get(SearchProviderFactory); const manticoresearch = module.get(ManticoresearchProvider); +const models = module.get(Models); const user = await module.create(Mockers.User); const workspace = await module.create(Mockers.Workspace, { snapshot: true, @@ -50,7 +52,8 @@ test.after.always(async () => { }); test.before(async () => { - await indexerService.createTables(); + await manticoresearch.recreateTable(SearchTable.block, blockSQL); + await manticoresearch.recreateTable(SearchTable.doc, docSQL); }); test.afterEach.always(async () => { @@ -2311,3 +2314,29 @@ test('should search docs by keyword work', async t => { }); // #endregion + +test('should rebuild manticore indexes and requeue workspaces', async t => { + const workspace1 = await module.create(Mockers.Workspace, { + indexed: true, + }); + const workspace2 = await module.create(Mockers.Workspace, { + indexed: true, + }); + const queueCount = module.queue.count('indexer.indexWorkspace'); + + await indexerService.rebuildManticoreIndexes(); + + const queuedWorkspaceIds = new Set( + module.queue.add + .getCalls() + .filter(call => call.args[0] === 'indexer.indexWorkspace') + .slice(queueCount) + .map(call => call.args[1].workspaceId) + ); + + t.true(queuedWorkspaceIds.has(workspace1.id)); + t.true(queuedWorkspaceIds.has(workspace2.id)); + + t.is((await models.workspace.get(workspace1.id))?.indexed, false); + t.is((await models.workspace.get(workspace2.id))?.indexed, false); +}); diff --git a/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts index cdf0ac2de5..d928b76623 100644 --- a/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts +++ b/packages/backend/server/src/plugins/indexer/providers/manticoresearch.ts @@ -38,6 +38,17 @@ const SupportIndexedAttributes = [ 'parent_block_id', ]; +const SupportExactTermFields = new Set([ + 'workspace_id', + 'doc_id', + 'block_id', + 'flavour', + 'parent_flavour', + 'parent_block_id', + 'created_by_user_id', + 'updated_by_user_id', +]); + const ConvertEmptyStringToNullValueFields = new Set([ 'ref_doc_id', 'ref', @@ -55,23 +66,20 @@ export class ManticoresearchProvider extends ElasticsearchProvider { table: SearchTable, mapping: string ): Promise { - const url = `${this.config.provider.endpoint}/cli`; - const response = await fetch(url, { - method: 'POST', - body: mapping, - headers: { - 'Content-Type': 'text/plain', - }, - }); - // manticoresearch cli response is not json, so we need to handle it manually - const text = (await response.text()).trim(); - if (!response.ok) { - this.logger.error(`failed to create table ${table}, response: ${text}`); - throw new InternalServerError(); - } + const text = await this.#executeSQL(mapping); this.logger.log(`created table ${table}, response: ${text}`); } + async dropTable(table: SearchTable): Promise { + const text = await this.#executeSQL(`DROP TABLE IF EXISTS ${table}`); + this.logger.log(`dropped table ${table}, response: ${text}`); + } + + async recreateTable(table: SearchTable, mapping: string): Promise { + await this.dropTable(table); + await this.createTable(table, mapping); + } + override async write( table: SearchTable, documents: Record[], @@ -252,6 +260,12 @@ export class ManticoresearchProvider extends ElasticsearchProvider { // 1750389254 => new Date(1750389254 * 1000) return new Date(value * 1000); } + if (value && typeof value === 'string') { + const timestamp = Date.parse(value); + if (!Number.isNaN(timestamp)) { + return new Date(timestamp); + } + } return value; } @@ -302,8 +316,10 @@ export class ManticoresearchProvider extends ElasticsearchProvider { // workspace_id: 'workspaceId1' // } // } - let termField = options?.termMappingField ?? 'term'; let field = Object.keys(query.term)[0]; + let termField = + options?.termMappingField ?? + (SupportExactTermFields.has(field) ? 'equals' : 'term'); let value = query.term[field]; if (typeof value === 'object' && 'value' in value) { if ('boost' in value) { @@ -432,4 +448,28 @@ export class ManticoresearchProvider extends ElasticsearchProvider { } return value; } + + async #executeSQL(sql: string) { + const url = `${this.config.provider.endpoint}/cli`; + const headers: Record = { + 'Content-Type': 'text/plain', + }; + if (this.config.provider.apiKey) { + headers.Authorization = `ApiKey ${this.config.provider.apiKey}`; + } else if (this.config.provider.password) { + headers.Authorization = `Basic ${Buffer.from(`${this.config.provider.username}:${this.config.provider.password}`).toString('base64')}`; + } + + const response = await fetch(url, { + method: 'POST', + body: sql, + headers, + }); + const text = (await response.text()).trim(); + if (!response.ok) { + this.logger.error(`failed to execute SQL "${sql}", response: ${text}`); + throw new InternalServerError(); + } + return text; + } } diff --git a/packages/backend/server/src/plugins/indexer/service.ts b/packages/backend/server/src/plugins/indexer/service.ts index 66d1d0eb91..8e75dde571 100644 --- a/packages/backend/server/src/plugins/indexer/service.ts +++ b/packages/backend/server/src/plugins/indexer/service.ts @@ -14,6 +14,7 @@ import { AggregateQueryDSL, BaseQueryDSL, HighlightDSL, + ManticoresearchProvider, OperationOptions, SearchNode, SearchProvider, @@ -130,6 +131,63 @@ export class IndexerService { } } + async rebuildManticoreIndexes() { + let searchProvider: SearchProvider | undefined; + try { + searchProvider = this.factory.get(); + } catch (err) { + if (err instanceof SearchProviderNotFound) { + this.logger.debug('No search provider found, skip rebuilding tables'); + return; + } + throw err; + } + + if (!(searchProvider instanceof ManticoresearchProvider)) { + this.logger.debug( + `Search provider ${searchProvider.type} does not need manticore rebuild` + ); + return; + } + + const mappings = SearchTableMappingStrings[searchProvider.type]; + for (const table of Object.keys(mappings) as SearchTable[]) { + await searchProvider.recreateTable(table, mappings[table]); + } + + let lastWorkspaceSid = 0; + while (true) { + const workspaces = await this.models.workspace.list( + { sid: { gt: lastWorkspaceSid } }, + { id: true, sid: true }, + 100 + ); + if (!workspaces.length) { + break; + } + + for (const workspace of workspaces) { + await this.models.workspace.update( + workspace.id, + { indexed: false }, + false + ); + await this.queue.add( + 'indexer.indexWorkspace', + { + workspaceId: workspace.id, + }, + { + jobId: `indexWorkspace/${workspace.id}`, + priority: 100, + } + ); + } + + lastWorkspaceSid = workspaces[workspaces.length - 1].sid; + } + } + async write( table: T, documents: UpsertTypeByTable[], diff --git a/packages/backend/server/src/plugins/indexer/tables/block.ts b/packages/backend/server/src/plugins/indexer/tables/block.ts index 828d8094ac..99dd29234e 100644 --- a/packages/backend/server/src/plugins/indexer/tables/block.ts +++ b/packages/backend/server/src/plugins/indexer/tables/block.ts @@ -150,6 +150,8 @@ CREATE TABLE IF NOT EXISTS block ( updated_at timestamp ) morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr' -charset_table = 'non_cjk, cjk' +charset_table = 'non_cjk, chinese' +ngram_len = '1' +ngram_chars = 'U+1100..U+11FF, U+3130..U+318F, U+A960..U+A97F, U+AC00..U+D7AF, U+D7B0..U+D7FF, U+3040..U+30FF, U+0E00..U+0E7F' index_field_lengths = '1' `; diff --git a/packages/backend/server/src/plugins/indexer/tables/doc.ts b/packages/backend/server/src/plugins/indexer/tables/doc.ts index 1647fad7e0..ff5ce52e58 100644 --- a/packages/backend/server/src/plugins/indexer/tables/doc.ts +++ b/packages/backend/server/src/plugins/indexer/tables/doc.ts @@ -109,6 +109,8 @@ CREATE TABLE IF NOT EXISTS doc ( updated_at timestamp ) morphology = 'jieba_chinese, lemmatize_en_all, lemmatize_de_all, lemmatize_ru_all, libstemmer_ar, libstemmer_ca, stem_cz, libstemmer_da, libstemmer_nl, libstemmer_fi, libstemmer_fr, libstemmer_el, libstemmer_hi, libstemmer_hu, libstemmer_id, libstemmer_ga, libstemmer_it, libstemmer_lt, libstemmer_ne, libstemmer_no, libstemmer_pt, libstemmer_ro, libstemmer_es, libstemmer_sv, libstemmer_ta, libstemmer_tr' -charset_table = 'non_cjk, cjk' +charset_table = 'non_cjk, chinese' +ngram_len = '1' +ngram_chars = 'U+1100..U+11FF, U+3130..U+318F, U+A960..U+A97F, U+AC00..U+D7AF, U+D7B0..U+D7FF, U+3040..U+30FF, U+0E00..U+0E7F' index_field_lengths = '1' `;