From 7bd895ef420b1a5dbf62349a4e4607aca8b06678 Mon Sep 17 00:00:00 2001 From: Yohan Boniface Date: Fri, 9 Sep 2022 22:33:36 +0200 Subject: [PATCH] Add BooleanConverter to map proper boolean cells to a Bool column Note that only proper boolean will be considered, but not integers nor truthy or falsy strings. --- .../grist/imports/fixtures/test_boolean.xlsx | Bin 0 -> 5249 bytes sandbox/grist/imports/import_xls_test.py | 28 ++++++++++++++---- sandbox/grist/parse_data.py | 16 +++++++++- 3 files changed, 38 insertions(+), 6 deletions(-) create mode 100644 sandbox/grist/imports/fixtures/test_boolean.xlsx diff --git a/sandbox/grist/imports/fixtures/test_boolean.xlsx b/sandbox/grist/imports/fixtures/test_boolean.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c88acd990953312b218f64819ba54f659e76d7b1 GIT binary patch literal 5249 zcmaJ_by!sW5?(;MYeht0Nu{MEq!nRVYUz+}*j-$@JEcV=M3xd%8U!RHq+t>1kWP`1 zP(z4!ZOpFO*OocWzIGw;m2Ga4#5S7-nP1Ox!y+*>+;OG1qKZ;G^ba^dGY zzZNI8sC^J13E%dK<~k6WmL;V*^z=e!g2k-aI%k|{>xF&#! z*7j6YK4Gm|Zdd`NWFwFdbvy)5ts+-K4K`w^9@*;Vj0aq42vvnSiolhb7`&y07W7!2 zfZV&lkX+vau?8wq>u@5k8Ee0Hy|dfNWYB8w-7$@{&0{|L->n3u{ z9iC479tfnP1p?v7=LvI))#`>%2~ad1a9^2$@tQbXPr=cPO8`t$6m+L0DFPj%X}We& za?WNbt@Ce9Dv*nc6VEoJc^S?;cl>grML%1ouhqQyuEsmQ(8>0VDNBje+aFo!ycL(uwLpvY3wm#)ns8L4Hwb(3&j z9f@t?*Bh?EG!jJIg32*yd5z}JR}&RB%nPTVukcflEOY)sMaqvO1vUH!apj5#H|YZ& z_K(a2-cw_XkfW$BbMA--PJ8Jj4ZEJHs6wsqBu>O`-QG64*XQ4mr8s0I8jJTyf`PR4 zo4epo0eW@3FR=8%xg=FV^?;-zhV@{7sHDwaYnTHudEY83Hu#xApX;H>%~K>DAmC)|rCe z1$AH&I_d7tErB|gLR0n%0Vhr*SJGWGpsFf&)P5X!&iXKY^lvKh8WvB9LJ+JDwKxb+}btX5K0PV+fGI?~q`N za^&zc7_X=NZ9%ZUHDM1SFRz%WY%&PsHo{X3worgk4oqM37D*MP1pD;b1rG2hK=sSg zL}jJ2o`snJU*362s&#nMDoI<&5KAr*>M-Dico+Gd11=bEh7**EN*20T`WV0w2_`XG zV+}APrTaz03C0Jqq5BFw=Tn)%JI3ntamp~(8tP2x)?|~kxLRdh#P&L>*UAsSvS%RH zrfN*<`YTZl%>iVGDg`^R7EfB6Ps$Fg+=D!B+mUw;W7QFH#f^}5V36y;^%~AbW50C6 zNjueJ6HM?kC7>~Rtx}Q@niW* zQfW$_u`3w%l+3qjP;rPmtDr0J@vVtC>=&Pz`a#zO8GY*Uy+PxbqvTLio>NoP+}Y?E z>KFZ@I^u1%-I;BG#P@;{m!MDnKRC)luw7_rnm?|{$GUTQ)H7>S+VI+DI(O_bh@9qz zFq)qkVEAKxO>DY34nMMGC}anaIiIOfHtE* zTTQDZ5~K~Dejtl_w1jO+oHuovJkH{2zOmwtvnRtcq_9k0YzWcPEKEyiYyn+CIpOBW z399U?au5xnq;~JjKSH}}JA5d3uv4*H<(Zp{(_qX-Px{h5!OTQUvS?RW!cTKGH!gtG zUN{?C)2hk^Z{A(v)meeaB)9vt@)c@1g2McEaU=F#bvBL2apmEj(2A3aTMq&biK>HW z#}Q!%Q$7vs3X5?oLxuic(#yds^QpM>g9e_`5=Jo(L&<+WXzt&Cf%9a3EBfctq%^$# z>u00Ero^~bZ|jO;J!*|m$B;&pZ>CdDduc&(3Zj zwd|P83y4U5Iw(PuOy*`eOSNcFXe`MtXE6`ukp=77(6RCTAf9Jp3TqLvcn?m^cUSy&UveCdCGF_K!)si z+|J4!_C=iJpbebEHJahC->=~YrGI3vbdnyaCAI^W4CR-n>dC>P`2sk^@JA$GubqLG zCI(D~}d5_b$1D+hbSi>3~p>0wd!&yX=jMZ<05= zlnU$+u65O4gkiJtlaqyzaKpQ~6%anV@}RACqbP@8v{=U#`fMTeP0)jU5{*2Ai;Z`b0w^$-JPH@=wxei{+cd2XJ*chJ-&>I>P-Xn zc;HoKe#6Q=F#|q})&$9idEH>OS7Y6EA)A`~y}Qao&#}<7TE#{hRoVBMkBnv9)K{#S+!?^8Dj3AsU z*6`mrX4~7XNxNTMIy3EyeeN0cZ-?H47`xg3k#5rbn>yH;Bdx8pT#@#0+usZ#_G1fC zs{je!T)ny`a!ieq(q2;g8{s}y5AdP6s_m1g_l-Ifh=m`H;v>pnCB(b#@rO;4s`G_5M>G3*yYKvQOz5WVx#b^a(vn&yo93i}!Yn zg1=HE??pA5Xpev&FcD}!QKTYP(O?bx8hNDd9v&jJ9ZgF9{uX)}&+hU6o?TO7?yy2wY9JBLF8r1Vr1fup4eiB@ zA`}gKw29lVsP(Fb9kgDdUKEC+B0JGyQ^N`4DVy(489D7C*;34hW%WWYrprC}KnT3x>TUM|rMvQnf?#jyqes#db1cTZfmX;Uk9S1YiAa?W=U)~#zB_iE+k{Y zbOjlg9ZHl}DYlvaGvaHr_K$@yJaKc~)*HgcdG2Or+r2${RkOv?fA37`s|zy?dK&2K zF6QXbPXK>oxf$S2n7x#qG#@pshD!U;tUK(JMw;;wR4g@)?&NeHqbG@q&va(5TIRbu zHL`L2JYdEv3(`nTunztapA?q^X722K9FI&E*2CF_DU=&S4}3GF3a z?P|DG9(E=3v)|%6^T!gNyPLH!Hfnj4|5f?T20&WLkzRWN+uAI2Gk-#4tQNvf{N1#(*_M4@fiz@>5+jafrn6#z|o1ZQq zn3&SWPaLdzEGDUakT6q?<$~x^eeCDM8oU)3!6dCVBz2-tL@`0>@Q$DR7J!$OjFT5} zk}sy*(miL!7>ZiP^*#dn;t^ChuD>;H zV*%@lg!YK$L>ToUi?8_M&br8n3YpdH_Rkpv$Oo3|Z#(sS)R`Xz1U|#O^$zCy_X&J% zmyrSj?rIHpHP!ZVwsv{=o9bz!)ju%PG;NpK)uy+uw9pN~@nz!b+Pp*wN_k6suZ({_ z^~oNwPJ8QOuo%_s)5LGWoB^}SENYR+)`{crjG@wixMcS*H1aSGItpjRRkV)=s>qij z#a}nvxf3d;8@oD9Y_^q_M+9zs0Ea!^_A%r3F(pu2gq~o?n7`!y02Zm?~r%YQE8)s>eVo_W)Y)q zKufaz~OWVF_xJWu|{Cl7z4kn4txpc92%yg8*x2%c#9qRc|>je}E$Q~Ujr9N^E& znt4BR=^(!?X(lY0n#tO4Li%FUZ5~^ZYO2iGU+goRB5xLEdF-fx^Qd;VK4t$zMgygcNduRkt? z6Z5D)mLY#CUmg?A=im!5!q5VY^2JpAr|Mr~2i>a$a^XM3n53 z`oD|tpW2tx_dH221dfU6MbG_DDf;t(%h3Ao0~9fD^M7()Lj@NTCIEm4^9jT>k*~jP F{udYus)zsp literal 0 HcmV?d00001 diff --git a/sandbox/grist/imports/import_xls_test.py b/sandbox/grist/imports/import_xls_test.py index 935c7104..1c7134cb 100644 --- a/sandbox/grist/imports/import_xls_test.py +++ b/sandbox/grist/imports/import_xls_test.py @@ -33,9 +33,10 @@ class TestImportXLS(unittest.TestCase): self.assertEqual(parsed_file[1][0]["table_data"][1], ["a", "b", "c", "d", "e", "f", "g", "h"]) - # 0s and 1s become Numeric, not boolean like in the past - self.assertEqual(parsed_file[1][0]["column_metadata"][2], {"type": "Numeric", "id": "boolean"}) - self.assertEqual(parsed_file[1][0]["table_data"][2], [1, 0, 1, 0, 1, 0, 1, 0]) + # check that column type was correctly set to bool and values are properly parsed + self.assertEqual(parsed_file[1][0]["column_metadata"][2], {"type": "Bool", "id": "boolean"}) + self.assertEqual(parsed_file[1][0]["table_data"][2], + [True, False, True, False, True, False, True, False]) # check that column type was correctly set to text and values are properly parsed self.assertEqual(parsed_file[1][0]["column_metadata"][3], @@ -196,12 +197,29 @@ class TestImportXLS(unittest.TestCase): self.assertEqual(tables, [{ 'table_name': 'Sheet1', 'column_metadata': [ - {'id': u'A', 'type': 'Numeric'}, + {'id': u'A', 'type': 'Bool'}, {'id': u'B', 'type': 'Numeric'}, ], 'table_data': [ + [False, False], [0, 0], - [0, 0], + ], + }]) + + def test_boolean(self): + parsed_file = import_xls.parse_file(*_get_fixture('test_boolean.xlsx')) + tables = parsed_file[1] + self.assertEqual(tables, [{ + 'table_name': 'Sheet1', + 'column_metadata': [ + {'id': u'A', 'type': 'Bool'}, + {'id': u'B', 'type': 'Bool'}, + {'id': u'C', 'type': 'Any'}, + ], + 'table_data': [ + [True, False], + [False, False], + ['true', 'False'], ], }]) diff --git a/sandbox/grist/parse_data.py b/sandbox/grist/parse_data.py index 20ed9ea2..f0168623 100644 --- a/sandbox/grist/parse_data.py +++ b/sandbox/grist/parse_data.py @@ -76,6 +76,20 @@ class NumericConverter(BaseConverter): return ("Numeric", values) +class BooleanConverter(BaseConverter): + """Handles the Grist Bool type""" + + @classmethod + def convert(cls, value): + if value is False or value is True: + return value + raise ValueError() + + @classmethod + def get_grist_column(cls, values): + return ("Bool", values) + + class SimpleDateTimeConverter(BaseConverter): """Handles Date and DateTime values which are already instances of datetime.datetime.""" @@ -123,7 +137,7 @@ class ColumnDetector(object): """ # Converters are listed in the order of preference, which is only used if two converters succeed # on the same exact number of values. Text is always a fallback. - converters = [SimpleDateTimeConverter, NumericConverter] + converters = [SimpleDateTimeConverter, BooleanConverter, NumericConverter] # If this many non-junk values or more can't be converted, fall back to text. _text_threshold = 0.10