From 58434618ce8a72937c64574be6957e08e7a63eeb Mon Sep 17 00:00:00 2001 From: Yashwardhan Khanna <48684054+SHAY2407@users.noreply.github.com> Date: Fri, 12 Aug 2022 20:48:44 +0530 Subject: [PATCH] fix trial #1 --- .../Images/Accuracy.jpg | Bin 0 -> 8763 bytes .../Model/handwriting-recognition-project.ipynb | 1 + 2 files changed, 1 insertion(+) create mode 100644 Handwriting Recognition Project/Images/Accuracy.jpg create mode 100644 Handwriting Recognition Project/Model/handwriting-recognition-project.ipynb diff --git a/Handwriting Recognition Project/Images/Accuracy.jpg b/Handwriting Recognition Project/Images/Accuracy.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9e4fb674dd7ff737ece9db4c6968324302dd460e GIT binary patch literal 8763 zcmbWccT^K!7%dulZ%Xe1A|0tJAP7ha2nb3IMd_hMdhY_#6_hGLI#Q*E5{Oa-1f)x- z2}L@g2MNjLckf;IuJ!(U?|pOTuQ@a8+h^w6d(U?We+a7pMm=qyHh_qT2oQ9000;{J zO#lfo@qgncNN)xiB^enhDH$~d1vw=xH7zX-H4P0N{Vhg1dM0`r8b($|rrXRcEG)DP zZ0xMe?6;U%nE!hS5y{Oxq-0cNWK_&_G<3}WpM&rnz(fg^(hh)9@-2z>xP z0Dy?>CfolK{@+1Fe3K(N1tk?V&CP@sMgTDp2?;SN$$wej%nrY~4j^SByLCrKo&2`( zYYG8xX4&WuMU;XX4L?{+CNM&B_C7IG)U0gm9GrKB?}>^@bXWq z+`|+OSS@GdpBX6Ybau2nk0Ss=-Z$MFRyw9&iN*Ayq+?VBOSV-7C;jq41wQ&LS&_FJ z6BQ(NmB<63e-B6D1%Cp>-d8J(q?*oe%D=gwGVzulU>HT1kGO=UW8q>XRQ>; zhIJAEAOhf68qawz*R^qMK+`Mx#6V+a6?n9%IDJjajMIenQ= z0mApg_SalUMl!ucbJq^jmhVo5y?Bkv=Mu(j)^*(NuCIhcS*PKK4L~_1$!`Xavz=#? zo(&(yMzMf08p5RzwFJNr0bpQ@s2_@`{%5#+r4{r8+kz$lm|Xc7YEEmZS9&AFk4(l+ z2!I|j?g#;zwACJPw3AB&+Zl3pa_&h z+vvKg`Nu*pVM(~x;v{6h@pGXhE{?S{Q>SafB7&*u=q@y0AgIJ_vL*RMR8;jt*~sf# zgVAv^L*9=h3kCx)_n#~bJd0!ud(F^G01yWy^j)wX+x#fmK3VC4KP7*!#o_Y>O7w8R zHPhH!OvcwuJOBa|=;u=Y_S87b`5}OQvl^__lGas@+);Ac=q{;P(ryARA@v&jEca6Xflc%1tLCZ7qfu8QUQWnpcodPNC7lV7Q zkw+`@O#`+vnEKxIs8-JAJnm(lBgI_Se0L`o@B`%jvyiZCrr`@osUZ@{PG=eFeOUlc z&w-}{`DNx3=xCDwSP$Vi5f_y#AOL70bI%l5PP*%_GG#js#%&~Q!xrl;)+7H~5Zcf}Nk6IOlXTO6tq6Gn0F~}n3|CuI z8?bTjG54|9;!I72)`H9FH2xYwsNJA<-peKYY0PIeZPsvj#jA0J_hF_S+x5j+A4w;d zVuMSuCJXwL3Xv?X5H!bO%e!?r>>DiA^FsUE0H%3TZb`f3Zwi2;onN%{bNxsyVijRf zj6c)>>CNrbUq0hhCR8uW;XdR9%>ubf%yn>xx9br|`@x;_yss9+Q=!423H% zhg3rvZT)OO)b{7JY&4`V{WeYzBbDN>pG+6nha9NMd&!n(8yz(J8v+zMp-V9t_m=vJ z_HE#k`oodxGrWJ`8yT8S003btqjQJ*K*haIyA#Bc!`xPV!QbmIDxi`uL2NFXt4nY{ z5YN)c*2?+mZ-N33{hY}#tLQnXY4fYLNsXC~35PC!M#)6jy{}<Q+6uRSYT?q`=7L+xX`Raq}Vp zwhi$;ry+Ccs(0O2ZijiJj3|0h)#8y1e=Kd)BWpn8J{PwQGYXs7Gc2M=2N(+A9;?BS zwvTB}j#s-o8aE{Wg5YNIGoO+F`XT6&^ z+MVN|`+Jo`$^wml#?rEptgU6PVwKO^HkyFhk~GbAIZ zL_1TfWA?ULgQxJ+*p=45#iAI#ySdIQ!U1&yI&_)GpI&*#n)!{Ij9X24`qL>L&|J|} zW`%}UTe9kwr$&iw?dUr3$51}xpBqO|dH7!y&gWaiAWzgYO>7+c?7Wm(Q|~UtA?XY+ z#*CV1KHjl=Pon#Wf+EstH=*xq7~j<+EU2#|AhsfTVN^{I);KAtHX%_A-Rh;)z0W>K zEnoeeR-+CeTxHz!*{l^DvP=LJG!p>vd`RP+O9XlLPRJQcuf?_+(^PL>iaA~c(~Mk< zrYo3%#UD7d#!_6w=%!xDw{sHhXY~mto6Eh1 z6C16aDFR--zb;IK5?lq}O=U3fh>PmoH}MGo+TBpJz#7|K|E|4o3R8bNlM!GVTQ?lm z1T5DNLZVGmRO)^hGiQ#-@BRyVpODr7|~|vb2(Yl^SCn){L_SmGs{zgS;9=P_h0J{a_E9t?X3BoQ{nWSb;%O zy3h64Cj1M(vuN7@|D|t}x1c|G__~z;v{U~O98oTtxBhYN=}K*8WW`z0=@PT~W3)6@ z4*^qAzK*?4zfM2T#xLL}P9i4OgI!$aTf;hWqwOmLi)(o?WARDiy$CQJKeogU8(9q6 zHd7Uc=bfWhvF0FCCDZGUr>sD)T$l$FhoDH0=(|AUjn9WqmulqeLQ`3e3*+$Y`fKA+ z7kJhwzsHjGek<(PRX4oDiB&|}RDANu?0O4gMMaD`h&`|R1d&q~9M9^m#;{)KHrO#^ z%?7{cN%!s>X&|(w{Z{CMA+NDsPH^11Es&;{R_t;#yjA~-B-87%~*dOj#*BI4vPNT#d8sN;4y(i6ePM~zp|YmK1A zTtqv}sHlata=OG?a?Y8A20M5w{5gIlDkc&AoO0<&-gBpomz(Rw*ZVaqi}?gVRF}ac z5H@h?D6*UY5Zo5~>)9)*p~ zJNQ94uc2NoX~u;0MdiHm?V)>nMUo#fuB1(?5{t?t5Y=8@%=!M-lzcq z@La!^eC*%AL-=^p_Fr>&wo%J65F#+T;mUvYiz1tIRgNf1_c32G+`whEo_bf6f7&vu zSFb7WeD%vnuy;|w_k%ZdJ}V3d5rS?6!0TBR{d)->%_CbXYhNtq$Gr9)(EY3pau!vl zXNiiD$r9qrrqv0A>|ET?(!bbwyZUW+ylX%@M*K$zeHWx^9-IxlTid&YoCkhg8mT}& zyMpPHMU%AXDvohM|Z|ZWHIG`a- zLX!4r^)#idL&m!ms8{m(+o1bv?XkrIGiY7} z+XhZ0PU?7@0HDA#paZNT-=5Y=_gS@ey1Q?ZqdZgeV#nkxp%U2ZRXft% zkgU`yw-X)*gJ`LP(_K2nopUr-fJsk48f5|e3&p6gqNo^q%y%xlFcyJ=P}LQlSC4dF zD$qo>ET|W4XIs^bLk16Vofhk_*?yJ$;+N9p7%DqJ5&$zdvKWg^_f)>o*>zxjm@6zt-mnM1%Q27V-bxF= zXZ*w~OJiKmZ1gtI1I>j#FB%Byzc(5V+<2q-Av)>bTt9-!Rh4ZLr|Dgz<6X8h$_cKm z(7L+^+gtOgh2O6pl4BwJmZWQH|At8LJ+}5r-4c5bRcb3d<+tx;=~b?~Ukt}|fBi%p zw3a!RK4hM-;n4JJ9=_&v@NuI!BT~&yrh02`1W&YO%Y^Yvh>@R{@Y_BEy%YNQ3uHcX za3-qBNKTq9`xEu1WP`%>(Ziufv+$6$JIY)+-RA3h^{eydkm|LqKLaY%;++_=v+$%X=f)DCK%}Jv)qCT`Q_R#~zbSbhhhl3C@ zY=uqJq~EpP)_9n;h4tcgV8Coh^b;T}%%tzzV%V2&#|1F4ERN>}@*9nK7y$jyohWH|H+)TUp388!$}dYLF-_CrYOmOT4&3XUCMohlzv9CbEZdtFEl zpZe92>3X8i5;wAdT&4>9K-SlzCPe|hUwPMQrCra9mg+BFH7sNb$AD5}nRm_ZNDitt zY9Vu;Y6j(Bg^K2ot9#38U^EtVJ)2}vmKG_DdRAqg9T9vOHR3YA^^J~|!%*DeN&76e zX*D4h=ZWSxtF`Jm*5A!{-8-0_Gwp1NIj5W-*77AasWGQi86uxIjiy&$MVHM+&;{vO zPx#SEg)0T!`7OW*Hdo|~8!g!Q_?py7up?e8S5u(!72hVnYfQ|DHOvBY5#ui&{@}T% z$>EDEBH#1JY+7A2!f66}i9-4ojxyEvUzL5QHH{jZO6%qzoe)Z57Z-WOqX!LFU*~hs zFUwk~KYJ6{0SS$uUnxV71)Zz^f!&-yGi2ndJ0pTO!azZ)#xYJB!nM7W^$S z2siE}2GJXQ`cp;yt3ttSrK3KLYpruz-u`dmiGFU~Ux)GRT4ed3Nn$OJ!+bFcu$UM9 za}oT>2AHERPQUED6j|go^zZ0#%8Sy)5gL4_gM~7{EtFv2>yDB5^3rL4a~T&dm218nV!girbuUxaIVw|Y{KmmPgx<(b2arhy&<`_-RU&B3;E4o8zC z1CgvMY$aIdU6V{LM9Z|4lyKXGRBf#^%Pl$D_tvA-r6VV|IN4tPh+b!OhkVD&^NL^_ z&{w)DMwQl9e~?yjn6V;M`{|l}|E5`+eqxE{n~a=4#O=l#7(=j4Yl#;DFt>vJwzc9w zfm+#HDc=eyZ?D3B9Iv)r!scCBw=~<>pBp@4)sN32cNg}CY?hnCf*q?XU(FL$Pv2N% zs%lrF~P%xvm1d(z3w1uQ5m5YMWBn_PDJN-3<1? z-K$#Ca{=0$9wS9K9FyH!ZAe5sOWxcgP5T{#M22NwO7E;y9&DI^mJe_K(nkx@le?LM z+y{`qth#VkpUP{w`JAr;l53X1zC-l;J&$;2Es$EYKvN%$EPI)@4Hb1vY_ICQ1T7m4 z9$>`;O7T2<_TT6_Td@GMijVc6?1-3RpG2cFg&2)v6j;UXD7gnu0gst_QD~fX4uZ7i z8%XQGr|-qnu8rm?2RXVvfBytb9Ag@_VJLJ&(LYWd1TmPXFP|=*em(nQbjsoPUd45gq6g}4Svzr=oa^(s+K4E!N^~6LLpfc;Sce0kMiI(m)uGYQmxE7`CG5K3VmbwM=BB4j)n64YT3GjX{2N6HVeM=zdu_jkUbVCc7bQ z4|ff?93p>m(8D^PrI@mv_mjXk7-A3VxN3kU2uV)%(rEtVJFrIQYoxvIiDo688InqYrlO^(^VVg#d#LZ7BnlS^i@s!p zZ~RYZiV4?>fV}YM@_F9w+AQNi$={V2k`>13cR6blkS%-+i(SjP4+;drq2?(Aqb(NX z!!qozbRO;6(D^P49G%Ca9{cuos8)xc+vxPMNrTHZ*MAZKuAc;DWe-g6yh$>&h#pXy z?gU5d9(ETh?{BK6|C_2`^-~xPnnTUT9uX_wsaaUe-E#zcO3Pnu?|JRdXS)XnvL0l7 zaeDZQot&G|y{yhOrU6>z6J7^VA^sHS^RnqXb@Vxa+C%AtM%H zoBa&4zZdYrYQ80-v{CZ(8&@Lt*BjID)Llf1Q>OjFt*!?@q*F4nSAEd?nAVcW(p{^< z1fe)YtDkuAs|G~8-F@;@q2T)yL$njB^NuzXg*l4r29Ip34{>VZ^&@`Ti8s_;4ZCc< z4&3p_liXO-^mtop0^qGt&%dxd)!5t|P;c-ff3QV!N@G(lb4FhbJJSL?^9m*I9%d!R z|B=5krS!oWP(YCYKyuDj2Ub}RGTlir9`Tn*=VYbsXO6q8&i`J#YRPDnG-IokX6 z-4B8DuKCb^Z$|F`7rZ9o-kzLLe>wtpA%sq^l0PARLUtp&4lkN&en(JZ<$6_x415hI zonnqfouSJVh_;NeNz23&M_5ZREu->57+kj|Y>?w}LB?%`SmkzIP{4|2=cKce-vc%_ z<=4|*#aZT`8`@5sfTS5UoeLqlzv@5Vf5`H4hgTF^h+0YFrNQ*!`7zn=)LNbM(C4$` zy@6&v$ZbryQ{kwm#&d=ojP{qJ5q)TqdorfJ&nSr#mV72n{dl=5udbF|-21C*15&F< zY4Mqq?eAvs^Gb(=Jroao2JDn9rKwcBlBIYE$zJ`0H@6u$`Z zfT2d8bo|+wePJ;+@%cWGv55cQ>fBunejIID&{dv@UWlY!1fRcG=-lmaI{VSyO-)_G zr4uDV+e`Zg`ka!Vm`i-?)TD9daUqWHB(m}Ofl}td%^|5iy<5g28l6m_hw` zRd}IGUOG)bHCe9B6UnQR4?hPU)oT@39qg}!;#F>J=%o4Lv#wpc3LGCE_?aOgf@cvN z$)XaB1~s@S7NThkRA)u9C58EcpJtz!a~b7-C%qUGe~Zn;u$jP*Hel~?lJ3m2g?})v zhb5ZmE~8$ih0U=>Q|-p2Gr zeL8PhI0@jn%nWjRwXhi66uuOD2bTV(_@ha3hYjKwbW8xOKKN}YTCJu)07QfG4_gs& z=?}0rn0h>u9ksia+B?tFC1Yv0R;9Z>yOEM)<8oIYS#0GsI%4ls*;1S8yFy>k$SMJl zenv-w=8N-t6s9OI&1`E>IauY!#>VFsU1o8y&`iZTOZK7RLH=<(Ux#N7B(nHjomt*i z?eP==@WC$7y}06PWThIid5DMLc?Xkmu2?%X1Bd$?TdeQXsr|Eur{_({eZR5Y1b|jI zOrkH&wYIjv%RzfVpmgkc!|P^Z;MpiI@?ad;07Jm>6WvLqY3P$-rr={TqS8MPZ3geT zuDY=JB)MxrcK^-&ojuoZ+gC+K+69i3)s53(QNDG^FjuVly#)DvHPaaqHZ$QarvxT_MwMcV<@eO2aq815;MFeo zekQDoK{ICm(@ehA)`dBn(VNMB&cs-_3@L}^Rc$S-K4*ytg`md)Vc7X9Z1s!ZVd9YD z9joy2Z2smWmS1y>gQPH7Dh-7g@uu)EbotYC6`POh?ThOU^Yj1B{lhoQ0kx%QTp&HO zxa{5O<3c*D`5+$y&a_UwWP6!=7AI4)%TNsr4E{Rq5bo+MIA$d3Sq?m36I*t)Bo!s` zI`JK}WjXRtV`{cz57L{`nkk1(N%>XRDqBqpe=}*+r>zz<;J$rK`;4SN}{| z#Fvcq_qMtuM2K>B!kRj#22=eZ z^x;rlD~-BFi;~2dw>2_CZsuO<%I`SMZnmSsv{pk8$#x>BC*|1@jecyuz2tdxEIN)Q zOI(tkV3u-(GHg;gtW{hcX{j8=WA~X~*dDCR92Bl159ebqJ{4x8oDo8A)JSkt)wqWk z>Mi8{MpjW3kG^qt-e05CwP^fQsKR%#H2r(G(myu6TKDqsdVI!gs%L1>dcQbH?1@Nm z5rWSn597=QwT9dssrMCfZCh@7HaNaz16KgNXyQ$Z? zn+6rxvPVs}r>foD_B=PDw-9TfhGI#;-#*;q!ve$RRaUS%Du)f1q2t-GvC9jOg-#Wh zhv-K9?I11_OhGH6Dkxqbh%-h@8XD+74`p+SS0J*7#V9(NIi*petv_*i?nifY4vTcP z?n6d!@0Stb1VBA}70$NmX*h^xNKvt?fBZQwzVi3>Kfi#qu~wu9SBmbhVFWz*Zk`U{ zm6z6t$Ix+Pl3G(>qN)(c)~(}X1mB#B&R2b3=B$u{>5g*esPFveJG3Hh^2)5fkc)M!-03 256:\n img = img[:, :256]\n \n if h > 64:\n img = img[:64, :]\n \n # rotating the image clockwise to bring the image shape to (x,y)\n final_img[:h, :w] = img\n return cv2.rotate(final_img, cv2.ROTATE_90_CLOCKWISE)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T13:34:01.172690Z","iopub.execute_input":"2022-08-12T13:34:01.173126Z","iopub.status.idle":"2022-08-12T13:34:01.183181Z","shell.execute_reply.started":"2022-08-12T13:34:01.173078Z","shell.execute_reply":"2022-08-12T13:34:01.181253Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"# training the model 30000 images and the validating it on 3000 images\ntrain_size = 30000\nvalid_size= 3000","metadata":{"execution":{"iopub.status.busy":"2022-08-12T13:34:07.830556Z","iopub.execute_input":"2022-08-12T13:34:07.830982Z","iopub.status.idle":"2022-08-12T13:34:07.836832Z","shell.execute_reply.started":"2022-08-12T13:34:07.830952Z","shell.execute_reply":"2022-08-12T13:34:07.835488Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"# training\ntrain_x = []\n\nfor i in range(train_size):\n img_dir = '/kaggle/input/handwriting-recognition/train_v2/train/'+train.loc[i, 'FILENAME']\n image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)\n image = preprocess(image)\n image = image/255.\n train_x.append(image)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T13:41:51.845380Z","iopub.execute_input":"2022-08-12T13:41:51.846099Z","iopub.status.idle":"2022-08-12T13:42:38.716267Z","shell.execute_reply.started":"2022-08-12T13:41:51.846060Z","shell.execute_reply":"2022-08-12T13:42:38.714703Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"# validating\nvalid_x = []\n\nfor i in range(valid_size):\n img_dir = '/kaggle/input/handwriting-recognition/validation_v2/validation/'+valid.loc[i, 'FILENAME']\n image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)\n image = preprocess(image)\n image = image/255.\n valid_x.append(image)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:12:01.127329Z","iopub.execute_input":"2022-08-12T14:12:01.127797Z","iopub.status.idle":"2022-08-12T14:12:05.442923Z","shell.execute_reply.started":"2022-08-12T14:12:01.127745Z","shell.execute_reply":"2022-08-12T14:12:05.441529Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"# .reshape(a: array_like = -1(we want numpy to figure out as the dimensions are unknown))\ntrain_x = np.array(train_x).reshape(-1, 256, 64, 1)\nvalid_x = np.array(valid_x).reshape(-1, 256, 64, 1)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:15:04.071714Z","iopub.execute_input":"2022-08-12T14:15:04.072120Z","iopub.status.idle":"2022-08-12T14:15:05.639892Z","shell.execute_reply.started":"2022-08-12T14:15:04.072088Z","shell.execute_reply":"2022-08-12T14:15:05.638520Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"code","source":"# labels are converted to numbers representing each character\n# labels are then prepared for Connectionist Temporal Classification Loss (CTC Loss)\nalphabets = u\"ABCDEFGHIJKLMNOPQRSTUVWXYZ-' \"\n# max length of input labels\nmax_str_len = 24 \n# +1 for ctc pseudo blank\nnum_of_characters = len(alphabets) + 1 \n# max length of predicted labels\nnum_of_timestamps = 64 \n\n\ndef label_to_num(label):\n label_num = []\n for ch in label:\n label_num.append(alphabets.find(ch))\n \n return np.array(label_num)\n\ndef num_to_label(num):\n ret = \"\"\n for ch in num:\n if ch == -1: # CTC Blank\n break\n else:\n ret+=alphabets[ch]\n return ret","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:15:31.449851Z","iopub.execute_input":"2022-08-12T14:15:31.451145Z","iopub.status.idle":"2022-08-12T14:15:31.460936Z","shell.execute_reply.started":"2022-08-12T14:15:31.451113Z","shell.execute_reply":"2022-08-12T14:15:31.459323Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"name = 'YASH'\nprint(name, '\\n',label_to_num(name))","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:15:52.049364Z","iopub.execute_input":"2022-08-12T14:15:52.050183Z","iopub.status.idle":"2022-08-12T14:15:52.058398Z","shell.execute_reply.started":"2022-08-12T14:15:52.050152Z","shell.execute_reply":"2022-08-12T14:15:52.056747Z"},"trusted":true},"execution_count":23,"outputs":[]},{"cell_type":"code","source":"# train_y contains the true labels converted to numbers and padded with -1. \n# The length of each label is equal to max_str_len.\ntrain_y = np.ones([train_size, max_str_len]) * -1\n# train_label_len contains the length of each true label (without padding)\ntrain_label_len = np.zeros([train_size, 1])\n# train_input_len contains the length of each predicted label. \n# The length of all the predicted labels is constant i.e number of timestamps - 2.\ntrain_input_len = np.ones([train_size, 1]) * (num_of_timestamps-2)\n# train_output is a dummy output for ctc loss.\ntrain_output = np.zeros([train_size])\n\nfor i in range(train_size):\n train_label_len[i] = len(train.loc[i, 'IDENTITY'])\n train_y[i, 0:len(train.loc[i, 'IDENTITY'])]= label_to_num(train.loc[i, 'IDENTITY']) \n","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:16:06.528136Z","iopub.execute_input":"2022-08-12T14:16:06.528554Z","iopub.status.idle":"2022-08-12T14:16:08.159873Z","shell.execute_reply.started":"2022-08-12T14:16:06.528524Z","shell.execute_reply":"2022-08-12T14:16:08.158600Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"valid_y = np.ones([valid_size, max_str_len]) * -1\nvalid_label_len = np.zeros([valid_size, 1])\nvalid_input_len = np.ones([valid_size, 1]) * (num_of_timestamps-2)\nvalid_output = np.zeros([valid_size])\n\nfor i in range(valid_size):\n valid_label_len[i] = len(valid.loc[i, 'IDENTITY'])\n valid_y[i, 0:len(valid.loc[i, 'IDENTITY'])]= label_to_num(valid.loc[i, 'IDENTITY']) ","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:16:25.240659Z","iopub.execute_input":"2022-08-12T14:16:25.241050Z","iopub.status.idle":"2022-08-12T14:16:25.543503Z","shell.execute_reply.started":"2022-08-12T14:16:25.241021Z","shell.execute_reply":"2022-08-12T14:16:25.541621Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"code","source":"print('True label : ',train.loc[100, 'IDENTITY'] , '\\ntrain_y : ',train_y[100],'\\ntrain_label_len : ',train_label_len[100], \n '\\ntrain_input_len : ', train_input_len[100])","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:16:35.032027Z","iopub.execute_input":"2022-08-12T14:16:35.033341Z","iopub.status.idle":"2022-08-12T14:16:35.042920Z","shell.execute_reply.started":"2022-08-12T14:16:35.033308Z","shell.execute_reply":"2022-08-12T14:16:35.041419Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"code","source":"input_data = Input(shape=(256, 64, 1), name='input')\n\ninner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(input_data) \ninner = BatchNormalization()(inner)\ninner = Activation('relu')(inner)\ninner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)\n\ninner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)\ninner = BatchNormalization()(inner)\ninner = Activation('relu')(inner)\ninner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)\ninner = Dropout(0.3)(inner)\n\ninner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)\ninner = BatchNormalization()(inner)\ninner = Activation('relu')(inner)\ninner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)\ninner = Dropout(0.3)(inner)\n\n# CNN to RNN\ninner = Reshape(target_shape=((64, 1024)), name='reshape')(inner)\ninner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)\n\n## RNN\ninner = Bidirectional(LSTM(256, return_sequences=True), name = 'lstm1')(inner)\ninner = Bidirectional(LSTM(256, return_sequences=True), name = 'lstm2')(inner)\n\n## OUTPUT\ninner = Dense(num_of_characters, kernel_initializer='he_normal',name='dense2')(inner)\ny_pred = Activation('softmax', name='softmax')(inner)\n\nmodel = Model(inputs=input_data, outputs=y_pred)\nmodel.summary()","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:16:52.431956Z","iopub.execute_input":"2022-08-12T14:16:52.432441Z","iopub.status.idle":"2022-08-12T14:16:57.125958Z","shell.execute_reply.started":"2022-08-12T14:16:52.432380Z","shell.execute_reply":"2022-08-12T14:16:57.124456Z"},"trusted":true},"execution_count":27,"outputs":[]},{"cell_type":"code","source":"# the ctc loss function\ndef ctc_lambda_func(args):\n y_pred, labels, input_length, label_length = args\n # the 2 is critical here since the first couple outputs of the RNN\n # tend to be garbage\n y_pred = y_pred[:, 2:, :]\n return K.ctc_batch_cost(labels, y_pred, input_length, label_length)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:17:11.996334Z","iopub.execute_input":"2022-08-12T14:17:11.996806Z","iopub.status.idle":"2022-08-12T14:17:12.004659Z","shell.execute_reply.started":"2022-08-12T14:17:11.996762Z","shell.execute_reply":"2022-08-12T14:17:12.002945Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"labels = Input(name='gtruth_labels', shape=[max_str_len], dtype='float32')\ninput_length = Input(name='input_length', shape=[1], dtype='int64')\nlabel_length = Input(name='label_length', shape=[1], dtype='int64')\n\nctc_loss = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])\nmodel_final = Model(inputs=[input_data, labels, input_length, label_length], outputs=ctc_loss)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:17:21.872630Z","iopub.execute_input":"2022-08-12T14:17:21.873066Z","iopub.status.idle":"2022-08-12T14:17:22.002669Z","shell.execute_reply.started":"2022-08-12T14:17:21.873037Z","shell.execute_reply":"2022-08-12T14:17:22.001344Z"},"trusted":true},"execution_count":29,"outputs":[]},{"cell_type":"code","source":"# the loss calculation occurs elsewhere, so we use a dummy lambda function for the loss\nmodel_final.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(learning_rate = 0.0001))\n\nmodel_final.fit(x=[train_x, train_y, train_input_len, train_label_len], y=train_output, \n validation_data=([valid_x, valid_y, valid_input_len, valid_label_len], valid_output),\n epochs=60, batch_size=128)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T14:17:37.141027Z","iopub.execute_input":"2022-08-12T14:17:37.142315Z","iopub.status.idle":"2022-08-12T15:03:19.269779Z","shell.execute_reply.started":"2022-08-12T14:17:37.142281Z","shell.execute_reply":"2022-08-12T15:03:19.268395Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"preds = model.predict(valid_x)\ndecoded = K.get_value(K.ctc_decode(preds, input_length=np.ones(preds.shape[0])*preds.shape[1], \n greedy=True)[0][0])\n\nprediction = []\nfor i in range(valid_size):\n prediction.append(num_to_label(decoded[i]))","metadata":{"execution":{"iopub.status.busy":"2022-08-12T15:03:39.728736Z","iopub.execute_input":"2022-08-12T15:03:39.729189Z","iopub.status.idle":"2022-08-12T15:03:42.927008Z","shell.execute_reply.started":"2022-08-12T15:03:39.729157Z","shell.execute_reply":"2022-08-12T15:03:42.925513Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"code","source":"y_true = valid.loc[0:valid_size, 'IDENTITY']\ncorrect_char = 0\ntotal_char = 0\ncorrect = 0\n\nfor i in range(valid_size):\n pr = prediction[i]\n tr = y_true[i]\n total_char += len(tr)\n \n for j in range(min(len(tr), len(pr))):\n if tr[j] == pr[j]:\n correct_char += 1\n \n if pr == tr :\n correct += 1 \n \nprint('Correct characters predicted : %.2f%%' %(correct_char*100/total_char))\nprint('Correct words predicted : %.2f%%' %(correct*100/valid_size))","metadata":{"execution":{"iopub.status.busy":"2022-08-12T15:03:57.275214Z","iopub.execute_input":"2022-08-12T15:03:57.276553Z","iopub.status.idle":"2022-08-12T15:03:57.309933Z","shell.execute_reply.started":"2022-08-12T15:03:57.276494Z","shell.execute_reply":"2022-08-12T15:03:57.308509Z"},"trusted":true},"execution_count":32,"outputs":[]},{"cell_type":"code","source":"test = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_test_v2.csv')\n\nplt.figure(figsize=(15, 10))\nfor i in range(6):\n ax = plt.subplot(2, 3, i+1)\n img_dir = '/kaggle/input/handwriting-recognition/test_v2/test/'+test.loc[i, 'FILENAME']\n image = cv2.imread(img_dir, cv2.IMREAD_GRAYSCALE)\n plt.imshow(image, cmap='gray')\n \n image = preprocess(image)\n image = image/255.\n pred = model.predict(image.reshape(1, 256, 64, 1))\n decoded = K.get_value(K.ctc_decode(pred, input_length=np.ones(pred.shape[0])*pred.shape[1], \n greedy=True)[0][0])\n plt.title(num_to_label(decoded[0]), fontsize=12)\n plt.axis('off')\n \nplt.subplots_adjust(wspace=0.2, hspace=-0.8)","metadata":{"execution":{"iopub.status.busy":"2022-08-12T15:04:53.781368Z","iopub.execute_input":"2022-08-12T15:04:53.781926Z","iopub.status.idle":"2022-08-12T15:04:55.516671Z","shell.execute_reply.started":"2022-08-12T15:04:53.781841Z","shell.execute_reply":"2022-08-12T15:04:55.515279Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]} \ No newline at end of file