From 04b9090b25b3e7749e36358bb75d53d7c4192448 Mon Sep 17 00:00:00 2001 From: vivekshingate Date: Sat, 8 Dec 2018 14:48:35 +0000 Subject: [PATCH 1/4] Done --- __pycache__/__init__.cpython-36.pyc | Bin 159 -> 157 bytes .../__pycache__/__init__.cpython-36.pyc | Bin 170 -> 177 bytes .../__pycache__/build.cpython-36.pyc | Bin 655 -> 700 bytes q01_outlier_removal/build.py | 31 ++++++++++++++++++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 185 -> 183 bytes .../test_q01_outlier_removal.cpython-36.pyc | Bin 1835 -> 1837 bytes 6 files changed, 31 insertions(+) diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index 2e5da7da778c48a9c58d7d055bbdb642fcb66418..25d864e8e4d25f35234ccac694138a22f9b59b54 100644 GIT binary patch delta 55 zcmbQwIG2&bn3tEU{qX#v?BasNTjn3tDpxuj9lL=JORAN|nc)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)zMt0Qi&>ga7~l diff --git a/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/__pycache__/__init__.cpython-36.pyc index 2f9a42a105b2b26ec10c60ac4d11fa03f7624d22..d6ce57b785fdfdbd836783446490467ab3511e5e 100644 GIT binary patch delta 55 zcmZ3*xRH^=n3tEU{qX#v?BasNTpM+bgH2eT6#zz#wA#C~Eo%!bF89x}0M_;~ezP^(q^c`JU6#n<&g!=#j39M0# zC7u#Nmbj*pPBTG8wj?zbxfpyxbuM#Bml%WXb5st`!4Dn3zb#nHDs7o*oR>oJR$0?@ zhHJ@;6uUeu;_z9bkDi`maN!Bk$}Y|!e8We89wOgFQp4d~a;SJep=SS{2-&ccZ@6V}S=rS-GmpoofFAz?BT6& oOf)cgUjXR`36j~W>MM{!#^TZ3icZVN=|zU*P>>P6mK8g{0F0%&hX4Qo literal 655 zcmZ`%F>ll`6t?3ecRlsEGM>c5b_@-O6#-SdR6-RJOj#l~&m2yDwre|8i^6pL1ZH;r z50h7>F8u*aJcofR5_aCFJb&+dFYh_e=kwVQ`K`Xo2>D5N7Q^?~X#NQT5TJ_GRM0Mi zY)fkJXNo@C@iX-E9iIg{&>W9aM-{;~i z^?I~1y$&vQy2|79vPZ2{A{N$39byj7*jPvn;u`tAae8Tvk>Yb6Y5SeHIf{S1uRdbI ziYswlod|ysjIG`}(}^~C9Qs=NAlt@?wG-Z_!P`G%b@}vJWBQ<^a7b^=6<5`HFBKfx zo4ELg*-JNYfI#TMxcBL!ak~Yct;$$hu7UfQS%_07I5c>zF3H>^K?)l`cn2QfS)9Ju T80eLFiN783CBihD(f#~4xYMeW diff --git a/q01_outlier_removal/build.py b/q01_outlier_removal/build.py index ec278ba..1fb63c9 100644 --- a/q01_outlier_removal/build.py +++ b/q01_outlier_removal/build.py @@ -1,3 +1,4 @@ +# %load q01_outlier_removal/build.py # Default imports import pandas as pd @@ -6,3 +7,33 @@ # Write your Solution here: +def outlier_removal(loan_data): + + ApplicantIncome_95 = loan_data['ApplicantIncome'].quantile(0.95) + CoapplicantIncome_95 = loan_data['CoapplicantIncome'].quantile(0.95) + LoanAmount_95 = loan_data['LoanAmount'].quantile(0.95) + + loan_data.drop(loan_data[loan_data['ApplicantIncome']>ApplicantIncome_95].index,inplace=True) + loan_data.drop(loan_data[loan_data['CoapplicantIncome']>CoapplicantIncome_95].index,inplace=True) + loan_data.drop(loan_data[loan_data['LoanAmount']>LoanAmount_95].index,inplace=True) + + return loan_data + + +# loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') +# loan_data = loan_data.drop('Loan_ID', 1) + +#Call tothe function - +outlier_removal(loan_data) +# def outlier_removal(data): +# q1=loan_data['ApplicantIncome'].quantile(0.95) +# q2=loan_data['CoapplicantIncome'].quantile(0.95) +# q3=loan_data['LoanAmount'].quantile(0.95) + +# print(q1,q2,q3) +# df =loan_data.drop(loan_data[(loan_data['ApplicantIncome']>q1)].index) +# df1=df.drop(df[(df['CoapplicantIncome']>q2)].index) +# df2=df1.drop(df1[(df1['LoanAmount']>q3)].index) +# return df2 + + diff --git a/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/__init__.cpython-36.pyc index 5a057ffb73694628cef3ed87e03ee3a17f7410bc..97f8adc24d2f9531ffe4ce5d3272ade521bcee59 100644 GIT binary patch delta 55 zcmdnVxSf&1n3tDp!4>Y9i5%w2Df$`txvBbPnPsWj#Tl7->4_z&`r-LS*~JBk$*KCu K`6;OrQ$hjxUlVlz delta 57 zcmdnaxRa5?n3tDpxuj9lL=JP+H2u)x)S_bj#H5VO;*8Yn;?ks|#N5QZ%)~tXywvje Mw9K5;;)zM200=D;>;M1& diff --git a/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc b/q01_outlier_removal/tests/__pycache__/test_q01_outlier_removal.cpython-36.pyc index 4c0b6c7431c6a14108ba9d55dd44059612f144a5..a5a7e3fb07d998730c7c26fcd0d88e6a4edfc2aa 100644 GIT binary patch delta 71 zcmZ3@x0a97n3tDp!4>Y9lM^{taqy+5mZX*Dx0;XBn3tE!Sjs4B??ld3EL`cSB^!6UFshd6hZd(673(J^Wn>m-q-GbFCKV;- ZCgx=(=IQ69mdB@M=A;&HR$}621pp0E7h3=T From bb6ae95e38d05c0af6388fea54d107960b5a861b Mon Sep 17 00:00:00 2001 From: vivekshingate Date: Sat, 8 Dec 2018 16:53:54 +0000 Subject: [PATCH 2/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 179 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1156 bytes q02_data_cleaning_all/build.py | 30 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 185 bytes .../test_q02_data_cleaning.cpython-36.pyc | Bin 0 -> 3407 bytes 5 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc diff --git a/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..914b183340a747225dae19f1d3713d443abbc5d8 GIT binary patch literal 179 zcmYL?F$w}P6hyONA<7=a#xL56cmf*>8{r$0&u-jo{+gd%@JQar%GO(0nJvVDc?<)? zY|3)hKj&wa^&at4k;6V)CK*;8)uK2&R2=I+J{PFDB^ca5jnpnwIuV>WHqvQBVAv9P zuAC%iFj5ss#Bsnb@|8@2+iHzgCymCF4tbB-dq8x-CB$WSe@(Y7+uqY|e3oqW1-;xc A*8l(j literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3eb54d2011317a7aad1d959e54491aa852ac2e58 GIT binary patch literal 1156 zcmaJ>OOG5i5VpH}`n9v0NrYe{!3rUSw3nFx7Y>Mr$OQ@I0@6mUM8;`*XS>(^SlhG8 zMBS$$$M8S+H;ALJoc0$Wae>c0AzC4XTUD;ED%G4d8Z+2m zLCnukf(WWev7c!{C8>itloUF;twpe-`-kpxXGJ`%O^OtAsIn$6@+lUngJxdLDUoq~ zC}ula9Lb~lj)+CF3t-RXk@tyIWIvH&As7A~E9v*-G`~Sjv?3B8N)x2t% zmBupDtIEQ6r`uRnNzJrux@)dpd?2{xE7kFq^;(Lmv{l!#OMxpv8SKK$*& z8;UbZulzWG#E@?w<~OK;)MQ6$I#O743~Vp}JTwS6CSz#BIs#aI(LjdiKB;G~=XeKLB6t;0a`oAn&kO%EFS(WXztW=+-5V+0+a! zhJk)?$@U)y3^;xv`d#e%=hM|i*T~g%buBN=Mb)kWWOdf*OVfjZRf#gtEbGcxz|eB7 zr7^x?dfnBsw5zKJpG@f3)IV6yxKgWUn@WjgzjcQm;G5V2Cz@j@LUS6^1-%#K^kkB^ zVIK$j1hLb*U}$Sx&>0u?++Mht_q}X|OGKqx-bfes;GH9-7j6bla8l6r#XIH_U98)s zXHPLB6-q?46NT<;6u;%RYgF}Y`9GMJR}VgB|N3l}6C#a21aYaBTrhkm`pCaVLU;XC l_{8Wf_1={IIf~gR`Gvnm^A^;Cen?Y1b28{rd@?{3^|vgZF?@JQar%GO(0nJvVDc?<)? zY|3)hKj&wa^&at4k;6WlCmB{9)uK2&R2u6)J{PECOE9>B8m?Wav=E$PYPi#ez(h;n zW93+!!9-OeW)}l?DPGA4+*WI}S~MC@I^;cS??H%#0nr5~h|BK&nr_=@dr!OZpR?5$ D2%I!O literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc b/q02_data_cleaning_all/tests/__pycache__/test_q02_data_cleaning.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7e6580a7de3db9d331252be1098b19dd7b362afb GIT binary patch literal 3407 zcmd6p&2HO95XZSBMN*&sh~vaf+OTOIKt(Dg?x8JE6e*H4Fc7#v)1tv50K;%sG8KxH zcb7&bq$lMm`UZWUp7z?4U!kYYkp7TmH@1_DDeynb6-}M*2)(qnx zW9D$6{Qyq+CkQhbNeoKBnu$5Is70{PO6*~UR-kROO5zM%>K6S@;ti{`N(@d0HRetU zt@HYz!8}%-n$WKBdeP^iec{k1ZP5j~NSEj`Ul?4O8iSR=)u};O`QpsgwI>FvvHDws z)wwmb=ygV(8STa%tk2kL6Yc%NrFs}j4()|3S4qr8Aowub4U;_+20?$5g(?i9gokOI z_MxNI=c<<{Nhtbq#G}b=9MDa&Fbzh6vp7<5mIirROyR5($zAF`#oL>Y_6RtQ@bL!+ zKIH+N@+k-}fKihXX1+D3#Vlrn1ysxdTVXEqz&fnTYG7SfXAQ6(YqA#DDqCQSU~6m% z!d%u%7~Z4V-ZZ;XMdtT%*_HDe0PfF;aa_smseTD(bk zfW0Nqop3=Ww;ulvFcfD#aLI=}RnlK)cl{^vE>EG=wgu*gsDS8tC?yx_@lGBl+DdrZ zHnlBz($h9l{mT$d81}E~Zf8T@-31q~kwD6-Jt#DLCdtZ(9g`FRI1wP0P+1~Dv;Ogbk3ciuJm z7=3h7(EkJ)v-C^$_}v_mwzl>x7&ZC!5HjPvFi(^ZiJEiiv&>I3j=N7g0HqI>R?0z1!AxI>DGQ~KiT|Srhy3MT zf1KrzmwbOLWRkxw+pbtyFEv2eCLCSjV~uLcYNa8yalleALRqS&3(6D%yGN;x4G}=a?*2Pb+9`eC7n1Iyt6ii z(ragq-NiSB($7}AuDu{gVb=-*T?>L?#_|N)jUd2^tGpu?@Rcv2tfAaM`55I>l+RGs zQI2-FqJ~_Q9{X+g_tixe*t%w3nx`D&S%1Nq>N1Z5*m>n8dF- z@2KHAJNNDfbCr4LRURiyU`3=WWxJ><-8jX9TosiL^#l)DfW>#UsLinugi2auBXJXE z3fx8nZo~ph8iD_j;&Tw{#4?5QUOQ!NrL8a>)7sBPl@UN6tsk}T7qOm(NPFLv+`q?S VN8$$PnGI6@YG6IG0-Jw}{0E2BpWXlf literal 0 HcmV?d00001 From 82639cfb0616c013147001e81dc8b83edeafc14c Mon Sep 17 00:00:00 2001 From: vivekshingate Date: Sun, 9 Dec 2018 16:26:54 +0000 Subject: [PATCH 3/4] Done --- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 181 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 2083 bytes q02_data_cleaning_all_2/build.py | 95 +++++++++++++++++- .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 187 bytes .../q02_test_data_cleaning_2.cpython-36.pyc | Bin 0 -> 4762 bytes 5 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc diff --git a/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..466685548da3756b469bc29be2c32dd5bfc84279 GIT binary patch literal 181 zcmYL?F$w}P6hyONA<7=a#xH9t;t6alY=lotKD*Iu{+gd%@JQar%GO(0nJvVDc?<)? ztjluNKj$Zv^&at4k;5)qCK*;8)S}owR2=I+K4)mSB^ca5S7}_Rbs{)&yh^7HfniJF zxptDA!ANx^5yt_$$QLpRZmSBdP8y9T9r7Nv_gDd<3oaopyZdXpY1#Ikw&S;Ct1lyL BGHL(- literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..13b9f297d3f4174f846d4050eb7a6ea1e6a35fe7 GIT binary patch literal 2083 zcma)8OOM+&5GG|=k{|MGH)+#jZ;JwUvbhC|qT9`)4X_Q0rbPoH06~kiBQNy`B`@MY zd)nrhOaDdx60bewFZ9%*Bx~)(qNoLtGapCa42R^L~qsS4tH-XkX_L$ zv9{^oIK(FdG9)81CKK-Q&L4;zaQq4#bw7h&^zsO6pR=6NKnfOz@e+Vxvd(2F6jdTh zu9yU`J>AX86VP&lubUws8i( z19**m25+V;gmSdP1VPHR3dgTO)3b;F<^e2B~;3-|}Ey@j&0$R6Lj)9<;I{3+*N5 z-Y0zB-qAatJGrxR3+D!3yM?>*U^k$T7Vq-jM{5D|zxH;}KKE<117CFtujni-KH$So z)^)c;P|&V_OydW!+R0;i(B19?3WSy5b>ZG+!OI|2fdAWRK_%-gKQX03!Aa+wuLEYbJ@H=&xuEA!DwB=ia8}$94#JuX zBZ~K>Nczu3YAE77qmMuMJKEE4$idGlNrlQc^i&B(e_1Og^&RjxHD|FhMFVJU_CCx= z_m+kDdlY1FW{zb}e+;ucQ5!NU4`VeM`m0dUQBPZOs_pEZ%9S~g#9x4Yu2`Hc5*2Ab z6H){@<@qKRnReoJG#5%+Dc8NF0DK)qAq=!+5@;t#V#tQJIZWi}K)bIEQy6Op1d-+1 z-E09603ZX&=2A#1s)|BJz%LZ6h|DTWG66!dO!hFp_F9_^_f-0X+yRcE)Ky_tAJ9b@ zGihcD9j#*|!(G=Vw(-7a1N9r8GWcB$+z$5iV<@y+uV7s}?Ke&V`wOWrA?N?l*<09r zv#an*TxM^f_`;NDzbEQ4OIaXhrr1K2UBV)U0Tkn<5?N*{ZK{$LEY9rR<72v4yeX5C z9?$0MQ1WTI(Zl`G<7cMqUO^K1WH_Jx$9$&u&SzTA#@rQ1pZ*2%czX%CYxc~qF literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py index e20ff7b..11a184f 100644 --- a/q02_data_cleaning_all_2/build.py +++ b/q02_data_cleaning_all_2/build.py @@ -1,8 +1,10 @@ +# %load q02_data_cleaning_all_2/build.py # Default Imports import pandas as pd import numpy as np from greyatomlib.logistic_regression_project.q02_data_cleaning_all.build import data_cleaning from greyatomlib.logistic_regression_project.q01_outlier_removal.build import outlier_removal +from sklearn.preprocessing import LabelEncoder loan_data = pd.read_csv('data/loan_prediction_uncleaned.csv') loan_data = loan_data.drop('Loan_ID', 1) @@ -10,4 +12,95 @@ X, y, X_train, X_test, y_train, y_test = data_cleaning(loan_data) -# Write your solution here : +# # Write your solution here : + +# def data_cleaning_2(X_train,X_test, y_train, y_test): + +# num_train_cols = X_train._get_numeric_data().columns +# tot_train_cols = X_train.columns +# cat_train_cols = set(tot_train_cols)-set(num_train_cols) + +# for col in num_train_cols: +# X_train[col]=np.sqrt(X_train[col]) + +# label_encoder = LabelEncoder() +# X_train['Gender'] = label_encoder.fit_transform(X_train['Gender']) +# X_train['Married'] = label_encoder.fit_transform(X_train['Married']) +# X_train['Education'] = label_encoder.fit_transform(X_train['Education']) +# X_train['Self_Employed'] = label_encoder.fit_transform(X_train['Self_Employed']) + + + +# for col in cat_train_cols: +# X_train = pd.get_dummies(X_train, columns=[col],drop_first=False) + +# #For Test +# num_test_cols = X_test._get_numeric_data().columns +# tot_test_cols = X_test.columns +# cat_test_cols = set(tot_test_cols)-set(num_test_cols) + +# for col in num_test_cols: +# X_test[col]=np.sqrt(X_test[col]) + +# for col in cat_test_cols: +# X_test = pd.get_dummies(X_test, columns=[col],drop_first=False) + +# X_test['Gender'] = label_encoder.fit_transform(X_test['Gender']) +# X_test['Married'] = label_encoder.fit_transform(X_test['Married']) +# X_test['Education'] = label_encoder.fit_transform(X_test['Education']) +# X_test['Self_Employed'] = label_encoder.fit_transform(X_test['Self_Employed']) + +# return X_train,X_test,y_train, y_test + + + +# data_cleaning_2(X_train,X_test, y_train, y_test) +def data_cleaning_2(X_train,X_test,y_train,y_test): + X_train['ApplicantIncome']=np.sqrt(X_train['ApplicantIncome']) + X_test['ApplicantIncome']=np.sqrt(X_test['ApplicantIncome']) + X_train['CoapplicantIncome']=np.sqrt(X_train['CoapplicantIncome']) + X_test['CoapplicantIncome']=np.sqrt(X_test['CoapplicantIncome']) + X_train['LoanAmount']=np.sqrt(X_train['LoanAmount']) + X_test['LoanAmount']=np.sqrt(X_test['LoanAmount']) + + lablel_encoder = LabelEncoder() + X_train['Gender'] = lablel_encoder.fit_transform(X_train['Gender']) + X_train['Married'] = lablel_encoder.fit_transform(X_train['Married']) + X_train['Education'] = lablel_encoder.fit_transform(X_train['Education']) + X_train['Self_Employed'] = lablel_encoder.fit_transform(X_train['Self_Employed']) + + X_test['Gender'] = lablel_encoder.fit_transform(X_test['Gender']) + X_test['Married'] = lablel_encoder.fit_transform(X_test['Married']) + X_test['Education'] = lablel_encoder.fit_transform(X_test['Education']) + X_test['Self_Employed'] = lablel_encoder.fit_transform(X_test['Self_Employed']) + + + numericals_train = X_train.select_dtypes(include=[np.number]) + categoricals_train = X_train.select_dtypes(exclude=[np.number]) + dummies_train=pd.get_dummies(categoricals_train) + dummies_train_1=dummies_train.loc[:,'Dependents_0':'Dependents_3+'] + dummies_train_2=dummies_train.loc[:,'Property_Area_Rural':'Property_Area_Urban'] + dummies_train_final=pd.concat([dummies_train_1,dummies_train_2],axis=1) + final_X_train=pd.concat([X_train, dummies_train_final], axis = 1) + + final_X_train=final_X_train.drop('Dependents',axis=1) + final_X_train=final_X_train.drop('Property_Area',axis=1) + final_X_train=final_X_train.drop('Credit_History',axis=1) + final_X_train=final_X_train.drop('Loan_Amount_Term',axis=1) + + numericals_test = X_test.select_dtypes(include=[np.number]) + categoricals_test = X_test.select_dtypes(exclude=[np.number]) + dummies_test=pd.get_dummies(categoricals_test) + dummies_test_1=dummies_test.loc[:,'Dependents_0':'Dependents_3+'] + dummies_test_2=dummies_test.loc[:,'Property_Area_Rural':'Property_Area_Urban'] + dummies_test_final=pd.concat([dummies_test_1,dummies_test_2],axis=1) + final_X_test=pd.concat([X_test, dummies_test_final], axis = 1) + + final_X_test=final_X_test.drop('Dependents',axis=1) + final_X_test=final_X_test.drop('Property_Area',axis=1) + final_X_test=final_X_test.drop('Credit_History',axis=1) + final_X_test=final_X_test.drop('Loan_Amount_Term',axis=1) + + + return final_X_train,final_X_test,y_train,y_test + diff --git a/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02c3aa68a77db0beec1ef9ec951cbcab12b0a47f GIT binary patch literal 187 zcmYL@F$%&!6ht>*AxaKn8{xC;ev&nt{c-;$;E}wKm94k1a#M%{^B4vO zW?Pnv;kmr1toMYUiX0BvGRv^)q*lf8q2g5k@oAvpj$m{nwbHmy>qKzo*h;SrfniJF zxptDA!ANx^5yt`h$X7B6Zkr0NP8y9T9r7Nv_gFz9Ndky2xP-Xw@2~l;V>@`-PY)zp FeF1TsG|~V7 literal 0 HcmV?d00001 diff --git a/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc b/q02_data_cleaning_all_2/tests/__pycache__/q02_test_data_cleaning_2.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..09159ab202833d0797e20f767433c8bdaa38a6fa GIT binary patch literal 4762 zcmd5=Nt4q?6xL>0-e#YHfdC2#5T}TZhb)9Z3W}|$N;#>J6qJf8A>2J48Cf#j(iC=) zley#{#3Ls#hlBe6N#*4Mj7qAPWnuh8 zd3eX+bf7>DR4fxGbnJ#l9pCkv_6}5)CaSPa5QUygnayZB*mpdrT58g8H}X7(HF<|N zdgt+-suwuE-C>ltjnEBzJMv>-N-7P$Z)Ly3#}DrgWcajIyh}UOC)5wQU9;AIU_pnn zuxsCD)UkhN&mF&kogV$LySL!x9tavwj-?QpC~qW7#mRwEBN?JYnITzXK&d0`P-aPi z6rnT{YOEYtCM)niPgcnqlm)U*HlQq$O|k`LiJTxOpqm1MSd-{lu&&w6M0+eLr0;LssFpmmefbPlHe0sogv&n1JOHo?-ei_ zQepzRQ5y*|FObXT19zYLpp`Y2g?0=Zi-N;B(01=-dPxT z_Jmc~j_27s)ewl`@l(_eHQA|jy5j7#2ljYm>(D!nBau`??(Iw=x-uJJVjd7Eo`gq6KAFlV+hE%p-Do63^de^O~dH@F$-u$7|UW7Oi;iI!5%}UrM9$I zhRC6g)|s+BrQZAgfC4abGwgP#xf?K((J*3u!iVW{^I6C7iNj5Fh5O8D(`T2>ZV-W; z(qUV$Eb{^{XT?flmk;?vpNHVeROnA^q(CSgBD6-psL1RhhRBWKgod9k%L$8z2)s0v zz@&>dzGo*NI0u8~NP#3T$%@?j=GbKZ7wdham2JSVL>30h9M@-C@QS2=<@hzlneb)T)rKXL-_JLVR8ueeln>;@4&B9 z#)uxsOcXPW@&Da9!L1x(a4~mIuqFr`=L+oPC`v+}hD_arTs_VcTHSpNQFU+|n)R;z zm=1}JAj0HK0>~Bzse+EucbrGeq!Hu_%n(BJAjgVa?7w!rl$t--#kh(2eva(m3Pf5U z(4}w|Wo8qZ>BHyWni+NhP(*%AOl^@MmKss*E5I93Gj{+>11-qS;4ySyfD6KR#ElV$ zZ|K<(%v_um-iHGMN4=in<@&sV{QzcBRv5B^IqMy~Q@WS^TScuJxF0kUf3NJE@1H%x*J^)>0;+K!G zl=3V~iC@IE6ejOmfHiWWoAVmW1k(Jp{0iTW2euF`=>8M5Y+}V1AdVnq=IUrX6mXz; zX+4a&a|f0*P`$h?4BPf$5p3HcZ`}rz+r|xD@`SBo&vB!~PNO)3;zJZ4p}2(N zQxupg83r8oC5me(4i?d|hFcMq1F;4-$5o>t8#-K>_|gq^bj7X8r!75M^T+937?JOW zXgmzZFEY;~*CX*p)Vc$E#;y|vZO?sPnX#3uOx;RW5~K{ETDOmnRI?AAObl^w`7g4r zk4F<>yh*bRI94Dw9z*2fQwrS8!hr#tItclRkh`(lu-Hg&^T^I)SBi;WCn=m=#ClxO mvCmM{P#~AnNEXJ8WH!GA=ax7^8gg-TDS6Ngc|+bB*Zd1gMkKfZ literal 0 HcmV?d00001 From 345956e60c2efdcb2ab6b9a2f5ea527427908148 Mon Sep 17 00:00:00 2001 From: vivekshingate Date: Tue, 11 Dec 2018 06:57:45 +0000 Subject: [PATCH 4/4] Done --- .../__pycache__/build.cpython-36.pyc | Bin 2083 -> 1430 bytes q02_data_cleaning_all_2/build.py | 132 +++++++++--------- .../__pycache__/__init__.cpython-36.pyc | Bin 0 -> 181 bytes .../__pycache__/build.cpython-36.pyc | Bin 0 -> 1314 bytes q03_logistic_regression/build.py | 15 ++ .../tests/__pycache__/__init__.cpython-36.pyc | Bin 0 -> 187 bytes ...est_q03_logistic_regression.cpython-36.pyc | Bin 0 -> 2301 bytes 7 files changed, 79 insertions(+), 68 deletions(-) create mode 100644 q03_logistic_regression/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/__pycache__/build.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc create mode 100644 q03_logistic_regression/tests/__pycache__/test_q03_logistic_regression.cpython-36.pyc diff --git a/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc b/q02_data_cleaning_all_2/__pycache__/build.cpython-36.pyc index 13b9f297d3f4174f846d4050eb7a6ea1e6a35fe7..a29dbf4042a1c304e6923df63a30f99dc27a802c 100644 GIT binary patch delta 802 zcmaJ;%Zd~+6iw<~)t#YR+M&mhK~T^FVHR%Ph&{N`L72rrqZDN)Jyy~6=p+r(kfIm8 z7Q_aBz?JU&2^abY`2s({m2SML8FggPMQ-lB=bW6I7pnyCdOo%5t&GymSS4*BJOU!IX72imcC%Kgq4#D?lRtja}6ge6S$@r zZ^^3F{I21C7jSI@YfM@Z6Y@+$TFsxbmHaB-BfbOEeoyvaRt4MN-173nJ~YG~2*~p6 z2z$6^MR*(5!0eYgLyPYMHy&uOPD`2!eg#VH!W8wA_ulxQJnc~UwapiQ_o|_`mKVac K%@6-Jc7Fo4kL~qsS4tH-XkX_L$ zv9{^oIK(FdG9)81CKK-Q&L4;zaQq4#bw7h&^zsO6pR=6NKnfOz@e+Vxvd(2F6jdTh zu9yU`J>AX86VP&lubUws8i( z19**m25+V;gmSdP1VPHR3dgTO)3b;F<^e2B~;3-|}Ey@j&0$R6Lj)9<;I{3+*N5 z-Y0zB-qAatJGrxR3+D!3yM?>*U^k$T7Vq-jM{5D|zxH;}KKE<117CFtujni-KH$So z)^)c;P|&V_OydW!+R0;i(B19?3WSy5b>ZG+!OI|2fdAWRK_%-gKQX03!Aa+wuLEYbJ@H=&xuEA!DwB=ia8}$94#JuX zBZ~K>Nczu3YAE77qmMuMJKEE4$idGlNrlQc^i&B(e_1Og^&RjxHD|FhMFVJU_CCx= z_m+kDdlY1FW{zb}e+;ucQ5!NU4`VeM`m0dUQBPZOs_pEZ%9S~g#9x4Yu2`Hc5*2Ab z6H){@<@qKRnReoJG#5%+Dc8NF0DK)qAq=!+5@;t#V#tQJIZWi}K)bIEQy6Op1d-+1 z-E09603ZX&=2A#1s)|BJz%LZ6h|DTWG66!dO!hFp_F9_^_f-0X+yRcE)Ky_tAJ9b@ zGihcD9j#*|!(G=Vw(-7a1N9r8GWcB$+z$5iV<@y+uV7s}?Ke&V`wOWrA?N?l*<09r zv#an*TxM^f_`;NDzbEQ4OIaXhrr1K2UBV)U0Tkn<5?N*{ZK{$LEY9rR<72v4yeX5C z9?$0MQ1WTI(Zl`G<7cMqUO^K1WH_Jx$9$&u&SzTA#@rQ1pZ*2%czX%CYxc~qF diff --git a/q02_data_cleaning_all_2/build.py b/q02_data_cleaning_all_2/build.py index 11a184f..f64827e 100644 --- a/q02_data_cleaning_all_2/build.py +++ b/q02_data_cleaning_all_2/build.py @@ -14,93 +14,89 @@ # # Write your solution here : -# def data_cleaning_2(X_train,X_test, y_train, y_test): - -# num_train_cols = X_train._get_numeric_data().columns -# tot_train_cols = X_train.columns -# cat_train_cols = set(tot_train_cols)-set(num_train_cols) - -# for col in num_train_cols: -# X_train[col]=np.sqrt(X_train[col]) - -# label_encoder = LabelEncoder() -# X_train['Gender'] = label_encoder.fit_transform(X_train['Gender']) -# X_train['Married'] = label_encoder.fit_transform(X_train['Married']) -# X_train['Education'] = label_encoder.fit_transform(X_train['Education']) -# X_train['Self_Employed'] = label_encoder.fit_transform(X_train['Self_Employed']) +def data_cleaning_2(X_train,X_test, y_train, y_test): + label_encoder = LabelEncoder() + #For Train data + num_train_cols = X_train._get_numeric_data().columns + tot_train_cols = X_train.columns + cat_train_cols = set(tot_train_cols)-set(num_train_cols) -# for col in cat_train_cols: -# X_train = pd.get_dummies(X_train, columns=[col],drop_first=False) + for col in num_train_cols: + X_train[col]=np.sqrt(X_train[col]) + + for col in ['Dependents','Property_Area']: + X_test = pd.get_dummies(X_test, columns=[col]) + + for col in ['Gender','Married','Education','Self_Employed']: + X_test[col] = label_encoder.fit_transform(X_test[col]) -# #For Test -# num_test_cols = X_test._get_numeric_data().columns -# tot_test_cols = X_test.columns -# cat_test_cols = set(tot_test_cols)-set(num_test_cols) + #For Test data + num_test_cols = X_test._get_numeric_data().columns + tot_test_cols = X_test.columns + cat_test_cols = set(tot_test_cols)-set(num_test_cols) -# for col in num_test_cols: -# X_test[col]=np.sqrt(X_test[col]) + for col in num_test_cols: + X_test[col]=np.sqrt(X_test[col]) -# for col in cat_test_cols: -# X_test = pd.get_dummies(X_test, columns=[col],drop_first=False) + for col in ['Dependents','Property_Area']: + X_test = pd.get_dummies(X_test, columns=[col]) -# X_test['Gender'] = label_encoder.fit_transform(X_test['Gender']) -# X_test['Married'] = label_encoder.fit_transform(X_test['Married']) -# X_test['Education'] = label_encoder.fit_transform(X_test['Education']) -# X_test['Self_Employed'] = label_encoder.fit_transform(X_test['Self_Employed']) + for col in ['Gender','Married','Education','Self_Employed']: + X_test[col] = label_encoder.fit_transform(X_test[col]) -# return X_train,X_test,y_train, y_test + return X_train,X_test,y_train, y_test # data_cleaning_2(X_train,X_test, y_train, y_test) -def data_cleaning_2(X_train,X_test,y_train,y_test): - X_train['ApplicantIncome']=np.sqrt(X_train['ApplicantIncome']) - X_test['ApplicantIncome']=np.sqrt(X_test['ApplicantIncome']) - X_train['CoapplicantIncome']=np.sqrt(X_train['CoapplicantIncome']) - X_test['CoapplicantIncome']=np.sqrt(X_test['CoapplicantIncome']) - X_train['LoanAmount']=np.sqrt(X_train['LoanAmount']) - X_test['LoanAmount']=np.sqrt(X_test['LoanAmount']) +# def data_cleaning_2(X_train,X_test,y_train,y_test): +# X_train['ApplicantIncome']=np.sqrt(X_train['ApplicantIncome']) +# X_test['ApplicantIncome']=np.sqrt(X_test['ApplicantIncome']) +# X_train['CoapplicantIncome']=np.sqrt(X_train['CoapplicantIncome']) +# X_test['CoapplicantIncome']=np.sqrt(X_test['CoapplicantIncome']) +# X_train['LoanAmount']=np.sqrt(X_train['LoanAmount']) +# X_test['LoanAmount']=np.sqrt(X_test['LoanAmount']) - lablel_encoder = LabelEncoder() - X_train['Gender'] = lablel_encoder.fit_transform(X_train['Gender']) - X_train['Married'] = lablel_encoder.fit_transform(X_train['Married']) - X_train['Education'] = lablel_encoder.fit_transform(X_train['Education']) - X_train['Self_Employed'] = lablel_encoder.fit_transform(X_train['Self_Employed']) +# lablel_encoder = LabelEncoder() +# X_train['Gender'] = lablel_encoder.fit_transform(X_train['Gender']) +# X_train['Married'] = lablel_encoder.fit_transform(X_train['Married']) +# X_train['Education'] = lablel_encoder.fit_transform(X_train['Education']) +# X_train['Self_Employed'] = lablel_encoder.fit_transform(X_train['Self_Employed']) - X_test['Gender'] = lablel_encoder.fit_transform(X_test['Gender']) - X_test['Married'] = lablel_encoder.fit_transform(X_test['Married']) - X_test['Education'] = lablel_encoder.fit_transform(X_test['Education']) - X_test['Self_Employed'] = lablel_encoder.fit_transform(X_test['Self_Employed']) +# X_test['Gender'] = lablel_encoder.fit_transform(X_test['Gender']) +# X_test['Married'] = lablel_encoder.fit_transform(X_test['Married']) +# X_test['Education'] = lablel_encoder.fit_transform(X_test['Education']) +# X_test['Self_Employed'] = lablel_encoder.fit_transform(X_test['Self_Employed']) - numericals_train = X_train.select_dtypes(include=[np.number]) - categoricals_train = X_train.select_dtypes(exclude=[np.number]) - dummies_train=pd.get_dummies(categoricals_train) - dummies_train_1=dummies_train.loc[:,'Dependents_0':'Dependents_3+'] - dummies_train_2=dummies_train.loc[:,'Property_Area_Rural':'Property_Area_Urban'] - dummies_train_final=pd.concat([dummies_train_1,dummies_train_2],axis=1) - final_X_train=pd.concat([X_train, dummies_train_final], axis = 1) +# numericals_train = X_train.select_dtypes(include=[np.number]) +# categoricals_train = X_train.select_dtypes(exclude=[np.number]) +# dummies_train=pd.get_dummies(categoricals_train) +# dummies_train_1=dummies_train.loc[:,'Dependents_0':'Dependents_3+'] +# dummies_train_2=dummies_train.loc[:,'Property_Area_Rural':'Property_Area_Urban'] +# dummies_train_final=pd.concat([dummies_train_1,dummies_train_2],axis=1) +# final_X_train=pd.concat([X_train, dummies_train_final], axis = 1) - final_X_train=final_X_train.drop('Dependents',axis=1) - final_X_train=final_X_train.drop('Property_Area',axis=1) - final_X_train=final_X_train.drop('Credit_History',axis=1) - final_X_train=final_X_train.drop('Loan_Amount_Term',axis=1) +# final_X_train=final_X_train.drop('Dependents',axis=1) +# final_X_train=final_X_train.drop('Property_Area',axis=1) +# final_X_train=final_X_train.drop('Credit_History',axis=1) +# final_X_train=final_X_train.drop('Loan_Amount_Term',axis=1) - numericals_test = X_test.select_dtypes(include=[np.number]) - categoricals_test = X_test.select_dtypes(exclude=[np.number]) - dummies_test=pd.get_dummies(categoricals_test) - dummies_test_1=dummies_test.loc[:,'Dependents_0':'Dependents_3+'] - dummies_test_2=dummies_test.loc[:,'Property_Area_Rural':'Property_Area_Urban'] - dummies_test_final=pd.concat([dummies_test_1,dummies_test_2],axis=1) - final_X_test=pd.concat([X_test, dummies_test_final], axis = 1) +# numericals_test = X_test.select_dtypes(include=[np.number]) +# categoricals_test = X_test.select_dtypes(exclude=[np.number]) +# dummies_test=pd.get_dummies(categoricals_test) +# dummies_test_1=dummies_test.loc[:,'Dependents_0':'Dependents_3+'] +# dummies_test_2=dummies_test.loc[:,'Property_Area_Rural':'Property_Area_Urban'] +# dummies_test_final=pd.concat([dummies_test_1,dummies_test_2],axis=1) +# final_X_test=pd.concat([X_test, dummies_test_final], axis = 1) - final_X_test=final_X_test.drop('Dependents',axis=1) - final_X_test=final_X_test.drop('Property_Area',axis=1) - final_X_test=final_X_test.drop('Credit_History',axis=1) - final_X_test=final_X_test.drop('Loan_Amount_Term',axis=1) +# final_X_test=final_X_test.drop('Dependents',axis=1) +# final_X_test=final_X_test.drop('Property_Area',axis=1) +# final_X_test=final_X_test.drop('Credit_History',axis=1) +# final_X_test=final_X_test.drop('Loan_Amount_Term',axis=1) - return final_X_train,final_X_test,y_train,y_test +# return final_X_train,final_X_test,y_train,y_test diff --git a/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9a149e06f016420b953cec16da925f8636985ea GIT binary patch literal 181 zcmXr!<>e~9!X3i^1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuO$79{M=Oivdps7 z?Ba~fy!6D9RQ>S$qU_>=#N<@{`TK_=2MRtkmQZ n{Xzrdcr5bz@$s2?nI-Y@dIgoYIBatBQ%ZAE?Lc-H12F>tI%hKm literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/__pycache__/build.cpython-36.pyc b/q03_logistic_regression/__pycache__/build.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f980fe2980ba471e645f195f0a444a97d02223b2 GIT binary patch literal 1314 zcmbtUJ8#=C5GEyA@*|3q*m-sA=EY8(H7FXS9RdUynxO#^Kww1Ll|wy3%0^u%liIz1 zqicUf|3bNT%3tWzqZ+qK-~t6IgQxR%$KyRdYTx&p--7nV*BV0K(Y?n7{RJ%b1q4G3 z3q&xyEn&qrv9ZBy;lwpkt8_=Wu}8d0uZeoxAdO0QMKf-ZR;7DlCH9Gr5yz8NRxdGW z^Y&zoHCVH>KyUDNg|*E4!X@iugLFujY?3YR^A7LwO}FuuS8RV5v_aa#W%lr*I~?KobjEjy_rpGmF_{dt~jRaw&d*svY<* zJC@)FD7WFHT2Y%g1uCr4!U*(QflGYj=5ApZZfQYVom1UD%rX&$G|7*XFparhKT7E% zsA7##-GYlS5p9*mo$&gsT`madA@Gh3Q6uK@s zWr4Xljp|~VgSEqCnrVEl@m#Khm)tPI3_x~`FfFf`a8K+2rt1gL&Z*$f4FS~?5Oo;R Q@1NBKy^gzh%c=W60Yx5zpa1{> literal 0 HcmV?d00001 diff --git a/q03_logistic_regression/build.py b/q03_logistic_regression/build.py index cdbd506..fd088f0 100644 --- a/q03_logistic_regression/build.py +++ b/q03_logistic_regression/build.py @@ -1,3 +1,4 @@ +# %load q03_logistic_regression/build.py # Default Imports import pandas as pd from sklearn.preprocessing import StandardScaler @@ -15,4 +16,18 @@ # Write your solution code here: +def logistic_regression(X_train, X_test, y_train, y_test): + stand_scale = StandardScaler() + X_train.loc[:,['ApplicantIncome','CoapplicantIncome','LoanAmount']]=stand_scale.fit_transform(X_train.loc[:,['ApplicantIncome','CoapplicantIncome','LoanAmount']]) + X_test.loc[:,['ApplicantIncome','CoapplicantIncome','LoanAmount']]=stand_scale.fit_transform(X_test.loc[:,['ApplicantIncome','CoapplicantIncome','LoanAmount']]) + + model = LogisticRegression() + model.fit(X_train,y_train) + y_pred = model.predict(X_test) + + return confusion_matrix(y_test,y_pred) + + + +logistic_regression(X_train, X_test, y_train, y_test) diff --git a/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc b/q03_logistic_regression/tests/__pycache__/__init__.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c0609279cf7f512837a303c90e3f9aa07faf5b8d GIT binary patch literal 187 zcmXr!<>e~9!X3i^1dl-k3@`#24nSPY0whuxf*CX!{Z=v*frJsnuMGW+{M=Oivdps7 z?Ba~fy!6D9RQ>S$qU_>=#N<@{`TK_=2MRtkmQZ t{Xzrdcr5bzC8@z#Ic?K+ z758N)?b0r>1R1XIr75ASVs&_rxA^kZ2EQd%tC)xFE05l%59k_wNZ09x@Wq;VDAvWs z)P3d7qV4M_{p^Ls+r0DM;vL~m9lFWMk5<^dfIO_Dkd*#ULYqCN1^6A#42xnZSe9f1 z2&^mwqFoH-g^`I+krHWs#^i+!KoEZ*^Fg9b5<|y839XYngK&2s%yA(lQv*E`@nj1T z`!Z))G*W^mv4L?>kyVol-i!4aZ60Fx!4DS%pkrL<)q`_AgIDi>5mgC*e!}hd7InD8 zU1(k2;2yLM-sC>C9$$ilTjd5OdidcYW1Iya1oz?HgIDi@F_s}y@+&#FhPH9e2`8J@ z6efd-=Kdes>(AjBwEz|`8tGav%&U{_!s^AWs)xbHraA4Hr>5P6Zj#(j10CFva4tO49xd~Ec;*(%d z6=QKk=v)^@wE-)%RTmnyf(GkQn|t7uU*DO^C8*wjVKpTLD0%wHlw9G=7(t2QiBvK2 z1vsEk(5kVA4kyM%)y`?sKAId}QvusAFyoO3j&l_#VG5Piyab63US~x*8uv2Jlw#x8 zJHa?FK)m8|AjmD))?uSut;Kqv4>E077K_r3)3Q;0O`DgWmoCl|lA5|3tc7>7R<9|l zP96ln{4{23kruJZFV(6oj%&Fb#Zk?MNG=xn<9~1;g+ZS{|hPFy~ILI(C7ai z>v44BQoO`zV6u&Rebph~OftL$O?A9bUkSz|yz!Q*3lSv`*O$siC3#trF>St#jABVf uJ#c_7+8kd6z(M{{R~5~-Ri*U5t;_m7(yXy~P#V&n`?bLL$r@QFUHc!|ikOrD literal 0 HcmV?d00001