You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# clustering model library
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import KMeans
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 33 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 1000 non-null int64
1 Gender 1000 non-null object
2 Age 1000 non-null int64
3 Education 992 non-null float64
4 Occupation 941 non-null object
5 State 1000 non-null object
6 Relationship.Status 996 non-null object
7 Obligation 1000 non-null float64
8 Obligation2 1000 non-null object
9 NEP 1000 non-null float64
10 Vacation.Behaviour 975 non-null float64
11 rest and relax 1000 non-null object
12 luxury / be spoilt 1000 non-null object
13 do sports 1000 non-null object
14 excitement, a challenge 1000 non-null object
15 not exceed planned budget 1000 non-null object
16 realise creativity 1000 non-null object
17 fun and entertainment 1000 non-null object
18 good company 1000 non-null object
19 health and beauty 1000 non-null object
20 free-and-easy-going 1000 non-null object
21 entertainment facilities 1000 non-null object
22 not care about prices 1000 non-null object
23 life style of the local people 1000 non-null object
24 intense experience of nature 1000 non-null object
25 cosiness/familiar atmosphere 1000 non-null object
26 maintain unspoilt surroundings 1000 non-null object
27 everything organised 1000 non-null object
28 unspoilt nature/natural landscape 1000 non-null object
29 cultural offers 1000 non-null object
30 change of surroundings 1000 non-null object
31 Income(k$) 200 non-null float64
32 Expenditure 200 non-null float64
dtypes: float64(6), int64(2), object(25)
memory usage: 257.9+ KB
# Count the missing (NaN/None) entries in every column and print the tally.
missing_per_column = data.isnull().sum()
print(missing_per_column)
ID 0
Gender 0
Age 0
Education 8
Occupation 59
State 0
Relationship.Status 4
Obligation 0
Obligation2 0
NEP 0
Vacation.Behaviour 25
rest and relax 0
luxury / be spoilt 0
do sports 0
excitement, a challenge 0
not exceed planned budget 0
realise creativity 0
fun and entertainment 0
good company 0
health and beauty 0
free-and-easy-going 0
entertainment facilities 0
not care about prices 0
life style of the local people 0
intense experience of nature 0
cosiness/familiar atmosphere 0
maintain unspoilt surroundings 0
everything organised 0
unspoilt nature/natural landscape 0
cultural offers 0
change of surroundings 0
Income(k$) 800
Expenditure 800
dtype: int64
ID 0
Gender 0
Age 0
Education 0
Occupation 0
State 0
Relationship.Status 0
Obligation 0
Obligation2 0
NEP 0
Vacation.Behaviour 0
rest and relax 0
luxury / be spoilt 0
do sports 0
excitement, a challenge 0
not exceed planned budget 0
realise creativity 0
fun and entertainment 0
good company 0
health and beauty 0
free-and-easy-going 0
entertainment facilities 0
not care about prices 0
life style of the local people 0
intense experience of nature 0
cosiness/familiar atmosphere 0
maintain unspoilt surroundings 0
everything organised 0
unspoilt nature/natural landscape 0
cultural offers 0
change of surroundings 0
Income(k$) 0
Expenditure 0
dtype: int64
/home/reddy/.local/lib/python3.10/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
# Scatter plot of annual income against age, drawn as one series per gender
# so each gender gets its own colour and legend entry.
plt.figure(figsize=(20, 7))
gender = ['Male', 'Female']
for i in gender:
    # With data= given, the x/y strings are resolved as column names of the
    # gender-filtered frame.
    plt.scatter(x='Age', y='Income(k$)', data=data[data['Gender'] == i],
                s=200, alpha=0.5, label=i)
plt.legend()
plt.xlabel("Age")
plt.ylabel("Annual Income")
plt.title("Annual Income according to Age")
plt.show()
# Scatter plot of expenditure against age, drawn as one series per gender
# so each gender gets its own colour and legend entry.
plt.figure(figsize=(20, 7))
gender = ['Male', 'Female']
for i in gender:
    # With data= given, the x/y strings are resolved as column names of the
    # gender-filtered frame.
    plt.scatter(x='Age', y='Expenditure', data=data[data['Gender'] == i],
                s=200, alpha=0.5, label=i)
plt.legend()
plt.xlabel("Age")
plt.ylabel("Expenditure(k$)")
plt.title("Expenditure according to Age")
plt.show()
# Scatter plot of education against age, drawn as one series per gender
# so each gender gets its own colour and legend entry.
plt.figure(figsize=(20, 7))
gender = ['Male', 'Female']
for i in gender:
    # With data= given, the x/y strings are resolved as column names of the
    # gender-filtered frame.
    plt.scatter(x='Age', y='Education', data=data[data['Gender'] == i],
                s=200, alpha=0.5, label=i)
plt.legend()
plt.xlabel("Age")
plt.ylabel("Education")
plt.title("Education according to Age")
plt.show()
# Scatter plot with income on the x-axis and age on the y-axis, drawn as one
# series per gender so each gender gets its own colour and legend entry.
plt.figure(figsize=(20, 7))
gender = ['Male', 'Female']
for i in gender:
    # With data= given, the x/y strings are resolved as column names of the
    # gender-filtered frame.
    plt.scatter(x='Income(k$)', y='Age', data=data[data['Gender'] == i],
                s=200, alpha=0.5, label=i)
plt.legend()
# Spelling of "Annual" corrected in the label and title below.
plt.xlabel("Annual Income")
plt.ylabel("Age")
plt.title("Annual Income according to Age")
plt.show()
# Choose k for K-Means with the elbow method: fit one model per candidate k
# and record its within-cluster sum of squares (WCSS, exposed as inertia_).
from sklearn import preprocessing

wcss = []
# Drop the identifier and the listed categorical columns before clustering.
# NOTE(review): the info() dump earlier in this file lists the travel-motive
# columns as object dtype; KMeans needs numeric input, so confirm they are
# encoded before this point.
data_model = data.drop(
    ['Gender', 'ID', 'Occupation', 'State', 'Relationship.Status', 'Obligation2'],
    axis=1,
)
k_values = range(1, 15)
for k in k_values:
    kmeans = KMeans(n_clusters=k)
    kmeans.fit(data_model)
    wcss.append(kmeans.inertia_)

# The best k is at the elbow of the WCSS curve; the author reads it as 5.
plt.figure(figsize=(15, 5))
plt.plot(k_values, wcss)
plt.xlabel("number of k (cluster) value")
plt.ylabel("wcss")
plt.show()
# Create a dendrogram to find the best number of clusters: build the
# hierarchical linkage with Ward's method over data_model, then plot the tree.
merg = linkage(data_model, method="ward")
plt.figure(figsize=(25, 10))
dendrogram(merg, leaf_rotation=90)
plt.xlabel("data points")
plt.ylabel("euclidean distance")
plt.show()