Wednesday, August 31, 2022

Box Plot for All Columns

 plt.figure(figsize=(10,10))

sns.boxplot(data=df)
plt.show()

List to Dictionary to DataFrame

# List
# NOTE(review): the separators between values were missing, which made each
# list a single huge integer. The digits are split here into ten 3-digit
# heights and ten 2-digit weights -- confirm against the original data.
height = [151, 174, 138, 186, 128, 136, 179, 163, 152, 131]
weight = [63, 81, 56, 91, 47, 57, 76, 72, 62, 48]

# List to dictionary: each key becomes a column name, each list a column
baby_dic = {'height': height, 'weight': weight}

# Dictionary to DataFrame
baby_data = pd.DataFrame(baby_dic)

Tuesday, August 30, 2022

Outlier Detection in Pandas

 Q1 = df_boston['CRIM'].quantile(0.25)

Q3 = df_boston['CRIM'].quantile(0.75)
IQR = Q3 - Q1

Outlier_min = Q1 - 1.5 * IQR 
Outlier_max = Q3 + 1.5 * IQR
print(IQR, Outlier_min, Outlier_max)


###outlier dealing
df_boston['CRIM'] = np.where(df_boston['CRIM']>= Outlier_max, Outlier_max,df_boston['CRIM'])
df_boston['CRIM'] = np.where(df_boston['CRIM']<= Outlier_min, Outlier_min,df_boston['CRIM'])

Pandas Series and Dataframe

 # Series from list

a = [4100453272]
example1 = pd.Series(a)
print(example1)

# Series with index
example2 = pd.Series(a, index = ["a""b""c","d","e","f"])


# Series from Data Dictionary
datadict = {"data1"420"data2"380"data3"390}
example3 = pd.Series(datadict)


# Dataframe from list
data = [['Alex',10],['Bob',12],['Clarke',13]]
df1 = pd.DataFrame(data,columns=['Name','Age'])
df1


# Add new Column in Dataframe
df2['Address'] = pd.Series(['Mumbai','Pune','Delhi'])


# Add new rows
data = [['Ehsan',10,'Mumbai'],['Rahman',12,'Goa'],['Himlu',13,'Milan']]
df3 = pd.DataFrame(data,columns=['Name','Age','Address'])
df = df2.append(df3)
df

Useful Pandas Statistical Formulas

 #data.sex.describe()

Describing a single column


#data.std()
For Standard Deviation

# Skewness, then kurtosis, of the pollution_exp column.
# NOTE(review): skew/kurtosis are presumably scipy.stats functions; the import
# is not visible in this snippet -- confirm.
for shape_statistic in (skew, kurtosis):
    print(shape_statistic(data.pollution_exp, axis=0, bias=True))

Some Useful Pandas Formulas

 # data.info()

Used for displaying column names, data types, and non-null counts


#data.describe().T
Used to display count, mean, standard deviation, min, quartiles (the 50% row is the median), and max for each numeric column

#data.corr().style.background_gradient(cmap='coolwarm')
Used to calculate and display the correlation among columns.

#data.isna()
for finding null value in columns

#data.isna().sum()

for displaying total null values in columns


#data['classification'].unique()

to display Unique Value

#data[['classification','id']].groupby('classification').count()

To display counts grouped by another column


#data.drop(['id','rbc'],axis=1,inplace=True)
Dropping columns

#data['age'].mean()

calculating Mean of Column

#data['dm'].replace(to_replace = {' yes':'yes'},inplace=True)

replacing a value with another value


#data['appet'].fillna( data['appet'].mode()[0], inplace=True)

Filling Null Value with Mode


# Print the unique values of every column, one column per entry.
# (The loop header had been commented out while its indented body was not,
# leaving an orphaned indented line that Python cannot parse.)
for col in data.columns:
    print(f"{col} has {data[col].unique()} values\n")


Displaying the unique values of all columns



# Pair grid over data: histograms on the diagonal, scatter plots elsewhere.
g = sns.PairGrid(data).map_diag(plt.hist).map_offdiag(plt.scatter)

Displaying Pair Plot


#sns.catplot(x="classification", y="age",data=data,hue='appet',col='htn')

Displaying Category Plot


#sns.boxplot(x='htn',y='age',data=notchk_age)

Displaying Box Plot

Google Drive access from Google Colab

 import pandas as pd



from google.colab import drive
drive.mount('/content/drive')

data= pd.read_csv('/content/drive/MyDrive/DataScience/Content/kidney_disease.csv')
data.head()