Download any dataset and do the following: (a) count the number of categorical and numeric features; (b) remove one correlated attribute (if any); (c) display the five-number summary of each attribute and show it visually.

Download any dataset and do the following:

CODE

import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

# Pairs whose absolute Pearson correlation reaches this value are treated as
# redundant. 0.8 is a rule-of-thumb cut-off, not something derived from the data.
CORRELATION_THRESHOLD = 0.8


def split_feature_types(df):
    """Return ``(categorical_columns, numeric_columns)`` of *df* as two Indexes.

    Numeric means every numeric dtype (ints and floats of any width), not
    just ``float64``. Categorical means every remaining column that is not a
    datetime or timedelta, so ``object``, ``category``, ``bool`` and string
    dtypes are all counted.
    """
    numeric = df.select_dtypes(include="number").columns
    categorical = df.select_dtypes(
        exclude=["number", "datetime", "timedelta"]
    ).columns
    return categorical, numeric


def pick_correlated_attribute(correlation_matrix, threshold=CORRELATION_THRESHOLD):
    """Return the label of one redundant column, or ``None`` if there is none.

    The most strongly correlated pair of distinct columns is located first.
    If its absolute correlation is below *threshold*, nothing is considered
    redundant and ``None`` is returned. Otherwise the member of that pair
    with the larger total absolute correlation to all columns is returned
    (the first of the pair on a tie).

    Args:
        correlation_matrix: square DataFrame as produced by ``DataFrame.corr()``.
        threshold: minimum absolute correlation for a pair to count as correlated.
    """
    strength = correlation_matrix.abs()
    labels = list(strength.columns)

    best_pair = None
    best_value = threshold
    # Scan only the upper triangle: the matrix is symmetric and the diagonal
    # (always 1.0) says nothing about redundancy between attributes.
    for i in range(len(labels)):
        for j in range(i + 1, len(labels)):
            value = strength.iat[i, j]
            # NaN (e.g. from a constant column) fails this comparison and is skipped.
            if value >= best_value:
                best_value = value
                best_pair = (i, j)

    if best_pair is None:
        return None

    totals = strength.sum(axis=0)
    first, second = best_pair
    return labels[first] if totals.iloc[first] >= totals.iloc[second] else labels[second]


def five_number_summary(df):
    """Return min, Q1, median, Q3 and max for every numeric column of *df*.

    The result is a DataFrame with one column per numeric attribute and the
    rows ``min``, ``Q1``, ``median``, ``Q3``, ``max``.
    """
    summary = df.select_dtypes(include="number").quantile([0.0, 0.25, 0.5, 0.75, 1.0])
    summary.index = ["min", "Q1", "median", "Q3", "max"]
    return summary


if __name__ == "__main__":
    # Load the Iris dataset into a pandas DataFrame (the file has no header row)
    iris_df = pd.read_csv(
        'iris.data',
        header=None,
        names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class'],
    )

    # a. Count the number of categorical and numeric features
    categorical_features, numeric_features = split_feature_types(iris_df)
    print(f"Number of categorical features: {len(categorical_features)}")
    print(f"Number of numeric features: {len(numeric_features)}")

    # b. Remove one correlated attribute, but only if a strongly correlated pair exists
    correlation_matrix = iris_df[numeric_features].corr()
    most_correlated_attribute = pick_correlated_attribute(correlation_matrix)
    if most_correlated_attribute is None:
        print("No strongly correlated attributes found; nothing removed.")
    else:
        iris_df = iris_df.drop(columns=most_correlated_attribute)
        print(f"Removed correlated attribute: {most_correlated_attribute}")

    # c. Display the five-number summary of each attribute and show it visually
    print(five_number_summary(iris_df))

    plt.figure(figsize=(10, 6))
    # Box plots draw exactly the five-number summary; only numeric columns apply.
    sns.boxplot(data=iris_df.select_dtypes(include="number"), orient='h')
    plt.title("Five-Number Summary of Iris Dataset")
    plt.xlabel("Value")
    plt.show()

    # Display the full summary statistics (count, mean, std, quartiles) as well
    summary_statistics = iris_df.describe()
    print(summary_statistics)