Import the Iris data using the sklearn library. i. Compute the mean, mode, median, standard deviation, confidence interval and standard error for each feature. ii. Compute the correlation coefficients between each pair of features and plot a heatmap. iii. Find the covariance between the length of the sepal and the petal. iv. Build a contingency table for the class feature.
Import the Iris data using the sklearn library (download the IRIS data from
https://archive.ics.uci.edu/ml/datasets/iris or import it from sklearn.datasets).
i. Compute mean, mode, median, standard deviation, confidence interval and
standard error for each feature
ii. Compute correlation coefficients between each pair of features and plot heatmap
iii. Find covariance between length of sepal and petal
iv. Build contingency table for class feature
CODE
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from scipy import stats
import seaborn as sns
import matplotlib.pyplot as plt
# Fetch the Iris dataset and pull out the pieces used by the analysis below
iris = load_iris()
data, target = iris.data, iris.target
feature_names, target_names = iris.feature_names, iris.target_names
# Tabulate the measurements (one column per feature) and attach a 'class'
# column by looking each row's label up in target_names
df = pd.DataFrame(data, columns=feature_names)
df['class'] = np.take(target_names, target)
# Compute mean, mode, median, standard deviation, confidence interval, and standard error for each feature
# Work on the numeric feature columns only: df also holds the string 'class'
# column, and numeric reductions over it raise TypeError on pandas >= 2.0.
features = df[feature_names]
feature_means = features.mean()
feature_std_devs = features.std()
statistics = {
    'mean': feature_means,
    # mode() can return several rows when values tie; keep only the first row.
    'mode': features.mode().iloc[0],
    'median': features.median(),
    'std_dev': feature_std_devs,
    # Keyed by feature name so it can be looked up the same way as the
    # pandas Series above (a plain list cannot be indexed by a string label).
    # Normal-approximation 95% interval around the mean, using
    # std_dev / sqrt(n) as the scale.
    'confidence_interval': {
        name: stats.norm.interval(
            0.95,
            loc=feature_means[name],
            scale=feature_std_devs[name] / np.sqrt(len(features)),
        )
        for name in feature_names
    },
    'std_error': features.sem(),
}
# Display statistics for each feature
for feature in feature_names:
    # Unpack the bounds so they print as plain numbers regardless of how the
    # installed NumPy version renders scalars inside a tuple.
    ci_low, ci_high = statistics['confidence_interval'][feature]
    print(f"Statistics for {feature}:")
    print(f"Mean: {statistics['mean'][feature]}")
    print(f"Mode: {statistics['mode'][feature]}")
    print(f"Median: {statistics['median'][feature]}")
    print(f"Standard Deviation: {statistics['std_dev'][feature]}")
    print(f"Confidence Interval (95%): ({ci_low}, {ci_high})")
    print(f"Standard Error: {statistics['std_error'][feature]}")
    print()
# Compute correlation coefficients between each pair of features
# Restrict to numeric columns: df also carries the string 'class' column, and
# DataFrame.corr() raises on non-numeric data with pandas >= 2.0.
correlation_matrix = df.select_dtypes(include='number').corr()
# Plot heatmap of correlation coefficients (each cell annotated to 2 decimals)
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title("Correlation Coefficients Heatmap")
plt.show()
# Covariance between sepal length and petal length.
# np.cov on two 1-D inputs returns a 2x2 matrix; the off-diagonal entry
# [0, 1] is the covariance between the first and second input.
sepal_length = df['sepal length (cm)']
petal_length = df['petal length (cm)']
covariance_matrix = np.cov(sepal_length, petal_length)
covariance = covariance_matrix[0, 1]
print(f"Covariance between sepal length and petal length: {covariance}")
# Contingency table for the class feature: one row per class label with a
# single 'count' column holding the number of rows carrying that label
class_labels = df['class']
contingency_table = pd.crosstab(class_labels, 'count')
print("Contingency Table:", contingency_table, sep="\n")
Comments
Post a Comment