Statement of Completion#7e0a4977
Visualizations with Matplotlib
easy
Introduction to Data Visualization : Datasaurus Dozen
Resolution
Activities
Visualization analysis
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
In [ ]:
# Load the Datasaurus data from the CSV file into a DataFrame
df = pd.read_csv('datasaurus.csv')
# Preview the first 5 rows
df.head()
Grouping the data by the `dataset` column and then finding the mean, median, and standard deviation of the `x` and `y` columns for each dataset.
In [ ]:
# Split the rows according to the value of the 'dataset' column
grouped_data = df.groupby('dataset')
# Statistics computed for each coordinate column within every group
stat_names = ['mean', 'median', 'std']
summary_stats = grouped_data.agg({column: stat_names for column in ('x', 'y')})
# Display the resulting per-dataset table
print(summary_stats)
Plotting the `x` and `y` columns for each dataset/category as scatter plots.
In [ ]:
# Distinct dataset names found in the 'dataset' column
datasets = df['dataset'].unique()
# Grid geometry: a fixed number of columns and as many rows as needed
num_cols = 4
num_rows = -(-len(datasets) // num_cols)  # ceiling division
# Build the figure and view its grid of axes as a flat sequence
fig, axs = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(15, 10))
axs = axs.flatten()
# Draw one scatter plot per dataset, filling the grid in order
for panel, name in zip(axs, datasets):
    # Keep only the rows that belong to this dataset
    points = df.loc[df['dataset'] == name]
    panel.scatter(points['x'], points['y'])
    panel.set_title(f'Dataset: {name}')
    panel.set_xlabel('X-axis')
    panel.set_ylabel('Y-axis')
# Drop the grid cells that were left without a dataset
for unused in axs[len(datasets):]:
    fig.delaxes(unused)
# Tidy up the spacing between subplots, then render the figure
plt.tight_layout()
plt.show()