You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
211 lines
7.1 KiB
211 lines
7.1 KiB
"""
|
|
Basic Data Visualization
|
|
|
|
Learn how to create simple, effective visualizations to communicate your findings.
|
|
Visualizations help you and others understand data at a glance.
|
|
|
|
What you'll learn:
|
|
- How to create bar charts
|
|
- How to create line plots
|
|
- How to create pie charts
|
|
- How to customize and save your visualizations
|
|
|
|
Prerequisites:
|
|
- pandas library (install with: pip install pandas)
|
|
- matplotlib library (install with: pip install matplotlib)
|
|
"""
|
|
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
print("=" * 70)
|
|
print("Basic Data Visualization Tutorial")
|
|
print("=" * 70)
|
|
print()
|
|
|
|
# Load data
|
|
print("📂 Loading honey production data...")
|
|
data = pd.read_csv('../data/honey.csv')
|
|
print("✅ Data loaded!\n")
|
|
|
|
# For better display, we'll use a subset of the data
|
|
# Let's focus on a few states in recent years
|
|
if 'state' in data.columns and 'year' in data.columns:
|
|
# Get data for a few states in recent years
|
|
states_to_show = ['CA', 'FL', 'ND', 'SD', 'MT']
|
|
recent_data = data[(data['year'] >= 2010) & (data['state'].isin(states_to_show))]
|
|
|
|
# VISUALIZATION 1: Bar Chart
|
|
print("=" * 70)
|
|
print("VISUALIZATION 1: BAR CHART")
|
|
print("=" * 70)
|
|
print()
|
|
|
|
if 'state' in data.columns and 'totalprod' in data.columns:
|
|
# Calculate average production by state (for top 10 states)
|
|
state_avg = data.groupby('state')['totalprod'].mean().sort_values(ascending=False).head(10)
|
|
|
|
print("Creating a bar chart of average honey production by state...")
|
|
print()
|
|
|
|
# Create the figure and axis
|
|
plt.figure(figsize=(12, 6)) # Width: 12 inches, Height: 6 inches
|
|
|
|
# Create the bar chart
|
|
plt.bar(state_avg.index, state_avg.values, color='gold', edgecolor='orange')
|
|
|
|
# Add labels and title
|
|
plt.xlabel('State', fontsize=12)
|
|
plt.ylabel('Average Production (pounds)', fontsize=12)
|
|
plt.title('Top 10 States by Average Honey Production', fontsize=14, fontweight='bold')
|
|
|
|
# Rotate x-axis labels for better readability
|
|
plt.xticks(rotation=45)
|
|
|
|
# Add a grid for easier reading (behind the bars)
|
|
plt.grid(axis='y', alpha=0.3, linestyle='--')
|
|
|
|
# Adjust layout to prevent label cutoff
|
|
plt.tight_layout()
|
|
|
|
# Save the figure
|
|
plt.savefig('bar_chart_example.png', dpi=300, bbox_inches='tight')
|
|
print("✅ Bar chart saved as 'bar_chart_example.png'")
|
|
plt.close() # Close to free memory
|
|
print()
|
|
|
|
# VISUALIZATION 2: Line Plot
|
|
print("=" * 70)
|
|
print("VISUALIZATION 2: LINE PLOT")
|
|
print("=" * 70)
|
|
print()
|
|
|
|
if 'year' in data.columns and 'totalprod' in data.columns:
|
|
# Calculate total production by year
|
|
yearly_production = data.groupby('year')['totalprod'].sum()
|
|
|
|
print("Creating a line plot of honey production over time...")
|
|
print()
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
|
|
# Create the line plot
|
|
plt.plot(yearly_production.index, yearly_production.values,
|
|
marker='o', # Add circular markers at each data point
|
|
linewidth=2, # Line thickness
|
|
color='darkorange', # Line color
|
|
markersize=6, # Size of markers
|
|
markerfacecolor='gold') # Fill color of markers
|
|
|
|
# Add labels and title
|
|
plt.xlabel('Year', fontsize=12)
|
|
plt.ylabel('Total Production (pounds)', fontsize=12)
|
|
plt.title('Honey Production Over Time', fontsize=14, fontweight='bold')
|
|
|
|
# Add a grid
|
|
plt.grid(True, alpha=0.3, linestyle='--')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('line_plot_example.png', dpi=300, bbox_inches='tight')
|
|
print("✅ Line plot saved as 'line_plot_example.png'")
|
|
plt.close()
|
|
print()
|
|
|
|
# VISUALIZATION 3: Pie Chart
|
|
print("=" * 70)
|
|
print("VISUALIZATION 3: PIE CHART")
|
|
print("=" * 70)
|
|
print()
|
|
|
|
if 'state' in data.columns and 'totalprod' in data.columns:
|
|
# Get total production for top 5 states
|
|
top5_states = data.groupby('state')['totalprod'].sum().sort_values(ascending=False).head(5)
|
|
|
|
print("Creating a pie chart of production share (top 5 states)...")
|
|
print()
|
|
|
|
plt.figure(figsize=(10, 8))
|
|
|
|
# Create the pie chart
|
|
colors = ['gold', 'orange', 'lightsalmon', 'lightcoral', 'peachpuff']
|
|
plt.pie(top5_states.values,
|
|
labels=top5_states.index, # State names
|
|
autopct='%1.1f%%', # Show percentages
|
|
startangle=90, # Start from top
|
|
colors=colors,
|
|
explode=(0.1, 0, 0, 0, 0)) # Slightly separate the first slice
|
|
|
|
plt.title('Top 5 States Share of Total Honey Production',
|
|
fontsize=14, fontweight='bold', pad=20)
|
|
|
|
plt.savefig('pie_chart_example.png', dpi=300, bbox_inches='tight')
|
|
print("✅ Pie chart saved as 'pie_chart_example.png'")
|
|
plt.close()
|
|
print()
|
|
|
|
# VISUALIZATION 4: Multiple Lines on One Plot
|
|
print("=" * 70)
|
|
print("VISUALIZATION 4: COMPARING MULTIPLE SERIES")
|
|
print("=" * 70)
|
|
print()
|
|
|
|
if 'year' in data.columns and 'totalprod' in data.columns and 'state' in data.columns:
|
|
# Compare production trends for a few states
|
|
states_to_compare = ['CA', 'ND', 'SD']
|
|
|
|
print(f"Creating a comparison plot for states: {', '.join(states_to_compare)}...")
|
|
print()
|
|
|
|
plt.figure(figsize=(12, 6))
|
|
|
|
# Plot a line for each state
|
|
colors_map = {'CA': 'blue', 'ND': 'green', 'SD': 'red'}
|
|
|
|
for state in states_to_compare:
|
|
if state in data['state'].values:
|
|
state_data = data[data['state'] == state].groupby('year')['totalprod'].sum()
|
|
plt.plot(state_data.index, state_data.values,
|
|
marker='o',
|
|
label=state, # This will appear in the legend
|
|
linewidth=2,
|
|
color=colors_map.get(state, 'gray'))
|
|
|
|
plt.xlabel('Year', fontsize=12)
|
|
plt.ylabel('Total Production (pounds)', fontsize=12)
|
|
plt.title('Honey Production Comparison by State', fontsize=14, fontweight='bold')
|
|
plt.legend(title='State') # Add a legend
|
|
plt.grid(True, alpha=0.3, linestyle='--')
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('comparison_plot_example.png', dpi=300, bbox_inches='tight')
|
|
print("✅ Comparison plot saved as 'comparison_plot_example.png'")
|
|
plt.close()
|
|
print()
|
|
|
|
# Summary
|
|
print("=" * 70)
|
|
print("CONGRATULATIONS!")
|
|
print("=" * 70)
|
|
print("You've learned how to:")
|
|
print(" ✓ Create bar charts to compare categories")
|
|
print(" ✓ Create line plots to show trends over time")
|
|
print(" ✓ Create pie charts to show proportions")
|
|
print(" ✓ Plot multiple data series on one chart")
|
|
print(" ✓ Customize colors, labels, and titles")
|
|
print(" ✓ Save your visualizations as image files")
|
|
print()
|
|
print("Your visualizations have been saved in the examples/ folder!")
|
|
print()
|
|
print("Try this yourself:")
|
|
print(" • Change the colors of your charts")
|
|
print(" • Add more states to the comparison plot")
|
|
print(" • Create a horizontal bar chart")
|
|
print(" • Experiment with different chart styles")
|
|
print()
|
|
print("Pro tip: Always choose the right chart type for your data:")
|
|
print(" • Bar charts: Compare categories")
|
|
print(" • Line plots: Show trends over time")
|
|
print(" • Pie charts: Show parts of a whole")
|
|
print(" • Scatter plots: Show relationships between variables")
|
|
print("=" * 70)
|