Creating plots with pandas¶

The plot method¶

In [1]:

import pandas as pd
import numpy as np

In [2]:

ts = pd.Series(np.random.randn(1000),
               index=pd.date_range('1/1/2000', periods=1000))

In [3]:

ts = ts.cumsum()

In [4]:

ts.head()

Out[4]:

2000-01-01   -0.592325
2000-01-02   -1.417050
2000-01-03   -2.662854
2000-01-04   -3.204283
2000-01-05   -5.686098
Freq: D, dtype: float64

Line chart¶

In [5]:

ts.plot()

Out[5]:

<Axes: >

No description has been provided for this image

In [6]:

df = pd.DataFrame(np.random.randn(1000, 4),
                  index=ts.index, columns=list('ABCD'))

In [7]:

df.head()

Out[7]:

	A	B	C	D
2000-01-01	-0.416102	-0.469991	0.834004	-0.999729
2000-01-02	1.182148	1.074082	-0.809501	0.813045
2000-01-03	2.320584	0.524429	1.728922	-0.936292
2000-01-04	-0.021193	-0.985823	-2.142262	-0.125014
2000-01-05	-1.264067	-1.737040	-1.028364	-0.809272

In [8]:

df = df.cumsum()
df

Out[8]:

	A	B	C	D
2000-01-01	-0.416102	-0.469991	0.834004	-0.999729
2000-01-02	0.766046	0.604091	0.024503	-0.186684
2000-01-03	3.086629	1.128520	1.753426	-1.122976
2000-01-04	3.065436	0.142698	-0.388836	-1.247990
2000-01-05	1.801369	-1.594342	-1.417201	-2.057262
...	...	...	...	...
2002-09-22	-29.887551	-5.512215	-27.085051	-66.790310
2002-09-23	-29.834693	-6.891953	-25.034900	-67.620236
2002-09-24	-28.368505	-6.620989	-25.083735	-68.047712
2002-09-25	-28.412433	-7.138621	-26.146851	-66.215998
2002-09-26	-29.115388	-7.531759	-26.402414	-66.779953

1000 rows × 4 columns

In [9]:

df.plot()

Out[9]:

<Axes: >

Business sales data¶

In [10]:

sales_df = pd.read_csv('business-sales.csv')

In [11]:

sales_df.head()

Out[11]:

	division	level of education	training level	work experience	salary	sales
0	computer hardware	associate's degree	0	4	75650	209271
1	office supplies	associate's degree	2	7	96502	394503
2	printers	some college	0	2	55359	113345
3	printers	associate's degree	1	6	83955	301349
4	computer software	some college	1	6	103202	372674

In [12]:

sales_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   division            1000 non-null   object
 1   level of education  1000 non-null   object
 2   training level      1000 non-null   int64 
 3   work experience     1000 non-null   int64 
 4   salary              1000 non-null   int64 
 5   sales               1000 non-null   int64 
dtypes: int64(4), object(2)
memory usage: 47.0+ KB

In [13]:

sales_df.describe()

Out[13]:

	training level	work experience	salary	sales
count	1000.000000	1000.000000	1000.000000	1000.000000
mean	1.212000	6.071000	89344.121000	333310.988000
std	0.938049	3.027396	17693.218567	110626.544485
min	0.000000	0.000000	41672.000000	37354.000000
25%	0.000000	4.000000	77819.500000	260057.750000
50%	1.000000	6.000000	89933.500000	335900.500000
75%	2.000000	8.000000	101074.500000	405382.500000
max	3.000000	16.000000	152529.000000	785291.000000

Plot types¶

Bar chart¶

In [14]:

division_values = sales_df['division'].value_counts()
division_values

Out[14]:

division
printers             270
office supplies      246
peripherals          228
computer hardware    174
computer software     82
Name: count, dtype: int64

In [15]:

division_values.plot(kind="bar")

Out[15]:

<Axes: xlabel='division'>

In [16]:

division_values.plot(kind="bar", rot=30)

Out[16]:

<Axes: xlabel='division'>

In [17]:

division_values.plot(kind="bar",
                     rot=30,
                     color="orange")

Out[17]:

<Axes: xlabel='division'>

In [18]:

division_values.plot(kind="bar",
                     rot=30,
                     color=["red", "blue", "green", "yellow", "orange"])

Out[18]:

<Axes: xlabel='division'>

In [19]:

division_values.plot(kind="barh",
                     rot=30,
                     color=["red", "blue", "green", "yellow", "orange"])

Out[19]:

<Axes: ylabel='division'>

In [21]:

sales_per_division = sales_df[["division", "sales"]].groupby("division").mean().reset_index()
sales_per_division

Out[21]:

	division	sales
0	computer hardware	327411.333333
1	computer software	339389.036585
2	office supplies	336236.727642
3	peripherals	326656.192982
4	printers	338220.992593

In [23]:

sales_per_division["next_year_sales"] = sales_per_division["sales"] * 1.3

In [24]:

sales_per_division

Out[24]:

	division	sales	next_year_sales
0	computer hardware	327411.333333	425634.733333
1	computer software	339389.036585	441205.747561
2	office supplies	336236.727642	437107.745935
3	peripherals	326656.192982	424653.050877
4	printers	338220.992593	439687.290370

In [25]:

sales_per_division.plot(kind="bar",
                        x="division",
                        y="sales",
                        rot=30)

Out[25]:

<Axes: xlabel='division'>

In [26]:

sales_per_division.plot(kind="bar",
                        x="division",
                        y=["sales", "next_year_sales"],
                        rot=30)

Out[26]:

<Axes: xlabel='division'>

In [27]:

sales_per_division.plot(kind="bar",
                        x="division",
                        y=["sales", "next_year_sales"],
                        stacked=True,
                        rot=30)

Out[27]:

<Axes: xlabel='division'>

In [28]:

sales_per_division.plot(kind="bar",
                        x="division",
                        y=["sales", "next_year_sales"],
                        xlabel="Division",
                        ylabel="Total sales",
                        stacked=True,
                        rot=30)

Out[28]:

<Axes: xlabel='Division', ylabel='Total sales'>

Scatter plot¶

In [29]:

sales_df.plot(kind="scatter", x="salary", y="sales")

Out[29]:

<Axes: xlabel='salary', ylabel='sales'>

Histogram¶

In [30]:

sales_df['work experience'].plot(kind="hist")

Out[30]:

<Axes: ylabel='Frequency'>

In [31]:

sales_df['work experience'].plot(kind="hist", bins=20)

Out[31]:

<Axes: ylabel='Frequency'>

KDE¶

In [32]:

sales_df['work experience'].plot(kind='kde')

Out[32]:

<Axes: ylabel='Density'>

Pie chart¶

In [33]:

education_values = sales_df['level of education'].value_counts()
education_values

Out[33]:

level of education
some college          401
associate's degree    393
high school           102
bachelor's degree      98
master's degree         6
Name: count, dtype: int64

In [34]:

education_values.plot(kind="pie")

Out[34]:

<Axes: ylabel='count'>

In [35]:

education_values.plot(kind="pie", figsize=(8,8))

Out[35]:

<Axes: ylabel='count'>

In [36]:

education_values.plot(kind="pie",
                      figsize=(8,8),
                      autopct='%1.2f%%')

Out[36]:

<Axes: ylabel='count'>

Area chart¶

In [37]:

experience_values = sales_df['work experience'].value_counts()
experience_values.head()

Out[37]:

work experience
7    133
5    130
6    118
8    115
4     84
Name: count, dtype: int64

In [38]:

experience_values = experience_values.sort_index()
experience_values.head()

Out[38]:

work experience
0    34
1    46
2    63
3    66
4    84
Name: count, dtype: int64

In [39]:

experience_values.plot(kind="area")

Out[39]:

<Axes: xlabel='work experience'>

In [40]:

experience_values_2 = experience_values / 2
experience_values_3 = experience_values / 7

In [41]:

new_df = pd.DataFrame({
    "exp_1": experience_values,
    "exp_2": experience_values_2,
    "exp_3": experience_values_3,
})

new_df.head()

Out[41]:

	exp_1	exp_2	exp_3
work experience
0	34	17.0	4.857143
1	46	23.0	6.571429
2	63	31.5	9.000000
3	66	33.0	9.428571
4	84	42.0	12.000000

In [42]:

new_df.plot(kind="area")

Out[42]:

<Axes: xlabel='work experience'>

Boxplot¶

In [43]:

sales_df.plot(kind="box")

Out[43]:

<Axes: >

In [44]:

sales_df["salary"].plot(kind="box")

Out[44]:

<Axes: >

In [45]:

sales_df["work experience"].plot(kind="box")

Out[45]:

<Axes: >

Extra customizations¶

In [46]:

sales_df['work experience'].plot(kind="hist")

Out[46]:

<Axes: ylabel='Frequency'>

In [47]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon")

Out[47]:

<Axes: ylabel='Frequency'>

In [48]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon",
                                 title="Histogram of Work experience")

Out[48]:

<Axes: title={'center': 'Histogram of Work experience'}, ylabel='Frequency'>

In [49]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon",
                                 figsize=(10,6),
                                 title="Histogram of Work experience")

Out[49]:

<Axes: title={'center': 'Histogram of Work experience'}, ylabel='Frequency'>

In [50]:

import matplotlib.pyplot as plt

In [51]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon",
                                 figsize=(10,6),
                                 label="Work experience in years",
                                 title="Histogram of Work experience")

plt.legend()

Out[51]:

<matplotlib.legend.Legend at 0x7f8c5a915d90>

In [52]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon",
                                 figsize=(10,6),
                                 grid=True,
                                 label="Work experience in years",
                                 title="Histogram of Work experience")

plt.legend()

Out[52]:

<matplotlib.legend.Legend at 0x7f8c5a8a7310>

In [53]:

sales_df['work experience'].plot(kind="hist",
                                 color="salmon",
                                 figsize=(10,6),
                                 grid=True,
                                 fontsize=14,
                                 label="Work experience in years",
                                 title="Histogram of Work experience")

plt.legend()

Out[53]:

<matplotlib.legend.Legend at 0x7f8c5a9161d0>

Exporting my chart¶

In [54]:

my_chart = sales_df['work experience'].plot(kind="hist",
                                            color="salmon",
                                            figsize=(10,6),
                                            grid=True,
                                            fontsize=14,
                                            label="Work experience in years",
                                            title="Histogram of Work experience")

plt.legend()

Out[54]:

<matplotlib.legend.Legend at 0x7f8c5a50d6d0>

In [ ]:

my_chart.figure.savefig("my-chart.png")