mpg 데이터
import pandas as pd
# Load the mpg data set into a DataFrame and preview its first rows.
# NOTE(review): the path looks like a mounted Google Drive folder - confirm
# the drive is mounted before running this cell.
mpg_csv_path = '/content/drive/MyDrive/mpg.csv'
mpg = pd.read_csv(mpg_csv_path)
mpg.head()
| | manufacturer | model | displ | year | cyl | trans | drv | cty | hwy | fl | category |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | audi | a4 | 1.8 | 1999 | 4 | auto(l5) | f | 18 | 29 | p | compact |
| 1 | audi | a4 | 1.8 | 1999 | 4 | manual(m5) | f | 21 | 29 | p | compact |
| 2 | audi | a4 | 2.0 | 2008 | 4 | manual(m6) | f | 20 | 31 | p | compact |
| 3 | audi | a4 | 2.0 | 2008 | 4 | auto(av) | f | 21 | 30 | p | compact |
| 4 | audi | a4 | 2.8 | 1999 | 6 | auto(l5) | f | 16 | 26 | p | compact |
컬럼 설명
gapminder 데이터
# Load the gapminder data set (tab-separated file) and preview its first rows.
gapminder_tsv_path = '/content/drive/MyDrive/gapminder.tsv'
gap = pd.read_csv(gapminder_tsv_path, sep='\t')
gap.head()
| | country | continent | year | lifeExp | pop | gdpPercap |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | Asia | 1952 | 28.801 | 8425333 | 779.445314 |
| 1 | Afghanistan | Asia | 1957 | 30.332 | 9240934 | 820.853030 |
| 2 | Afghanistan | Asia | 1962 | 31.997 | 10267083 | 853.100710 |
| 3 | Afghanistan | Asia | 1967 | 34.020 | 11537966 | 836.197138 |
| 4 | Afghanistan | Asia | 1972 | 36.088 | 13079460 | 739.981106 |
컬럼 설명
mpg 데이터 나라별 자동차
# List every distinct country name present in the gapminder data.
country_column = gap['country']
country_column.unique()
array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.',
'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Czech Republic',
'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
'Finland', 'France', 'Gabon', 'Gambia', 'Germany', 'Ghana',
'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti',
'Honduras', 'Hong Kong, China', 'Hungary', 'Iceland', 'India',
'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea, Dem. Rep.',
'Korea, Rep.', 'Kuwait', 'Lebanon', 'Lesotho', 'Liberia', 'Libya',
'Madagascar', 'Malawi', 'Malaysia', 'Mali', 'Mauritania',
'Mauritius', 'Mexico', 'Mongolia', 'Montenegro', 'Morocco',
'Mozambique', 'Myanmar', 'Namibia', 'Nepal', 'Netherlands',
'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'Norway', 'Oman',
'Pakistan', 'Panama', 'Paraguay', 'Peru', 'Philippines', 'Poland',
'Portugal', 'Puerto Rico', 'Reunion', 'Romania', 'Rwanda',
'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia',
'Sierra Leone', 'Singapore', 'Slovak Republic', 'Slovenia',
'Somalia', 'South Africa', 'Spain', 'Sri Lanka', 'Sudan',
'Swaziland', 'Sweden', 'Switzerland', 'Syria', 'Taiwan',
'Tanzania', 'Thailand', 'Togo', 'Trinidad and Tobago', 'Tunisia',
'Turkey', 'Uganda', 'United Kingdom', 'United States', 'Uruguay',
'Venezuela', 'Vietnam', 'West Bank and Gaza', 'Yemen, Rep.',
'Zambia', 'Zimbabwe'], dtype=object)
# Countries to compare (presumably the home countries of the mpg
# manufacturers - confirm).
country_mpg = ['Germany', 'United States', 'Japan', 'Korea, Rep.', 'United Kingdom']

# Average gdpPercap of each country over all of its rows, then keep only the
# selected countries and list them from richest to poorest.
data = gap[['country', 'gdpPercap']]
mean_gdp_by_country = data.groupby('country')['gdpPercap'].mean()
mean_gdp_by_country.loc[country_mpg].sort_values(ascending=False)
country
United States 26261.151347
Germany 20556.684433
United Kingdom 19380.472986
Japan 17750.869984
Korea, Rep. 8217.318485
Name: gdpPercap, dtype: float64
# Mean life expectancy of every country over all of its rows, highest first.
# Columns are picked by name rather than by position (0 and -3) so the
# selection keeps working if the column order of `gap` ever changes.
data1 = gap.loc[:, ['country', 'lifeExp']]
# Rank on the exact means and round only for display: sorting values that
# were already rounded leaves tied countries in an arbitrary order.
data1.groupby('country').mean().sort_values(by='lifeExp', ascending=False).round(0)
| | lifeExp |
|---|---|
| country | |
| Iceland | 77.0 |
| Netherlands | 76.0 |
| Switzerland | 76.0 |
| Sweden | 76.0 |
| Norway | 76.0 |
| ... | ... |
| Mozambique | 40.0 |
| Guinea-Bissau | 39.0 |
| Angola | 38.0 |
| Sierra Leone | 37.0 |
| Afghanistan | 37.0 |
142 rows × 1 columns
# Mean life expectancy of the selected countries, highest first.
# Columns are picked by name rather than by position (0 and -3) so the
# selection keeps working if the column order of `gap` ever changes.
data2 = gap.loc[:, ['country', 'lifeExp']]
country_mpg = ['Germany', 'United States', 'Japan', 'Korea, Rep.', 'United Kingdom']
data3 = data2.groupby('country').mean().sort_values(by='lifeExp', ascending=False)
# Selecting with a list of labels returns the rows in list order, so the
# subset is sorted again before rounding for display.
data3.loc[country_mpg, :].sort_values(by='lifeExp', ascending=False).round(0)
| | lifeExp |
|---|---|
| country | |
| Japan | 75.0 |
| United Kingdom | 74.0 |
| United States | 73.0 |
| Germany | 73.0 |
| Korea, Rep. | 65.0 |
# Average life expectancy over all countries for each survey year.
data4 = gap[['year', 'lifeExp']]
life_exp_by_year = data4.groupby('year')['lifeExp']
life_exp_by_year.mean()
year
1952 49.057620
1957 51.507401
1962 53.609249
1967 55.678290
1972 57.647386
1977 59.570157
1982 61.533197
1987 63.212613
1992 64.160338
1997 65.014676
2002 65.694923
2007 67.007423
Name: lifeExp, dtype: float64
# Population per continent, largest first.
# `gap` holds several rows per country (one per survey year), so summing `pop`
# over every row counts each country once per year and inflates the totals.
# Only the most recent survey year is summed so each country is counted once.
latest_year = gap['year'].max()
data5 = gap.loc[gap['year'] == latest_year, ['continent', 'pop']]
data5.groupby('continent')['pop'].sum().sort_values(ascending=False)
continent
Asia 30507333901
Americas 7351438499
Africa 6187585961
Europe 6181115304
Oceania 212992136
Name: pop, dtype: int64
# Show only the highway (hwy) and city (cty) columns of the mpg data.
mileage_columns = ['hwy', 'cty']
mpg[mileage_columns]
| | hwy | cty |
|---|---|---|
| 0 | 29 | 18 |
| 1 | 29 | 21 |
| 2 | 31 | 20 |
| 3 | 30 | 21 |
| 4 | 26 | 16 |
| ... | ... | ... |
| 229 | 28 | 19 |
| 230 | 29 | 21 |
| 231 | 26 | 16 |
| 232 | 26 | 18 |
| 233 | 26 | 17 |
234 rows × 2 columns
# Take the highest gdpPercap of each continent, then report the largest of
# those per-continent maxima.
continent_peaks = gap.groupby('continent')['gdpPercap'].max()
continent_peaks.sort_values(ascending=False).iloc[0]
113523.1329
IMF 외환위기 : 1997~2001 (gapminder는 5년 간격 자료이므로 1997년과 2002년 값을 비교)
# gdpPercap of 'Korea, Rep.' for the survey years from 1997 through 2002.
data11 = gap[['country', 'year', 'gdpPercap']]
is_korea = data11['country'] == 'Korea, Rep.'
in_window = data11['year'].between(1997, 2002)  # both bounds are inclusive
data12 = data11[is_korea & in_window]
data12.groupby('year')['gdpPercap'].mean()
year
1997 15993.52796
2002 19233.98818
Name: gdpPercap, dtype: float64
import matplotlib.pyplot as plt

# Line chart of gdpPercap by year for 'Korea, Rep.'.
data11 = gap[['country', 'year', 'gdpPercap']]
korea_rows = data11['country'] == 'Korea, Rep.'
data12 = data11.loc[korea_rows]
data13 = data12.groupby('year')['gdpPercap'].mean()

# Plotting the Series puts its index (year) on the x-axis.
plt.plot(data13)
plt.show()

import matplotlib.pyplot as plt

# Line chart of gdpPercap by year for 'United States'.
data11 = gap[['country', 'year', 'gdpPercap']]
us_rows = data11['country'] == 'United States'
data12 = data11.loc[us_rows]
data13 = data12.groupby('year')['gdpPercap'].mean()

# Plotting the Series puts its index (year) on the x-axis.
plt.plot(data13)
plt.show()

댓글