In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
In [2]:
# Use NFL data as the sample dataset.
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
# Open the Wikipedia page in the web browser.
webbrowser.open(website)
Out[2]:
True
In [5]:
# Data can be read straight from the clipboard.
# NOTE(review): presumably the table was copied from the page opened earlier
# before running this cell — confirm; the result depends on the clipboard contents.
nfl_frame = pd.read_clipboard()
In [7]:
# Display the DataFrame.
nfl_frame
Out[7]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West
In [25]:
# The column names are available through .columns.
nfl_frame.columns
Out[25]:
Index(['Rank', 'Team', 'Won', 'Lost', 'Tied*', 'Pct.', 'First Season',
       'Total Games', 'Conference'],
      dtype='object')
In [24]:
# Select a single column by its name using bracket indexing.
nfl_frame['First Season']
Out[24]:
0    1960
1    1920
2    1921
3    1966
4    1996
5    1950
Name: First Season, dtype: int64
In [18]:
# Attribute-style access to a column is also possible.
nfl_frame.Team
Out[18]:
0         Dallas Cowboys
1          Chicago Bears
2      Green Bay Packers
3         Miami Dolphins
4       Baltimore Ravens
5    San Francisco 49ers
Name: Team, dtype: object
In [9]:
# Build a new DataFrame that contains only the chosen columns.
selected_columns = ['Team', 'First Season', 'Total Games']
DataFrame(nfl_frame, columns=selected_columns)
Out[9]:
Team First Season Total Games
0 Dallas Cowboys 1960 894
1 Chicago Bears 1920 1357
2 Green Bay Packers 1921 1339
3 Miami Dolphins 1966 792
4 Baltimore Ravens 1996 326
5 San Francisco 49ers 1950 1003
In [10]:
# Requesting a column that the source frame does not have ('Stadium')
# produces that column filled with NaN.
columns_with_stadium = ['Team', 'First Season', 'Total Games', 'Stadium']
DataFrame(nfl_frame, columns=columns_with_stadium)
Out[10]:
Team First Season Total Games Stadium
0 Dallas Cowboys 1960 894 NaN
1 Chicago Bears 1920 1357 NaN
2 Green Bay Packers 1921 1339 NaN
3 Miami Dolphins 1966 792 NaN
4 Baltimore Ravens 1996 326 NaN
5 San Francisco 49ers 1950 1003 NaN
In [11]:
# Show only the first rows.
nfl_frame.head()
Out[11]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North
In [12]:
# Show only the last rows.
nfl_frame.tail()
Out[12]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West
In [13]:
# A row can be retrieved by its index label.
# .loc is used instead of .ix: the .ix indexer was deprecated in pandas 0.20
# and removed in pandas 1.0, so it raises AttributeError on current versions.
nfl_frame.loc[3]
Out[13]:
Rank                         4
Team            Miami Dolphins
Won                        443
Lost                       345
Tied*                        4
Pct.                     0.562
First Season              1966
Total Games                792
Conference            AFC East
Name: 3, dtype: object
In [14]:
# Assign a single value to the entire column.
nfl_frame['Stadium']="Levi's Stadium" # The string contains an apostrophe ('), so the whole literal is wrapped in double quotes (")
In [15]:
# Display the DataFrame to inspect its current contents.
nfl_frame
Out[15]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference Stadium
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East Levi's Stadium
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North Levi's Stadium
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North Levi's Stadium
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East Levi's Stadium
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North Levi's Stadium
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West Levi's Stadium
In [17]:
# An array can be assigned to a column as long as its length matches the
# number of rows. The length is taken from the frame itself rather than
# hard-coded, so the cell still works when the loaded table has a
# different number of rows.
nfl_frame["Stadium"] = np.arange(len(nfl_frame))
nfl_frame
Out[17]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference Stadium
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East 0
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North 1
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North 2
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East 3
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North 4
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West 5
In [18]:
# Prepare a Series that will be added to the DataFrame.
# The dictionary keys (4 and 0) become the index labels, in that order.
stadiums = Series({4: "Levi's Stadium", 0: "AT&T Stadium"})
In [20]:
# Display the Series.
stadiums
Out[20]:
4    Levi's Stadium
0      AT&T Stadium
dtype: object
In [19]:
# Assign the Series as a column. Values are matched to rows by index label;
# rows with no matching label in the Series show NaN in the output below.
nfl_frame['Stadium']=stadiums
nfl_frame
Out[19]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference Stadium
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East AT&T Stadium
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North NaN
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North NaN
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East NaN
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North Levi's Stadium
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West NaN
In [21]:
# A column can also be deleted.
del nfl_frame['Stadium']
nfl_frame
Out[21]:
Rank Team Won Lost Tied* Pct. First Season Total Games Conference
0 1 Dallas Cowboys 510 378 6 0.574 1960 894 NFC East
1 2 Chicago Bears 752 563 42 0.570 1920 1357 NFC North
2 3 Green Bay Packers 741 561 37 0.567 1921 1339 NFC North
3 4 Miami Dolphins 443 345 4 0.562 1966 792 AFC East
4 5 Baltimore Ravens 182 143 1 0.560 1996 326 AFC North
5 6 San Francisco 49ers 545 444 14 0.550 1950 1003 NFC West
In [22]:
# A DataFrame can also be built from a dictionary: each key becomes a column.
data = dict(
    City=['SF', 'LA', 'NYC'],
    Population=[837000, 3880000, 8400000],
)

city_frame = DataFrame(data=data)

# Display the result.
city_frame
Out[22]:
City Population
0 SF 837000
1 LA 3880000
2 NYC 8400000
In [26]:
# The full feature set of pandas.DataFrame is described in the online docs.
# This points at the current stable API reference; the previous
# pandas-docs/dev/generated/ path comes from an older documentation layout.
website = 'https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html'
webbrowser.open(website)
Out[26]:
True