In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [3]:
# Sample data: an unordered list of calendar years to be binned below.
years = [
    1990, 1991, 1992, 2008, 2012, 2015,
    1987, 1969, 2013, 2008, 1999,
]
In [4]:
# Group the years by decade: bin edges every 10 years from 1960 through 2020.
decade_bins = list(range(1960, 2020 + 1, 10))
In [5]:
# Bin each year into its decade interval; the result is a Categorical.
# Intervals are closed on the right, so 1990 lands in (1980, 1990].
decade_cat = pd.cut(x=years, bins=decade_bins)
In [6]:
# Display the Categorical produced by pd.cut: one interval label per input year.
decade_cat
Out[6]:
[(1980, 1990], (1990, 2000], (1990, 2000], (2000, 2010], (2010, 2020], ..., (1980, 1990], (1960, 1970], (2010, 2020], (2000, 2010], (1990, 2000]]
Length: 11
Categories (6, object): [(1960, 1970] < (1970, 1980] < (1980, 1990] < (1990, 2000] < (2000, 2010] < (2010, 2020]]
In [7]:
# Show the categories (the interval labels) of the binned result.
decade_cat.categories
Out[7]:
Index(['(1960, 1970]', '(1970, 1980]', '(1980, 1990]', '(1990, 2000]',
       '(2000, 2010]', '(2010, 2020]'],
      dtype='object')
In [8]:
# Count how many years fall into each decade bin (empty bins are reported as 0).
# The top-level pd.value_counts() is deprecated in recent pandas releases;
# wrapping the Categorical in a Series and calling .value_counts() is the
# supported equivalent and keeps the same count-ordered result.
pd.Series(decade_cat).value_counts()
Out[8]:
(2010, 2020]    3
(1990, 2000]    3
(2000, 2010]    2
(1980, 1990]    2
(1960, 1970]    1
(1970, 1980]    0
dtype: int64
In [15]:
# Passing an integer instead of explicit edges asks for that many bins;
# here the years are split into two groups.
pd.cut(years, bins=2)
Out[15]:
[(1968.954, 1992], (1968.954, 1992], (1968.954, 1992], (1992, 2015], (1992, 2015], ..., (1968.954, 1992], (1968.954, 1992], (1992, 2015], (1992, 2015], (1992, 2015]]
Length: 11
Categories (2, object): [(1968.954, 1992] < (1992, 2015]]