In [24]:
import numpy as np
from pandas import DataFrame
In [2]:
# Seed NumPy's global random generator so the sample below is reproducible.
np.random.seed(12345)
# 1000 rows x 4 columns of draws from the standard normal distribution.
dframe = DataFrame(np.random.standard_normal(size=(1000, 4)))
In [3]:
# Preview the first five rows of the frame.
dframe.head()
Out[3]:
0 1 2 3
0 -0.204708 0.478943 -0.519439 -0.555730
1 1.965781 1.393406 0.092908 0.281746
2 0.769023 1.246435 1.007189 -1.296221
3 0.274992 0.228913 1.352917 0.886429
4 -2.001637 -0.371843 1.669025 -0.438570
In [4]:
# Preview the last five rows of the frame.
dframe.tail()
Out[4]:
0 1 2 3
995 1.089085 0.251232 -1.451985 1.653126
996 -0.478509 -0.010663 -1.060881 -1.502870
997 -1.946267 1.013592 0.037333 0.133304
998 -1.293122 -0.322542 -0.782960 -0.303340
999 0.089987 0.292291 1.177706 0.882755
In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
dframe.describe()
Out[5]:
0 1 2 3
count 1000.000000 1000.000000 1000.000000 1000.000000
mean -0.067684 0.067924 0.025598 -0.002298
std 0.998035 0.992106 1.006835 0.996794
min -3.428254 -3.548824 -3.184377 -3.745356
25% -0.774890 -0.591841 -0.641675 -0.644144
50% -0.116401 0.101143 0.002073 -0.013611
75% 0.616366 0.780282 0.680391 0.654328
max 3.366626 2.653656 3.260383 3.927528
In [6]:
# Take the first column (label 0) as a Series.
col = dframe[0]
In [7]:
# Preview the first five values of the selected column.
col.head()
Out[7]:
0   -0.204708
1    1.965781
2    0.769023
3    0.274992
4   -2.001637
Name: 0, dtype: float64
In [8]:
# Pick out the entries of the column whose absolute value exceeds 3.
col[col.abs() > 3]
Out[8]:
523   -3.428254
900    3.366626
Name: 0, dtype: float64
In [19]:
# Apply the same test to every column of the DataFrame: keep each row in
# which at least one value has an absolute value above 3.
# `axis` is passed by keyword because pandas 2.0 made the arguments of
# DataFrame.any keyword-only; the positional form `.any(1)` raises TypeError.
dframe[(np.abs(dframe) > 3).any(axis=1)]
Out[19]:
0 1 2 3
5 -0.539741 0.476985 3.248944 -1.021228
97 -0.774363 0.552936 0.106061 3.927528
102 -0.655054 -0.565230 3.176873 0.959533
305 -2.315555 0.457246 -0.025907 -3.399312
324 0.050188 1.951312 3.260383 0.963301
400 0.146326 0.508391 -0.196713 -3.745356
499 -0.293333 -0.242459 -3.056990 1.918403
523 -3.428254 -0.296336 -0.439938 -0.867165
586 0.275144 1.179227 -3.184377 1.369891
808 -0.362528 -3.548824 1.553205 -2.186301
900 3.366626 -2.372214 0.851010 1.332846
In [23]:
# Element-wise sign of every entry (shown as -1 or 1 in the output below).
np.sign(dframe)
Out[23]:
0 1 2 3
0 -1 1 -1 -1
1 1 1 1 1
2 1 1 1 -1
3 1 1 1 1
4 -1 -1 1 -1
5 -1 1 1 -1
6 -1 1 1 1
7 1 1 -1 -1
8 -1 -1 -1 1
9 -1 1 -1 1
10 -1 -1 -1 -1
11 -1 1 1 -1
12 1 1 1 1
13 -1 1 1 -1
14 -1 1 1 -1
15 1 -1 1 1
16 1 -1 -1 -1
17 -1 -1 -1 1
18 1 1 1 -1
19 -1 -1 -1 1
20 1 -1 1 1
21 1 1 -1 1
22 -1 1 1 1
23 -1 1 1 1
24 1 -1 1 1
25 -1 -1 -1 -1
26 -1 -1 1 1
27 -1 1 -1 1
28 1 -1 -1 -1
29 1 -1 1 -1
... ... ... ... ...
970 1 1 1 -1
971 -1 -1 -1 1
972 1 1 -1 -1
973 -1 1 1 -1
974 -1 -1 1 -1
975 -1 -1 1 -1
976 1 1 1 1
977 -1 1 1 1
978 1 -1 1 1
979 1 1 -1 1
980 -1 -1 -1 -1
981 -1 1 1 1
982 -1 1 1 1
983 -1 1 1 1
984 1 -1 1 -1
985 -1 1 1 1
986 -1 -1 1 -1
987 -1 -1 1 -1
988 -1 1 -1 -1
989 1 1 1 1
990 -1 -1 -1 1
991 1 -1 1 1
992 -1 1 -1 -1
993 1 1 -1 1
994 1 -1 1 1
995 1 1 -1 1
996 -1 -1 -1 -1
997 -1 1 1 1
998 -1 -1 -1 -1
999 1 1 1 1

1000 rows × 4 columns

In [20]:
# Cap the data so that no absolute value exceeds 3: every out-of-range entry
# is overwritten in place with 3 carrying that entry's original sign.
exceeds_limit = dframe.abs() > 3
dframe[exceeds_limit] = np.sign(dframe) * 3
In [21]:
# Summarize again after capping: min and max now stay within [-3, 3].
dframe.describe()
Out[21]:
0 1 2 3
count 1000.000000 1000.000000 1000.000000 1000.000000
mean -0.067623 0.068473 0.025153 -0.002081
std 0.995485 0.990253 1.003977 0.989736
min -3.000000 -3.000000 -3.000000 -3.000000
25% -0.774890 -0.591841 -0.641675 -0.644144
50% -0.116401 0.101143 0.002073 -0.013611
75% 0.616366 0.780282 0.680391 0.654328
max 3.000000 2.653656 3.000000 3.000000