import numpy as np
from pandas import DataFrame
# Seed NumPy's global random number generator so the numbers are reproducible.
np.random.seed(seed=12345)
# 1000 rows x 4 columns of samples drawn from the standard normal distribution.
dframe = DataFrame(data=np.random.randn(1000, 4))
# Peek at the first five rows.
dframe.head(n=5)
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | -0.204708 | 0.478943 | -0.519439 | -0.555730 |
| 1 | 1.965781 | 1.393406 | 0.092908 | 0.281746 |
| 2 | 0.769023 | 1.246435 | 1.007189 | -1.296221 |
| 3 | 0.274992 | 0.228913 | 1.352917 | 0.886429 |
| 4 | -2.001637 | -0.371843 | 1.669025 | -0.438570 |
# Show the last five rows of the DataFrame.
dframe.tail()
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 995 | 1.089085 | 0.251232 | -1.451985 | 1.653126 |
| 996 | -0.478509 | -0.010663 | -1.060881 | -1.502870 |
| 997 | -1.946267 | 1.013592 | 0.037333 | 0.133304 |
| 998 | -1.293122 | -0.322542 | -0.782960 | -0.303340 |
| 999 | 0.089987 | 0.292291 | 1.177706 | 0.882755 |
# Let's describe the data: per-column count, mean, standard deviation,
# minimum, quartiles and maximum.
dframe.describe()
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| count | 1000.000000 | 1000.000000 | 1000.000000 | 1000.000000 |
| mean | -0.067684 | 0.067924 | 0.025598 | -0.002298 |
| std | 0.998035 | 0.992106 | 1.006835 | 0.996794 |
| min | -3.428254 | -3.548824 | -3.184377 | -3.745356 |
| 25% | -0.774890 | -0.591841 | -0.641675 | -0.644144 |
| 50% | -0.116401 | 0.101143 | 0.002073 | -0.013611 |
| 75% | 0.616366 | 0.780282 | 0.680391 | 0.654328 |
| max | 3.366626 | 2.653656 | 3.260383 | 3.927528 |
# Select the first column (label 0) as a Series and look at its first rows.
col = dframe[0]
col.head()
0 -0.204708 1 1.965781 2 0.769023 3 0.274992 4 -2.001637 Name: 0, dtype: float64
# Pick out the elements whose absolute value is greater than 3
# (i.e. values below -3 as well as values above 3).
col[col.abs() > 3]
523 -3.428254 900 3.366626 Name: 0, dtype: float64
# Apply the same test to every column of the DataFrame: keep each row in
# which at least one value has an absolute value greater than 3.
# NOTE: `axis` is passed by keyword because pandas 2.x made the arguments of
# DataFrame.any keyword-only; the positional form `.any(1)` raises TypeError.
dframe[(np.abs(dframe) > 3).any(axis=1)]
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 5 | -0.539741 | 0.476985 | 3.248944 | -1.021228 |
| 97 | -0.774363 | 0.552936 | 0.106061 | 3.927528 |
| 102 | -0.655054 | -0.565230 | 3.176873 | 0.959533 |
| 305 | -2.315555 | 0.457246 | -0.025907 | -3.399312 |
| 324 | 0.050188 | 1.951312 | 3.260383 | 0.963301 |
| 400 | 0.146326 | 0.508391 | -0.196713 | -3.745356 |
| 499 | -0.293333 | -0.242459 | -3.056990 | 1.918403 |
| 523 | -3.428254 | -0.296336 | -0.439938 | -0.867165 |
| 586 | 0.275144 | 1.179227 | -3.184377 | 1.369891 |
| 808 | -0.362528 | -3.548824 | 1.553205 | -2.186301 |
| 900 | 3.366626 | -2.372214 | 0.851010 | 1.332846 |
# Element-wise sign of every value: -1 for negative entries, 1 for positive
# entries (and 0 for exact zeros).
np.sign(dframe)
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | -1 | 1 | -1 | -1 |
| 1 | 1 | 1 | 1 | 1 |
| 2 | 1 | 1 | 1 | -1 |
| 3 | 1 | 1 | 1 | 1 |
| 4 | -1 | -1 | 1 | -1 |
| 5 | -1 | 1 | 1 | -1 |
| 6 | -1 | 1 | 1 | 1 |
| 7 | 1 | 1 | -1 | -1 |
| 8 | -1 | -1 | -1 | 1 |
| 9 | -1 | 1 | -1 | 1 |
| 10 | -1 | -1 | -1 | -1 |
| 11 | -1 | 1 | 1 | -1 |
| 12 | 1 | 1 | 1 | 1 |
| 13 | -1 | 1 | 1 | -1 |
| 14 | -1 | 1 | 1 | -1 |
| 15 | 1 | -1 | 1 | 1 |
| 16 | 1 | -1 | -1 | -1 |
| 17 | -1 | -1 | -1 | 1 |
| 18 | 1 | 1 | 1 | -1 |
| 19 | -1 | -1 | -1 | 1 |
| 20 | 1 | -1 | 1 | 1 |
| 21 | 1 | 1 | -1 | 1 |
| 22 | -1 | 1 | 1 | 1 |
| 23 | -1 | 1 | 1 | 1 |
| 24 | 1 | -1 | 1 | 1 |
| 25 | -1 | -1 | -1 | -1 |
| 26 | -1 | -1 | 1 | 1 |
| 27 | -1 | 1 | -1 | 1 |
| 28 | 1 | -1 | -1 | -1 |
| 29 | 1 | -1 | 1 | -1 |
| ... | ... | ... | ... | ... |
| 970 | 1 | 1 | 1 | -1 |
| 971 | -1 | -1 | -1 | 1 |
| 972 | 1 | 1 | -1 | -1 |
| 973 | -1 | 1 | 1 | -1 |
| 974 | -1 | -1 | 1 | -1 |
| 975 | -1 | -1 | 1 | -1 |
| 976 | 1 | 1 | 1 | 1 |
| 977 | -1 | 1 | 1 | 1 |
| 978 | 1 | -1 | 1 | 1 |
| 979 | 1 | 1 | -1 | 1 |
| 980 | -1 | -1 | -1 | -1 |
| 981 | -1 | 1 | 1 | 1 |
| 982 | -1 | 1 | 1 | 1 |
| 983 | -1 | 1 | 1 | 1 |
| 984 | 1 | -1 | 1 | -1 |
| 985 | -1 | 1 | 1 | 1 |
| 986 | -1 | -1 | 1 | -1 |
| 987 | -1 | -1 | 1 | -1 |
| 988 | -1 | 1 | -1 | -1 |
| 989 | 1 | 1 | 1 | 1 |
| 990 | -1 | -1 | -1 | 1 |
| 991 | 1 | -1 | 1 | 1 |
| 992 | -1 | 1 | -1 | -1 |
| 993 | 1 | 1 | -1 | 1 |
| 994 | 1 | -1 | 1 | 1 |
| 995 | 1 | 1 | -1 | 1 |
| 996 | -1 | -1 | -1 | -1 |
| 997 | -1 | 1 | 1 | 1 |
| 998 | -1 | -1 | -1 | -1 |
| 999 | 1 | 1 | 1 | 1 |
1000 rows × 4 columns
# Cap the data so that no absolute value exceeds 3: every outlier is replaced
# by +3 or -3, keeping its original sign.
dframe[dframe.abs() > 3] = 3 * np.sign(dframe)
# The min/max rows of the summary should now lie within [-3, 3].
dframe.describe()
| | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| count | 1000.000000 | 1000.000000 | 1000.000000 | 1000.000000 |
| mean | -0.067623 | 0.068473 | 0.025153 | -0.002081 |
| std | 0.995485 | 0.990253 | 1.003977 | 0.989736 |
| min | -3.000000 | -3.000000 | -3.000000 | -3.000000 |
| 25% | -0.774890 | -0.591841 | -0.641675 | -0.644144 |
| 50% | -0.116401 | 0.101143 | 0.002073 | -0.013611 |
| 75% | 0.616366 | 0.780282 | 0.680391 | 0.654328 |
| max | 3.000000 | 2.653656 | 3.000000 | 3.000000 |