10_minutes_to_pandas -- Sage

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

s = pd.Series([1,3,5,np.nan,6,8]); s

0      1
1      3
2      5
3    NaN
4      6
5      8
dtype: object

0      1
1      3
2      5
3    NaN
4      6
5      8
dtype: object

dates = pd.date_range('2013-01-01', '2013-01-06')

dates

<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None

<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None

df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=list('ABCD')); df

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818

[6 rows x 4 columns]

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818

[6 rows x 4 columns]

df2 = pd.DataFrame({ 'A' : 1., 'B' : pd.Timestamp('20130102'), 'C' : pd.Series(1, index=list(range(4)), dtype='float32'), 'D' : np.array([3] * 4, dtype='int32'), 'E' : 'foo' })

df2

                  A                   B  C  D    E
0  1.00000000000000 2013-01-02 00:00:00  1  3  foo
1  1.00000000000000 2013-01-02 00:00:00  1  3  foo
2  1.00000000000000 2013-01-02 00:00:00  1  3  foo
3  1.00000000000000 2013-01-02 00:00:00  1  3  foo

[4 rows x 5 columns]

                  A                   B  C  D    E
0  1.00000000000000 2013-01-02 00:00:00  1  3  foo
1  1.00000000000000 2013-01-02 00:00:00  1  3  foo
2  1.00000000000000 2013-01-02 00:00:00  1  3  foo
3  1.00000000000000 2013-01-02 00:00:00  1  3  foo

[4 rows x 5 columns]

df2.dtypes

A            object
B    datetime64[ns]
C           float32
D             int32
E            object
dtype: object

A            object
B    datetime64[ns]
C           float32
D             int32
E            object
dtype: object

df.head()

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892

[5 rows x 4 columns]

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892

[5 rows x 4 columns]

df.tail(3)

                   A         B         C         D
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818

[3 rows x 4 columns]

                   A         B         C         D
2013-01-04 -1.971963  0.634871  0.977957  0.762491
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818

[3 rows x 4 columns]

df.index

<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None

<class 'pandas.tseries.index.DatetimeIndex'>
[2013-01-01 00:00:00, ..., 2013-01-06 00:00:00]
Length: 6, Freq: D, Timezone: None

df.values

array([[ 0.3292223 , -1.02760341,  0.04574179, -0.00503985],
       [-2.13162621, -2.99055305, -0.91813208, -1.11921746],
       [-0.96706357, -0.27050684, -1.14252505,  0.03002641],
       [-1.97196316,  0.63487149,  0.97795723,  0.76249148],
       [-0.42324079,  0.09996291, -0.19358013,  0.58589231],
       [-1.91100396, -0.47265915,  0.74506379, -0.61481822]])

array([[ 0.3292223 , -1.02760341,  0.04574179, -0.00503985],
       [-2.13162621, -2.99055305, -0.91813208, -1.11921746],
       [-0.96706357, -0.27050684, -1.14252505,  0.03002641],
       [-1.97196316,  0.63487149,  0.97795723,  0.76249148],
       [-0.42324079,  0.09996291, -0.19358013,  0.58589231],
       [-1.91100396, -0.47265915,  0.74506379, -0.61481822]])

df.describe()

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean  -1.179279 -0.671081 -0.080912 -0.060111
std    0.996287  1.265457  0.855586  0.711977
min   -2.131626 -2.990553 -1.142525 -1.119217
25%   -1.956723 -0.888867 -0.736994 -0.462374
50%   -1.439034 -0.371583 -0.073919  0.012493
75%   -0.559196  0.007345  0.570233  0.446926
max    0.329222  0.634871  0.977957  0.762491

[8 rows x 4 columns]

              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean  -1.179279 -0.671081 -0.080912 -0.060111
std    0.996287  1.265457  0.855586  0.711977
min   -2.131626 -2.990553 -1.142525 -1.119217
25%   -1.956723 -0.888867 -0.736994 -0.462374
50%   -1.439034 -0.371583 -0.073919  0.012493
75%   -0.559196  0.007345  0.570233  0.446926
max    0.329222  0.634871  0.977957  0.762491

[8 rows x 4 columns]

df.T

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.329222   -2.131626   -0.967064   -1.971963   -0.423241   -1.911004
B   -1.027603   -2.990553   -0.270507    0.634871    0.099963   -0.472659
C    0.045742   -0.918132   -1.142525    0.977957   -0.193580    0.745064
D   -0.005040   -1.119217    0.030026    0.762491    0.585892   -0.614818

[4 rows x 6 columns]

   2013-01-01  2013-01-02  2013-01-03  2013-01-04  2013-01-05  2013-01-06
A    0.329222   -2.131626   -0.967064   -1.971963   -0.423241   -1.911004
B   -1.027603   -2.990553   -0.270507    0.634871    0.099963   -0.472659
C    0.045742   -0.918132   -1.142525    0.977957   -0.193580    0.745064
D   -0.005040   -1.119217    0.030026    0.762491    0.585892   -0.614818

[4 rows x 6 columns]

# In Sage, operations with axis=1 do not work
# df.sort_index(axis=1, ascending=False)
df.T.sort_index(ascending=False).T

                   D         C         B         A
2013-01-01 -0.005040  0.045742 -1.027603  0.329222
2013-01-02 -1.119217 -0.918132 -2.990553 -2.131626
2013-01-03  0.030026 -1.142525 -0.270507 -0.967064
2013-01-04  0.762491  0.977957  0.634871 -1.971963
2013-01-05  0.585892 -0.193580  0.099963 -0.423241
2013-01-06 -0.614818  0.745064 -0.472659 -1.911004

[6 rows x 4 columns]

                   D         C         B         A
2013-01-01 -0.005040  0.045742 -1.027603  0.329222
2013-01-02 -1.119217 -0.918132 -2.990553 -2.131626
2013-01-03  0.030026 -1.142525 -0.270507 -0.967064
2013-01-04  0.762491  0.977957  0.634871 -1.971963
2013-01-05  0.585892 -0.193580  0.099963 -0.423241
2013-01-06 -0.614818  0.745064 -0.472659 -1.911004

[6 rows x 4 columns]

df.sort(columns='B')

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-04 -1.971963  0.634871  0.977957  0.762491

[6 rows x 4 columns]

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-05 -0.423241  0.099963 -0.193580  0.585892
2013-01-04 -1.971963  0.634871  0.977957  0.762491

[6 rows x 4 columns]

html.table(df.values.tolist(), header=df.columns.tolist())





A
B
C
D


 $0.329222296783$ 
 $-1.02760341388$ 
 $0.0457417860993$ 
 $-0.00503984970181$ 


 $-2.1316262074$ 
 $-2.99055305197$ 
 $-0.918132081347$ 
 $-1.11921746202$ 


 $-0.96706357281$ 
 $-0.270506841456$ 
 $-1.14252505049$ 
 $0.030026414723$ 


 $-1.97196316492$ 
 $0.634871485557$ 
 $0.977957234846$ 
 $0.762491481949$ 


 $-0.423240785995$ 
 $0.0999629116528$ 
 $-0.193580129718$ 
 $0.585892313387$ 


 $-1.91100396093$ 
 $-0.472659152377$ 
 $0.745063793709$ 
 $-0.614818216996$





A
B
C
D

df['A']

2013-01-01    0.329222
2013-01-02   -2.131626
2013-01-03   -0.967064
2013-01-04   -1.971963
2013-01-05   -0.423241
2013-01-06   -1.911004
Freq: D, Name: A, dtype: float64

2013-01-01    0.329222
2013-01-02   -2.131626
2013-01-03   -0.967064
2013-01-04   -1.971963
2013-01-05   -0.423241
2013-01-06   -1.911004
Freq: D, Name: A, dtype: float64

#df[0:3]
df.iloc[0:3,]

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026

[3 rows x 4 columns]

                   A         B         C         D
2013-01-01  0.329222 -1.027603  0.045742 -0.005040
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026

[3 rows x 4 columns]

df['2013-01-02':'2013-01-04']

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491

[3 rows x 4 columns]

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026
2013-01-04 -1.971963  0.634871  0.977957  0.762491

[3 rows x 4 columns]

#df.loc[dates[0]]
df.ix[0]

A    0.370320
B    0.110648
C   -0.609493
D    0.965530
Name: 0, dtype: float64

A    0.370320
B    0.110648
C   -0.609493
D    0.965530
Name: 0, dtype: float64

df.loc[:,['A', 'B']]

                   A         B
2013-01-01  0.329222 -1.027603
2013-01-02 -2.131626 -2.990553
2013-01-03 -0.967064 -0.270507
2013-01-04 -1.971963  0.634871
2013-01-05 -0.423241  0.099963
2013-01-06 -1.911004 -0.472659

[6 rows x 2 columns]

                   A         B
2013-01-01  0.329222 -1.027603
2013-01-02 -2.131626 -2.990553
2013-01-03 -0.967064 -0.270507
2013-01-04 -1.971963  0.634871
2013-01-05 -0.423241  0.099963
2013-01-06 -1.911004 -0.472659

[6 rows x 2 columns]

df.loc['20130102':'20130104', ['A', 'B']]

                   A         B
2013-01-02 -2.131626 -2.990553
2013-01-03 -0.967064 -0.270507
2013-01-04 -1.971963  0.634871

[3 rows x 2 columns]

                   A         B
2013-01-02 -2.131626 -2.990553
2013-01-03 -0.967064 -0.270507
2013-01-04 -1.971963  0.634871

[3 rows x 2 columns]

df.loc['20130102',['A', 'B']]

A   -2.131626
B   -2.990553
Name: 2013-01-02 00:00:00, dtype: float64

A   -2.131626
B   -2.990553
Name: 2013-01-02 00:00:00, dtype: float64

#df.loc[dates[0], 'A']
df.loc['20130101', 'A']

0.32922229678262199

0.32922229678262199

#df.at[dates[0], 'A']
df.ix[dates[0], 'A']

Traceback (click to the left of this block for traceback)
...
AttributeError: 'numpy.datetime64' object has no attribute 'name'

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_361.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("I2RmLmF0W2RhdGVzWzBdLCAnQSddCmRmLml4W2RhdGVzWzBdLCAnQSdd"),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpqpchms/___code___.py", line 3, in <module>
    exec compile(u"df.ix[dates[_sage_const_0 ], 'A']" + '\n', '', 'single')
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/tseries/index.py", line 1369, in __getitem__
    return self._simple_new(result, self.name, new_offset, self.tz)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/tseries/index.py", line 446, in _simple_new
    result.name = name
AttributeError: 'numpy.datetime64' object has no attribute 'name'

#df.iloc[3]
df.ix[3]

A   -1.672190
B   -1.078498
C    0.843426
D   -1.578764
Name: 3, dtype: float64

A   -1.672190
B   -1.078498
C    0.843426
D   -1.578764
Name: 3, dtype: float64

df.iloc[3:5, 0:2]

                   A         B
2013-01-04 -1.971963  0.634871
2013-01-05 -0.423241  0.099963

[2 rows x 2 columns]

                   A         B
2013-01-04 -1.971963  0.634871
2013-01-05 -0.423241  0.099963

[2 rows x 2 columns]

df.iloc[1:3, :]

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026

[2 rows x 4 columns]

                   A         B         C         D
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026

[2 rows x 4 columns]

df.iloc[:, 1:3]

                   B         C
2013-01-01 -1.027603  0.045742
2013-01-02 -2.990553 -0.918132
2013-01-03 -0.270507 -1.142525
2013-01-04  0.634871  0.977957
2013-01-05  0.099963 -0.193580
2013-01-06 -0.472659  0.745064

[6 rows x 2 columns]

                   B         C
2013-01-01 -1.027603  0.045742
2013-01-02 -2.990553 -0.918132
2013-01-03 -0.270507 -1.142525
2013-01-04  0.634871  0.977957
2013-01-05  0.099963 -0.193580
2013-01-06 -0.472659  0.745064

[6 rows x 2 columns]

#df.iloc[1,1]
df.ix[1,1]

0.036577458005332408

0.036577458005332408

df.iat[1, 1]

Traceback (click to the left of this block for traceback)
...
ValueError: iAt based indexing can only have integer indexers

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_272.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("ZGYuaWF0WzEsIDFd"),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpv2oWCb/___code___.py", line 3, in <module>
    exec compile(u'df.iat[_sage_const_1 , _sage_const_1 ]
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1257, in __getitem__
    key = self._convert_key(key)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1289, in _convert_key
    raise ValueError("iAt based indexing can only have integer "
ValueError: iAt based indexing can only have integer indexers

x = list('abcdef')

x[4:10]

['e', 'f']

['e', 'f']

x[8:10]

[]

[]

df.iloc[:, 8:10]

Traceback (click to the left of this block for traceback)
...
IndexError: out-of-bounds on slice (start)

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_276.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("ZGYuaWxvY1s6LCA4OjEwXQ=="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpkjSKsW/___code___.py", line 3, in <module>
    exec compile(u'df.iloc[:, _sage_const_8 :_sage_const_10 ]
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1018, in __getitem__
    return self._getitem_tuple(key)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1190, in _getitem_tuple
    retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1210, in _getitem_axis
    return self._get_slice_axis(key, axis=axis)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1202, in _get_slice_axis
    typ='iloc')
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 75, in _slice
    typ=typ)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/frame.py", line 1836, in _slice
    slobj, axis=axis, raise_on_error=raise_on_error)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/internals.py", line 2516, in get_slice
    _check_slice_bounds(slobj, new_axes[axis])
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1523, in _check_slice_bounds
    raise IndexError("out-of-bounds on slice (start)")
IndexError: out-of-bounds on slice (start)

df[df.A > 0]

                   A         B         C        D
2013-01-01  0.329222 -1.027603  0.045742 -0.00504

[1 rows x 4 columns]

                   A         B         C        D
2013-01-01  0.329222 -1.027603  0.045742 -0.00504

[1 rows x 4 columns]

df[df > 0]

                   A         B         C         D
2013-01-01  0.329222       NaN  0.045742       NaN
2013-01-02       NaN       NaN       NaN       NaN
2013-01-03       NaN       NaN       NaN  0.030026
2013-01-04       NaN  0.634871  0.977957  0.762491
2013-01-05       NaN  0.099963       NaN  0.585892
2013-01-06       NaN       NaN  0.745064       NaN

[6 rows x 4 columns]

                   A         B         C         D
2013-01-01  0.329222       NaN  0.045742       NaN
2013-01-02       NaN       NaN       NaN       NaN
2013-01-03       NaN       NaN       NaN  0.030026
2013-01-04       NaN  0.634871  0.977957  0.762491
2013-01-05       NaN  0.099963       NaN  0.585892
2013-01-06       NaN       NaN  0.745064       NaN

[6 rows x 4 columns]

s1 = pd.Series([1,2,3,4,5,6],index=pd.date_range('20130102', '20130107'))

s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: object

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: object

df['F'] = s1

df.at[dates[0], 'A']

Traceback (click to the left of this block for traceback)
...
AttributeError: 'numpy.datetime64' object has no attribute 'name'

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_282.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("ZGYuYXRbZGF0ZXNbMF0sICdBJ10="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpCRl5v1/___code___.py", line 3, in <module>
    exec compile(u"df.at[dates[_sage_const_0 ], 'A']" + '\n', '', 'single')
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/tseries/index.py", line 1369, in __getitem__
    return self._simple_new(result, self.name, new_offset, self.tz)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/tseries/index.py", line 446, in _simple_new
    result.name = name
AttributeError: 'numpy.datetime64' object has no attribute 'name'

df.iat[0,1]

Traceback (click to the left of this block for traceback)
...
ValueError: iAt based indexing can only have integer indexers

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_283.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("ZGYuaWF0WzAsMV0="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmp2NmQDZ/___code___.py", line 3, in <module>
    exec compile(u'df.iat[_sage_const_0 ,_sage_const_1 ]
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1257, in __getitem__
    key = self._convert_key(key)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/indexing.py", line 1289, in _convert_key
    raise ValueError("iAt based indexing can only have integer "
ValueError: iAt based indexing can only have integer indexers

df

                   A         B         C         D    F
2013-01-01  0.329222 -1.027603  0.045742 -0.005040  NaN
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217    1
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026    2
2013-01-04 -1.971963  0.634871  0.977957  0.762491    3
2013-01-05 -0.423241  0.099963 -0.193580  0.585892    4
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818    5

[6 rows x 5 columns]

                   A         B         C         D    F
2013-01-01  0.329222 -1.027603  0.045742 -0.005040  NaN
2013-01-02 -2.131626 -2.990553 -0.918132 -1.119217    1
2013-01-03 -0.967064 -0.270507 -1.142525  0.030026    2
2013-01-04 -1.971963  0.634871  0.977957  0.762491    3
2013-01-05 -0.423241  0.099963 -0.193580  0.585892    4
2013-01-06 -1.911004 -0.472659  0.745064 -0.614818    5

[6 rows x 5 columns]

df.loc[:,'D'] = np.array([5] * len(df))

df

                   A         B         C  D    F
2013-01-01  0.329222 -1.027603  0.045742  5  NaN
2013-01-02 -2.131626 -2.990553 -0.918132  5    1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2
2013-01-04 -1.971963  0.634871  0.977957  5    3
2013-01-05 -0.423241  0.099963 -0.193580  5    4
2013-01-06 -1.911004 -0.472659  0.745064  5    5

[6 rows x 5 columns]

                   A         B         C  D    F
2013-01-01  0.329222 -1.027603  0.045742  5  NaN
2013-01-02 -2.131626 -2.990553 -0.918132  5    1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2
2013-01-04 -1.971963  0.634871  0.977957  5    3
2013-01-05 -0.423241  0.099963 -0.193580  5    4
2013-01-06 -1.911004 -0.472659  0.745064  5    5

[6 rows x 5 columns]

df2 = df.copy()
df2[df2 > 0] = -df2

Traceback (click to the left of this block for traceback)
...
TypeError: Cannot do boolean setting on mixed-type frame

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_287.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("ZGYyID0gZGYuY29weSgpCmRmMltkZjIgPiAwXSA9IC1kZjI="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpQ7mx4G/___code___.py", line 4, in <module>
    exec compile(u'df2[df2 > _sage_const_0 ] = -df2
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/frame.py", line 1860, in __setitem__
    self._setitem_frame(key, value)
  File "/usr/local/sage-6.0/local/lib/python2.7/site-packages/pandas-0.13.0-py2.7-linux-x86_64.egg/pandas/core/frame.py", line 1896, in _setitem_frame
    'Cannot do boolean setting on mixed-type frame')
TypeError: Cannot do boolean setting on mixed-type frame

df2[df2 > 0]

                   A         B         C  D    F
2013-01-01  0.329222       NaN  0.045742  5  NaN
2013-01-02       NaN       NaN       NaN  5    1
2013-01-03       NaN       NaN       NaN  5    2
2013-01-04       NaN  0.634871  0.977957  5    3
2013-01-05       NaN  0.099963       NaN  5    4
2013-01-06       NaN       NaN  0.745064  5    5

[6 rows x 5 columns]

                   A         B         C  D    F
2013-01-01  0.329222       NaN  0.045742  5  NaN
2013-01-02       NaN       NaN       NaN  5    1
2013-01-03       NaN       NaN       NaN  5    2
2013-01-04       NaN  0.634871  0.977957  5    3
2013-01-05       NaN  0.099963       NaN  5    4
2013-01-06       NaN       NaN  0.745064  5    5

[6 rows x 5 columns]

df2

                   A         B         C  D    F
2013-01-01  0.329222 -1.027603  0.045742  5  NaN
2013-01-02 -2.131626 -2.990553 -0.918132  5    1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2
2013-01-04 -1.971963  0.634871  0.977957  5    3
2013-01-05 -0.423241  0.099963 -0.193580  5    4
2013-01-06 -1.911004 -0.472659  0.745064  5    5

[6 rows x 5 columns]

                   A         B         C  D    F
2013-01-01  0.329222 -1.027603  0.045742  5  NaN
2013-01-02 -2.131626 -2.990553 -0.918132  5    1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2
2013-01-04 -1.971963  0.634871  0.977957  5    3
2013-01-05 -0.423241  0.099963 -0.193580  5    4
2013-01-06 -1.911004 -0.472659  0.745064  5    5

[6 rows x 5 columns]

df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])

#df1.loc[dates[0]:dates[1], 'E')
df1.loc['20130101':'20130102', : )

Traceback (click to the left of this block for traceback)
...
SyntaxError: invalid syntax

Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_291.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("I2RmMS5sb2NbZGF0ZXNbMF06ZGF0ZXNbMV0sICdFJykKZGYxLmxvY1snMjAxMzAxMDEnOicyMDEzMDEwMicsIDogKQ=="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpIhpBkb/___code___.py", line 3
    df1.loc['20130101':'20130102', : )
                                     ^
SyntaxError: invalid syntax

df1.loc['20130101':'20130102', 'E'] = 1

df1

                   A         B         C  D    F   E
2013-01-01  0.329222 -1.027603  0.045742  5  NaN   1
2013-01-02 -2.131626 -2.990553 -0.918132  5    1   1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2 NaN
2013-01-04 -1.971963  0.634871  0.977957  5    3 NaN

[4 rows x 6 columns]

                   A         B         C  D    F   E
2013-01-01  0.329222 -1.027603  0.045742  5  NaN   1
2013-01-02 -2.131626 -2.990553 -0.918132  5    1   1
2013-01-03 -0.967064 -0.270507 -1.142525  5    2 NaN
2013-01-04 -1.971963  0.634871  0.977957  5    3 NaN

[4 rows x 6 columns]

df1.dropna(how='any')

                   A         B         C  D  F  E
2013-01-02 -2.131626 -2.990553 -0.918132  5  1  1

[1 rows x 6 columns]

                   A         B         C  D  F  E
2013-01-02 -2.131626 -2.990553 -0.918132  5  1  1

[1 rows x 6 columns]

df1.fillna(value=5)

                   A         B         C  D  F  E
2013-01-01  0.329222 -1.027603  0.045742  5  5  1
2013-01-02 -2.131626 -2.990553 -0.918132  5  1  1
2013-01-03 -0.967064 -0.270507 -1.142525  5  2  5
2013-01-04 -1.971963  0.634871  0.977957  5  3  5

[4 rows x 6 columns]

                   A         B         C  D  F  E
2013-01-01  0.329222 -1.027603  0.045742  5  5  1
2013-01-02 -2.131626 -2.990553 -0.918132  5  1  1
2013-01-03 -0.967064 -0.270507 -1.142525  5  2  5
2013-01-04 -1.971963  0.634871  0.977957  5  3  5

[4 rows x 6 columns]

pd.isnull(df1)

                A      B      C      D      F      E
2013-01-01  False  False  False  False   True  False
2013-01-02  False  False  False  False  False  False
2013-01-03  False  False  False  False  False   True
2013-01-04  False  False  False  False  False   True

[4 rows x 6 columns]

                A      B      C      D      F      E
2013-01-01  False  False  False  False   True  False
2013-01-02  False  False  False  False  False  False
2013-01-03  False  False  False  False  False   True
2013-01-04  False  False  False  False  False   True

[4 rows x 6 columns]

df.mean()

A   -1.179279
B   -0.671081
C   -0.080912
D    5.000000
F    3.000000
dtype: float64

A   -1.179279
B   -0.671081
C   -0.080912
D    5.000000
F    3.000000
dtype: float64

#df.mean(1)
df.T.mean()

2013-01-01    1.086840
2013-01-02   -0.008062
2013-01-03    0.923981
2013-01-04    1.528173
2013-01-05    1.696628
2013-01-06    1.672280
Freq: D, dtype: float64

2013-01-01    1.086840
2013-01-02   -0.008062
2013-01-03    0.923981
2013-01-04    1.528173
2013-01-05    1.696628
2013-01-06    1.672280
Freq: D, dtype: float64

s = pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2); s

2013-01-01    NaN
2013-01-02    NaN
2013-01-03      1
2013-01-04      3
2013-01-05      5
2013-01-06    NaN
Freq: D, dtype: object

2013-01-01    NaN
2013-01-02    NaN
2013-01-03      1
2013-01-04      3
2013-01-05      5
2013-01-06    NaN
Freq: D, dtype: object

df.sub(s, axis='index')

                   A         B         C    D    F
2013-01-01       NaN       NaN       NaN  NaN  NaN
2013-01-02       NaN       NaN       NaN  NaN  NaN
2013-01-03 -1.967064 -1.270507 -2.142525    4    1
2013-01-04 -4.971963 -2.365129 -2.022043    2    0
2013-01-05 -5.423241 -4.900037  -5.19358    0   -1
2013-01-06       NaN       NaN       NaN  NaN  NaN

[6 rows x 5 columns]

                   A         B         C    D    F
2013-01-01       NaN       NaN       NaN  NaN  NaN
2013-01-02       NaN       NaN       NaN  NaN  NaN
2013-01-03 -1.967064 -1.270507 -2.142525    4    1
2013-01-04 -4.971963 -2.365129 -2.022043    2    0
2013-01-05 -5.423241 -4.900037  -5.19358    0   -1
2013-01-06       NaN       NaN       NaN  NaN  NaN

[6 rows x 5 columns]

df.apply(np.cumsum)

                   A         B         C   D   F
2013-01-01  0.329222 -1.027603  0.045742   5 NaN
2013-01-02 -1.802404 -4.018156 -0.872390  10   1
2013-01-03 -2.769467 -4.288663 -2.014915  15   3
2013-01-04 -4.741431 -3.653792 -1.036958  20   6
2013-01-05 -5.164671 -3.553829 -1.230538  25  10
2013-01-06 -7.075675 -4.026488 -0.485474  30  15

[6 rows x 5 columns]

                   A         B         C   D   F
2013-01-01  0.329222 -1.027603  0.045742   5 NaN
2013-01-02 -1.802404 -4.018156 -0.872390  10   1
2013-01-03 -2.769467 -4.288663 -2.014915  15   3
2013-01-04 -4.741431 -3.653792 -1.036958  20   6
2013-01-05 -5.164671 -3.553829 -1.230538  25  10
2013-01-06 -7.075675 -4.026488 -0.485474  30  15

[6 rows x 5 columns]

df.apply(lambda x: x.max() - x.min())

A    2.460849
B    3.625425
C    2.120482
D           0
F           4
dtype: object

A    2.460849
B    3.625425
C    2.120482
D           0
F           4
dtype: object

s = pd.Series(np.random.randint(0,7,size=10)); s

0    6
1    2
2    6
3    0
4    5
5    6
6    3
7    2
8    2
9    5
dtype: int64

0    6
1    2
2    6
3    0
4    5
5    6
6    3
7    2
8    2
9    5
dtype: int64

s.value_counts()

6    3
2    3
5    2
3    1
0    1
dtype: int64

6    3
2    3
5    2
3    1
0    1
dtype: int64

s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s.str.lower()

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

0       a
1       b
2       c
3    aaba
4    baca
5     NaN
6    caba
7     dog
8     cat
dtype: object

df = pd.DataFrame(np.random.randn(10, 4)); df

          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[10 rows x 4 columns]

          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[10 rows x 4 columns]

pieces = [df[:3], df[3:7], df[7:]]; pieces

[          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446

[3 rows x 4 columns],           0         1         2         3
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106

[4 rows x 4 columns],           0         1         2         3
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[3 rows x 4 columns]]

[          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446

[3 rows x 4 columns],           0         1         2         3
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106

[4 rows x 4 columns],           0         1         2         3
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[3 rows x 4 columns]]

pd.concat(pieces)

          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[10 rows x 4 columns]

          0         1         2         3
0  1.928492  1.118287 -2.519915 -0.732903
1 -0.255994 -0.874783 -0.112143 -0.550197
2  0.020601  0.191504  1.031185  0.173446
3 -0.190841 -1.545637  1.370097  0.628026
4  0.339714 -0.241329  1.591237 -0.114874
5  0.065372  1.231525  1.482569 -0.328469
6 -0.359970  0.849471 -1.130912 -1.500106
7  0.457768 -1.622407 -0.736991  0.392162
8  0.794101  1.042252  1.713021  2.795153
9 -0.689289 -0.517617  0.665668 -0.286480

[10 rows x 4 columns]

left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [3, 4]})
left

   key lval
0  foo    1
1  foo    2

[2 rows x 2 columns]

   key lval
0  foo    1
1  foo    2

[2 rows x 2 columns]

right

   key rval
0  foo    3
1  foo    4

[2 rows x 2 columns]

   key rval
0  foo    3
1  foo    4

[2 rows x 2 columns]

pd.merge(left, right, on='key')

   key lval rval
0  foo    1    3
1  foo    1    4
2  foo    2    3
3  foo    2    4

[4 rows x 3 columns]

   key lval rval
0  foo    1    3
1  foo    1    4
2  foo    2    3
3  foo    2    4

[4 rows x 3 columns]

df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']); df

          A         B         C         D
0  0.370320  0.110648 -0.609493  0.965530
1 -0.036627  0.036577 -1.005085  0.379531
2 -0.203456 -0.676061 -0.699941 -0.462796
3 -1.672190 -1.078498  0.843426 -1.578764
4 -0.471450 -0.018858 -1.428270 -1.296049
5  0.971352  0.181037 -0.014627 -0.077370
6 -0.008730  0.574746 -0.546932 -0.345956
7  0.945161 -0.912941  1.247042 -1.055882

[8 rows x 4 columns]

          A         B         C         D
0  0.370320  0.110648 -0.609493  0.965530
1 -0.036627  0.036577 -1.005085  0.379531
2 -0.203456 -0.676061 -0.699941 -0.462796
3 -1.672190 -1.078498  0.843426 -1.578764
4 -0.471450 -0.018858 -1.428270 -1.296049
5  0.971352  0.181037 -0.014627 -0.077370
6 -0.008730  0.574746 -0.546932 -0.345956
7  0.945161 -0.912941  1.247042 -1.055882

[8 rows x 4 columns]

# s = df.iloc[3]
s = df.ix[3]; s

A   -1.672190
B   -1.078498
C    0.843426
D   -1.578764
Name: 3, dtype: float64

A   -1.672190
B   -1.078498
C    0.843426
D   -1.578764
Name: 3, dtype: float64

df.append(s, ignore_index=True)

          A         B         C         D
0  0.370320  0.110648 -0.609493  0.965530
1 -0.036627  0.036577 -1.005085  0.379531
2 -0.203456 -0.676061 -0.699941 -0.462796
3 -1.672190 -1.078498  0.843426 -1.578764
4 -0.471450 -0.018858 -1.428270 -1.296049
5  0.971352  0.181037 -0.014627 -0.077370
6 -0.008730  0.574746 -0.546932 -0.345956
7  0.945161 -0.912941  1.247042 -1.055882
8 -1.672190 -1.078498  0.843426 -1.578764

[9 rows x 4 columns]

          A         B         C         D
0  0.370320  0.110648 -0.609493  0.965530
1 -0.036627  0.036577 -1.005085  0.379531
2 -0.203456 -0.676061 -0.699941 -0.462796
3 -1.672190 -1.078498  0.843426 -1.578764
4 -0.471450 -0.018858 -1.428270 -1.296049
5  0.971352  0.181037 -0.014627 -0.077370
6 -0.008730  0.574746 -0.546932 -0.345956
7  0.945161 -0.912941  1.247042 -1.055882
8 -1.672190 -1.078498  0.843426 -1.578764

[9 rows x 4 columns]

df = pd.DataFrame({ 'A' : ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B' : ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], 'C' : np.random.randn(8), 'D' : np.random.randn(8) })

df

     A      B         C         D
0  foo    one -0.462028 -0.557120
1  bar    one  0.516956 -0.565886
2  foo    two -0.250218 -0.132476
3  bar  three -0.672271  1.173194
4  foo    two  0.415148  0.557836
5  bar    two -1.517287  0.178196
6  foo    one  1.391643 -1.248932
7  foo  three -0.396384 -1.506650

[8 rows x 4 columns]

     A      B         C         D
0  foo    one -0.462028 -0.557120
1  bar    one  0.516956 -0.565886
2  foo    two -0.250218 -0.132476
3  bar  three -0.672271  1.173194
4  foo    two  0.415148  0.557836
5  bar    two -1.517287  0.178196
6  foo    one  1.391643 -1.248932
7  foo  three -0.396384 -1.506650

[8 rows x 4 columns]

df.groupby('A').sum()

            C         D
A                      
bar -1.672603  0.785505
foo  0.698161 -2.887342

[2 rows x 2 columns]

            C         D
A                      
bar -1.672603  0.785505
foo  0.698161 -2.887342

[2 rows x 2 columns]

df.groupby(['A', 'B']).sum()

                  C         D
A   B                        
bar one    0.516956 -0.565886
    three -0.672271  1.173194
    two   -1.517287  0.178196
foo one    0.929615 -1.806052
    three -0.396384 -1.506650
    two    0.164930  0.425360

[6 rows x 2 columns]

                  C         D
A   B                        
bar one    0.516956 -0.565886
    three -0.672271  1.173194
    two   -1.517287  0.178196
foo one    0.929615 -1.806052
    three -0.396384 -1.506650
    two    0.164930  0.425360

[6 rows x 2 columns]

tuples = zip( ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']); tuples

[('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ('baz', 'two'), ('foo',
'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')]

[('bar', 'one'), ('bar', 'two'), ('baz', 'one'), ('baz', 'two'), ('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')]

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

df2 = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B']); df2

                     A         B
first second                    
bar   one    -1.138084  0.045845
      two    -0.721766 -0.766594
baz   one    -0.102921  0.458671
      two    -0.624306 -1.613681
foo   one     0.843396  0.989673
      two     0.259121 -1.407217
qux   one     0.643975 -0.127660
      two     0.081298  0.231508

[8 rows x 2 columns]

                     A         B
first second                    
bar   one    -1.138084  0.045845
      two    -0.721766 -0.766594
baz   one    -0.102921  0.458671
      two    -0.624306 -1.613681
foo   one     0.843396  0.989673
      two     0.259121 -1.407217
qux   one     0.643975 -0.127660
      two     0.081298  0.231508

[8 rows x 2 columns]

#df2 = df2[:4]

stacked = df2.stack(); stacked

first  second   
bar    one     A   -1.138084
               B    0.045845
       two     A   -0.721766
               B   -0.766594
baz    one     A   -0.102921
               B    0.458671
       two     A   -0.624306
               B   -1.613681
foo    one     A    0.843396
               B    0.989673
       two     A    0.259121
               B   -1.407217
qux    one     A    0.643975
               B   -0.127660
       two     A    0.081298
               B    0.231508
dtype: float64

first  second   
bar    one     A   -1.138084
               B    0.045845
       two     A   -0.721766
               B   -0.766594
baz    one     A   -0.102921
               B    0.458671
       two     A   -0.624306
               B   -1.613681
foo    one     A    0.843396
               B    0.989673
       two     A    0.259121
               B   -1.407217
qux    one     A    0.643975
               B   -0.127660
       two     A    0.081298
               B    0.231508
dtype: float64

stacked.unstack()

                     A         B
first second                    
bar   one    -1.138084  0.045845
      two    -0.721766 -0.766594
baz   one    -0.102921  0.458671
      two    -0.624306 -1.613681
foo   one     0.843396  0.989673
      two     0.259121 -1.407217
qux   one     0.643975 -0.127660
      two     0.081298  0.231508

[8 rows x 2 columns]

                     A         B
first second                    
bar   one    -1.138084  0.045845
      two    -0.721766 -0.766594
baz   one    -0.102921  0.458671
      two    -0.624306 -1.613681
foo   one     0.843396  0.989673
      two     0.259121 -1.407217
qux   one     0.643975 -0.127660
      two     0.081298  0.231508

[8 rows x 2 columns]

#stacked.unstack(1)
stacked.unstack('second')

second        one       two
first                      
bar   A -0.721294  1.497937
      B  0.095341 -0.236623
baz   A -1.341615 -1.367798
      B -0.323850  0.899221
foo   A -0.458022  0.142316
      B  0.936772  0.197030
qux   A -0.902071  0.627089
      B -1.144674  0.599214

[8 rows x 2 columns]

second        one       two
first                      
bar   A -0.721294  1.497937
      B  0.095341 -0.236623
baz   A -1.341615 -1.367798
      B -0.323850  0.899221
foo   A -0.458022  0.142316
      B  0.936772  0.197030
qux   A -0.902071  0.627089
      B -1.144674  0.599214

[8 rows x 2 columns]

#stacked.unstack(0)
stacked.unstack('first')

first          bar       baz       foo       qux
second                                          
one    A -0.721294 -1.341615 -0.458022 -0.902071
       B  0.095341 -0.323850  0.936772 -1.144674
two    A  1.497937 -1.367798  0.142316  0.627089
       B -0.236623  0.899221  0.197030  0.599214

[4 rows x 4 columns]

first          bar       baz       foo       qux
second                                          
one    A -0.721294 -1.341615 -0.458022 -0.902071
       B  0.095341 -0.323850  0.936772 -1.144674
two    A  1.497937 -1.367798  0.142316  0.627089
       B -0.236623  0.899221  0.197030  0.599214

[4 rows x 4 columns]

df = pd.DataFrame({ 'A' : ['one', 'one', 'two', 'three'] * 3, 'B' : ['A', 'B', 'C'] * 4, 'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2, 'D' : np.random.randn(12), 'E' : np.random.randn(12)})

df

        A  B    C         D         E
0     one  A  foo -0.575791  1.237530
1     one  B  foo -0.878026 -2.105447
2     two  C  foo  0.358016 -1.244665
3   three  A  bar -1.554278  2.092796
4     one  B  bar -0.361680 -0.612601
5     one  C  bar  0.996817 -0.342134
6     two  A  foo  0.315994  0.053370
7   three  B  foo  0.365110 -1.792427
8     one  C  foo  0.267303  1.612153
9     one  A  bar -0.481900  0.101901
10    two  B  bar  0.934088 -0.738267
11  three  C  bar -0.356125 -1.344649

[12 rows x 5 columns]

        A  B    C         D         E
0     one  A  foo -0.575791  1.237530
1     one  B  foo -0.878026 -2.105447
2     two  C  foo  0.358016 -1.244665
3   three  A  bar -1.554278  2.092796
4     one  B  bar -0.361680 -0.612601
5     one  C  bar  0.996817 -0.342134
6     two  A  foo  0.315994  0.053370
7   three  B  foo  0.365110 -1.792427
8     one  C  foo  0.267303  1.612153
9     one  A  bar -0.481900  0.101901
10    two  B  bar  0.934088 -0.738267
11  three  C  bar -0.356125 -1.344649

[12 rows x 5 columns]

pd.pivot_table(df, values='D', rows=['A', 'B'], cols = ['C'])

C             bar       foo
A     B                    
one   A -0.481900 -0.575791
      B -0.361680 -0.878026
      C  0.996817  0.267303
three A -1.554278       NaN
      B       NaN  0.365110
      C -0.356125       NaN
two   A       NaN  0.315994
      B  0.934088       NaN
      C       NaN  0.358016

[9 rows x 2 columns]

C             bar       foo
A     B                    
one   A -0.481900 -0.575791
      B -0.361680 -0.878026
      C  0.996817  0.267303
three A -1.554278       NaN
      B       NaN  0.365110
      C -0.356125       NaN
two   A       NaN  0.315994
      B  0.934088       NaN
      C       NaN  0.358016

[9 rows x 2 columns]

#rng = pd.date_range('1/1/2012', periods=100, freq='S')
rng = pd.date_range('2013-01-01', '2013-03-01', freq='S')

ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

ts.resample('5Min', how='sum')

2013-01-01 00:00:00    73259
2013-01-01 00:05:00    77648
2013-01-01 00:10:00    76375
2013-01-01 00:15:00    77739
2013-01-01 00:20:00    78859
2013-01-01 00:25:00    75512
2013-01-01 00:30:00    72930
2013-01-01 00:35:00    74449
2013-01-01 00:40:00    74204
2013-01-01 00:45:00    74762
2013-01-01 00:50:00    72786
2013-01-01 00:55:00    77871
2013-01-01 01:00:00    71174
2013-01-01 01:05:00    76263
2013-01-01 01:10:00    66631
...
2013-02-28 22:50:00    74266
2013-02-28 22:55:00    71942
2013-02-28 23:00:00    74738
2013-02-28 23:05:00    74001
2013-02-28 23:10:00    74506
2013-02-28 23:15:00    72185
2013-02-28 23:20:00    73015
2013-02-28 23:25:00    74474
2013-02-28 23:30:00    76912
2013-02-28 23:35:00    74203
2013-02-28 23:40:00    73931
2013-02-28 23:45:00    71752
2013-02-28 23:50:00    69154
2013-02-28 23:55:00    74418
2013-03-01 00:00:00      135
Freq: 5T, Length: 16993

2013-01-01 00:00:00    73259
2013-01-01 00:05:00    77648
2013-01-01 00:10:00    76375
2013-01-01 00:15:00    77739
2013-01-01 00:20:00    78859
2013-01-01 00:25:00    75512
2013-01-01 00:30:00    72930
2013-01-01 00:35:00    74449
2013-01-01 00:40:00    74204
2013-01-01 00:45:00    74762
2013-01-01 00:50:00    72786
2013-01-01 00:55:00    77871
2013-01-01 01:00:00    71174
2013-01-01 01:05:00    76263
2013-01-01 01:10:00    66631
...
2013-02-28 22:50:00    74266
2013-02-28 22:55:00    71942
2013-02-28 23:00:00    74738
2013-02-28 23:05:00    74001
2013-02-28 23:10:00    74506
2013-02-28 23:15:00    72185
2013-02-28 23:20:00    73015
2013-02-28 23:25:00    74474
2013-02-28 23:30:00    76912
2013-02-28 23:35:00    74203
2013-02-28 23:40:00    73931
2013-02-28 23:45:00    71752
2013-02-28 23:50:00    69154
2013-02-28 23:55:00    74418
2013-03-01 00:00:00      135
Freq: 5T, Length: 16993

rng = pd.date_range('2013-01-01', '2013-01-05', freq='D')
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2013-01-01   -1.062925
2013-01-02   -1.662437
2013-01-03   -0.519298
2013-01-04   -0.393173
2013-01-05   -1.789035
Freq: D, dtype: float64

2013-01-01   -1.062925
2013-01-02   -1.662437
2013-01-03   -0.519298
2013-01-04   -0.393173
2013-01-05   -1.789035
Freq: D, dtype: float64

ts_utc = ts.tz_localize('UTC'); ts_utc

2013-01-01 00:00:00+00:00   -1.062925
2013-01-02 00:00:00+00:00   -1.662437
2013-01-03 00:00:00+00:00   -0.519298
2013-01-04 00:00:00+00:00   -0.393173
2013-01-05 00:00:00+00:00   -1.789035
Freq: D, dtype: float64

2013-01-01 00:00:00+00:00   -1.062925
2013-01-02 00:00:00+00:00   -1.662437
2013-01-03 00:00:00+00:00   -0.519298
2013-01-04 00:00:00+00:00   -0.393173
2013-01-05 00:00:00+00:00   -1.789035
Freq: D, dtype: float64

ts_utc.tz_convert('US/Eastern')

2012-12-31 19:00:00-05:00   -1.062925
2013-01-01 19:00:00-05:00   -1.662437
2013-01-02 19:00:00-05:00   -0.519298
2013-01-03 19:00:00-05:00   -0.393173
2013-01-04 19:00:00-05:00   -1.789035
Freq: D, dtype: float64

2012-12-31 19:00:00-05:00   -1.062925
2013-01-01 19:00:00-05:00   -1.662437
2013-01-02 19:00:00-05:00   -0.519298
2013-01-03 19:00:00-05:00   -0.393173
2013-01-04 19:00:00-05:00   -1.789035
Freq: D, dtype: float64

ts.to_period()

2013-01-01   -1.062925
2013-01-02   -1.662437
2013-01-03   -0.519298
2013-01-04   -0.393173
2013-01-05   -1.789035
Freq: D, dtype: float64

2013-01-01   -1.062925
2013-01-02   -1.662437
2013-01-03   -0.519298
2013-01-04   -0.393173
2013-01-05   -1.789035
Freq: D, dtype: float64

ts.plot()

<matplotlib.axes.AxesSubplot object at 0x1d0d6e90>

<matplotlib.axes.AxesSubplot object at 0x1d0d6e90>

plt.savefig('test.png')

10_minutes_to_pandas

4151 days ago by takepwave