Class03 Answer:

Write Pandas syntax which mimics this SQL:


INSERT INTO prices3
SELECT cdate,openp,closep, closep - openp AS diff FROM prices
WHERE cdate = '2016-08-01';

INSERT INTO prices3
SELECT cdate,openp,closep, closep - openp AS diff FROM prices
WHERE  cdate BETWEEN '2016-08-01' AND '2016-08-31'
AND    cdate NOT IN (SELECT cdate FROM prices3);

UPDATE prices3 SET diff = 0;

I ran the above SQL and saw this:



ann=# 
ann=# INSERT INTO prices3
ann-# SELECT cdate,openp,closep, closep - openp AS diff FROM prices
ann-# WHERE cdate = '2016-08-01';
INSERT 0 1
ann=# 
ann=# INSERT INTO prices3
ann-# SELECT cdate,openp,closep, closep - openp AS diff FROM prices
ann-# WHERE  cdate BETWEEN '2016-08-01' AND '2016-08-31'
ann-# AND    cdate NOT IN (SELECT cdate FROM prices3);
INSERT 0 5
ann=# 
ann=# UPDATE prices3 SET diff = 0;
UPDATE 221
ann=#
ann=# -- The above NOT IN helps me prevent insert of duplicates:


ann=# SELECT * FROM prices3 WHERE cdate BETWEEN '2016-08-01' AND '2016-09-01' ORDER BY cdate;
   cdate    |   openp    |   closep   | diff 
------------+------------+------------+------
 2016-08-01 | 155.089142 | 161.449997 |    0
 2016-08-02 | 155.687683 | 160.580002 |    0
 2016-08-03 | 154.587158 | 160.669998 |    0
 2016-08-04 | 155.291885 | 161.550003 |    0
 2016-08-05 | 156.392410 | 163.500000 |    0
 2016-08-08 | 158.453949 | 162.039993 |    0
 2016-08-09 | 157.957336 | 161.770004 |    0
 2016-08-10 | 157.928146 | 162.080002 |    0
 2016-08-11 | 157.986557 | 163.529999 |    0
 2016-08-12 | 158.901871 | 161.949997 |    0
 2016-08-15 | 158.132614 | 161.880005 |    0
 2016-08-16 | 156.876511 | 160.699997 |    0
 2016-08-17 | 156.740189 | 160.440002 |    0
 2016-08-18 | 156.662292 | 161.360001 |    0
 2016-08-19 | 156.613617 | 160.039993 |    0
 2016-08-22 | 155.795685 | 160.000000 |    0
 2016-08-23 | 156.117004 | 160.259995 |    0
 2016-08-24 | 155.688583 | 159.050003 |    0
 2016-08-25 | 154.821960 | 158.630005 |    0
 2016-08-26 | 154.705109 | 158.320007 |    0
 2016-08-29 | 154.656433 | 159.720001 |    0
 2016-08-30 | 155.561996 | 159.399994 |    0
 2016-08-31 | 155.464615 | 158.880005 |    0
 2016-09-01 | 154.159836 | 159.539993 |    0
(24 rows)

ann=# 

I did this lab by writing the script listed below:


"""
class03pd26.py

This script should mimic the SQL listed below:

-- Copy rows from table to table:
INSERT INTO prices3
SELECT cdate,openp,closep, closep - openp AS diff FROM prices
WHERE cdate = '2016-08-01';

-- Copy rows from table to table2 where rows not in table2 already:
INSERT INTO prices3
SELECT cdate,openp,closep, closep - openp AS diff FROM prices
WHERE  cdate BETWEEN '2016-08-01' AND '2016-08-31'
AND    cdate NOT IN (SELECT cdate FROM prices3);

-- Update all rows of a column:
UPDATE prices3 SET diff = 0;
"""

# SQL lacks the ability to get data from the web.
# Pandas can get data from the web:

import pandas as pd

prices_df         = pd.read_csv('http://ml4.us/csv/ibm.csv')
prices_df.columns = ['cdate_s', 'openp', 'highp', 'lowp', 'closep', 'adjp', 'volume']

# I should setup prices3:
pred1a_sr  = prices_df.cdate_s > '2016-08-08'
pred1b_sr  = prices_df.cdate_s < '2016-09-01'
pred1_sr   = (pred1a_sr & pred1b_sr)
prices3_df = prices_df.copy()[['cdate_s','openp','closep']][pred1_sr]
prices3_df['diff'] = prices3_df.closep - prices3_df.openp

# Mimic
# INSERT INTO prices3
# SELECT cdate,openp,closep, closep - openp AS diff FROM prices
# WHERE cdate = '2016-08-01';

pred2_sr       = prices_df.cdate_s == '2016-08-01'
tmp_df         = prices_df.copy()[['cdate_s','openp','closep']][pred2_sr]
tmp_df['diff'] = tmp_df.closep - tmp_df.openp
prices3_df     = pd.concat((prices3_df,tmp_df))

# I should report:
print(prices3_df.sort_values(['cdate_s']))

# Mimic
# -- Copy rows from table to table2 where rows not in table2 already:
# INSERT INTO prices3
# SELECT cdate,openp,closep, closep - openp AS diff FROM prices
# WHERE  cdate BETWEEN '2016-08-01' AND '2016-08-31'
# AND    cdate NOT IN (SELECT cdate FROM prices3);

# I should create the first predicate:
pred_first_sr = (prices_df.cdate_s >= '2016-08-01') & (prices_df.cdate_s <= '2016-08-31')
# I should create the 2nd predicate:
pred_isin_sr  = prices_df.cdate_s.isin(prices3_df.cdate_s)

pred_notin_sr = ~pred_isin_sr # Similar to NOT IN syntax in SQL

# I should create the 3rd predicate:
pred3_sr = pred_first_sr & pred_notin_sr
# I should apply the predicate
insert_these_df = prices_df.copy()[['cdate_s','openp','closep']][pred3_sr]
# I should create the diff-column
insert_these_df['diff'] = insert_these_df.closep - insert_these_df.openp
# I should do the insert:
prices3_df              = pd.concat((prices3_df,insert_these_df))

# I should report:
print(prices3_df.sort_values(['cdate_s']))

# Mimic
# UPDATE prices3 SET diff = 0;

prices3_df['diff'] = 0

# I should report:
print(prices3_df.sort_values(['cdate_s']))

'bye'

I saw this:


dan@h79:~/ml4/public/class03demos $ python class03pd26.py
          cdate_s       openp      closep      diff
13739  2016-08-01  155.089142  161.449997  6.360855
13745  2016-08-09  157.957336  161.770004  3.812668
13746  2016-08-10  157.928146  162.080002  4.151856
13747  2016-08-11  157.986557  163.529999  5.543442
13748  2016-08-12  158.901871  161.949997  3.048126
13749  2016-08-15  158.132614  161.880005  3.747391
13750  2016-08-16  156.876511  160.699997  3.823486
13751  2016-08-17  156.740189  160.440002  3.699813
13752  2016-08-18  156.662292  161.360001  4.697709
13753  2016-08-19  156.613617  160.039993  3.426376
13754  2016-08-22  155.795685  160.000000  4.204315
13755  2016-08-23  156.117004  160.259995  4.142991
13756  2016-08-24  155.688583  159.050003  3.361420
13757  2016-08-25  154.821960  158.630005  3.808045
13758  2016-08-26  154.705109  158.320007  3.614898
13759  2016-08-29  154.656433  159.720001  5.063568
13760  2016-08-30  155.561996  159.399994  3.837998
13761  2016-08-31  155.464615  158.880005  3.415390
          cdate_s       openp      closep      diff
13739  2016-08-01  155.089142  161.449997  6.360855
13740  2016-08-02  155.687683  160.580002  4.892319
13741  2016-08-03  154.587158  160.669998  6.082840
13742  2016-08-04  155.291885  161.550003  6.258118
13743  2016-08-05  156.392410  163.500000  7.107590
13744  2016-08-08  158.453949  162.039993  3.586044
13745  2016-08-09  157.957336  161.770004  3.812668
13746  2016-08-10  157.928146  162.080002  4.151856
13747  2016-08-11  157.986557  163.529999  5.543442
13748  2016-08-12  158.901871  161.949997  3.048126
13749  2016-08-15  158.132614  161.880005  3.747391
13750  2016-08-16  156.876511  160.699997  3.823486
13751  2016-08-17  156.740189  160.440002  3.699813
13752  2016-08-18  156.662292  161.360001  4.697709
13753  2016-08-19  156.613617  160.039993  3.426376
13754  2016-08-22  155.795685  160.000000  4.204315
13755  2016-08-23  156.117004  160.259995  4.142991
13756  2016-08-24  155.688583  159.050003  3.361420
13757  2016-08-25  154.821960  158.630005  3.808045
13758  2016-08-26  154.705109  158.320007  3.614898
13759  2016-08-29  154.656433  159.720001  5.063568
13760  2016-08-30  155.561996  159.399994  3.837998
13761  2016-08-31  155.464615  158.880005  3.415390
          cdate_s       openp      closep  diff
13739  2016-08-01  155.089142  161.449997     0
13740  2016-08-02  155.687683  160.580002     0
13741  2016-08-03  154.587158  160.669998     0
13742  2016-08-04  155.291885  161.550003     0
13743  2016-08-05  156.392410  163.500000     0
13744  2016-08-08  158.453949  162.039993     0
13745  2016-08-09  157.957336  161.770004     0
13746  2016-08-10  157.928146  162.080002     0
13747  2016-08-11  157.986557  163.529999     0
13748  2016-08-12  158.901871  161.949997     0
13749  2016-08-15  158.132614  161.880005     0
13750  2016-08-16  156.876511  160.699997     0
13751  2016-08-17  156.740189  160.440002     0
13752  2016-08-18  156.662292  161.360001     0
13753  2016-08-19  156.613617  160.039993     0
13754  2016-08-22  155.795685  160.000000     0
13755  2016-08-23  156.117004  160.259995     0
13756  2016-08-24  155.688583  159.050003     0
13757  2016-08-25  154.821960  158.630005     0
13758  2016-08-26  154.705109  158.320007     0
13759  2016-08-29  154.656433  159.720001     0
13760  2016-08-30  155.561996  159.399994     0
13761  2016-08-31  155.464615  158.880005     0
dan@h79:~/ml4/public/class03demos $ 
dan@h79:~/ml4/public/class03demos $

Class03 Lab


ml4.us About Blog Contact Class01 Class02 Class03 Class04 Class05 Class06 Class07 Class08 Class09 Class10 dan101 Forum Google Hangout Vboxen