OmniSciDB  6686921089
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
test_readcsv.py
Go to the documentation of this file.
1 import os
2 import io
3 import datetime
4 import pytest
5 import pyarrow as pa
6 from pyarrow import csv
7 import omniscidbe as dbe
8 import ctypes
9 ctypes._dlopen('libDBEngine.so', ctypes.RTLD_GLOBAL)
10 
# Directory holding the CSV fixtures: three directory levels above this file,
# then "Tests/Import/datafiles".
root = os.path.join(
    os.path.dirname(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    ),
    "Tests/Import/datafiles",
)
16 
def test_init():
    """Create the engine shared by the other tests in this module.

    Rebinds the module-level ``engine`` to a ``dbe.PyDbEngine`` built with
    the options below and checks that it does not report itself as closed.
    """
    global engine
    engine = dbe.PyDbEngine(
        enable_union=1,
        enable_columnar_output=1,
        enable_lazy_fetch=0,
        null_div_by_zero=1,
    )
    assert not engine.closed
26 
# Module-level engine handle; test_init() rebinds it via `global engine`.
engine = None
28 
def test_santander():
    """Import the Santander sample CSV and check the table's dimensions.

    Reads ``santander_top1000.csv`` from ``root`` with pyarrow, imports it
    as table ``santander`` and expects ``select *`` to report 202 columns
    and 999 rows.
    """
    table = csv.read_csv(os.path.join(root, "santander_top1000.csv"))
    assert table
    engine.importArrowTable("santander", table)
    assert not engine.closed
    r = engine.executeDML("select * from santander")
    assert r
    assert r.colCount() == 202
    assert r.rowCount() == 999
38 
def test_usecols_csv():
    """Round-trip a small, explicitly typed CSV through the engine.

    The in-memory CSV is parsed with explicit Arrow column types, imported
    as table ``usecols``, read back with ``select *`` as an Arrow record
    batch and compared column by column with ``target``.
    """
    target = {
        'a': [1, 2, 3, 4, 5, 6],
        'b': [2, 3, 4, 5, 6, 7],
        'c': [3, 4, 5, 6, 7, 8],
        'd': [4, 5, 6, 7, 8, 9],
        'e': ['5', '6', '7', '8', '9', '0'],
    }
    # A freshly constructed BytesIO is already positioned at offset 0,
    # so no explicit seek is needed before reading.
    fp = io.BytesIO(
        b'a,b,c,d,e\n1,2,3,4,5\n2,3,4,5,6\n3,4,5,6,7\n4,5,6,7,8\n5,6,7,8,9\n6,7,8,9,0'
    )
    table = csv.read_csv(
        fp,
        convert_options=csv.ConvertOptions(
            column_types={
                'a': pa.int32(),
                'b': pa.int64(),
                'c': pa.int64(),
                'd': pa.int64(),
                'e': pa.string(),
            }
        ),
    )
    assert table
    engine.importArrowTable("usecols", table)
    assert not engine.closed
    cursor = engine.executeDML("select * from usecols")
    assert cursor
    batch = cursor.getArrowRecordBatch()
    assert batch
    assert batch.to_pydict() == target
71 
def test_time_parsing():
    """Round-trip a CSV with a timestamp column through the engine.

    The in-memory CSV is parsed with pyarrow's default type inference,
    imported as table ``time_parsing``, read back with ``select *`` and
    compared with ``target``, whose ``timestamp`` values are
    ``datetime.datetime`` objects.
    """
    target = {
        'timestamp': [
            datetime.datetime(2010, 4, 1, 0, 0),
            datetime.datetime(2010, 4, 1, 0, 30),
            datetime.datetime(2010, 4, 1, 1, 0),
        ],
        'symbol': ['USD/JPY', 'USD/JPY', 'USD/JPY'],
        'high': [93.526, 93.475, 93.421],
        'low': [93.361, 93.352, 93.326],
        'open': [93.518, 93.385, 93.391],
        'close': [93.382, 93.391, 93.384],
        'spread': [0.005, 0.006, 0.006],
        'volume': [3049, 2251, 1577],
    }
    # A freshly constructed BytesIO is already positioned at offset 0,
    # so no explicit seek is needed before reading.
    fp = io.BytesIO(
        b'timestamp,symbol,high,low,open,close,spread,volume\n'
        b'2010-04-01 00:00:00,USD/JPY,93.52600,93.36100,93.51800,93.38200,0.00500,3049\n'
        b'2010-04-01 00:30:00,USD/JPY,93.47500,93.35200,93.38500,93.39100,0.00600,2251\n'
        b'2010-04-01 01:00:00,USD/JPY,93.42100,93.32600,93.39100,93.38400,0.00600,1577\n'
    )
    table = csv.read_csv(fp)
    assert table
    engine.importArrowTable("time_parsing", table)
    assert not engine.closed
    cursor = engine.executeDML("select * from time_parsing")
    assert cursor
    batch = cursor.getArrowRecordBatch()
    assert batch
    assert batch.to_pydict() == target
99 
# NOTE(review): listing line 100 (presumably this test's `def` header) is
# missing from this extract, so the function name is not visible here; the
# numbered lines below are its body.
# The body parses a one-row in-memory CSV whose first header field is empty,
# imports it as table "csv_fillna", selects the fourteen named columns
# (leaving out the unnamed first one) and compares the result with `target`.
101  target = {
102  'CRIM': [0.00632],
103  'ZN': [18.0],
104  'INDUS': [2.31],
105  'CHAS': [0.0],
106  'NOX': [0.538],
107  'RM': [6.575],
108  'AGE': [65.2],
109  'DIS': [4.09],
110  'RAD': [1.0],
111  'TAX': [296.0],
112  'PTRATIO': [15.3],
113  'B': [396.9],
114  'LSTAT': [4.98],
115  'PRICE': [24.0]
116  }
117  fp = io.BytesIO(
118  b',CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE\n'
119  b'0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0\n'
120  )
121  fp.seek(0)
122  table = csv.read_csv(fp)
123  assert table
124  engine.importArrowTable("csv_fillna", table)
125  assert bool(engine.closed) == False
126  cursor = engine.executeDML("select CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE from csv_fillna")
127  assert cursor
128  batch = cursor.getArrowRecordBatch()
129  assert batch
130  assert batch.to_pydict() == target
131 
# NOTE(review): listing line 132 (presumably this test's `def` header) is
# missing from this extract, so the function name is not visible here; the
# numbered lines below are its body.
# The body parses a three-row in-memory CSV in which column "c" is empty on
# every row (declared as int64), imports it as table "test_null_col", and
# expects "c" to come back as [None, None, None].
133  target = {'a': [1, 2, 3], 'b': [1, 2, 3], 'c': [None, None, None]}
134  fp = io.BytesIO(b'a,b,c\n1,1,\n2,2,\n3,3,\n')
135  fp.seek(0)
136  table = csv.read_csv(
137  fp,
138  convert_options=csv.ConvertOptions(
139  column_types={
140  'a': pa.int32(),
141  'b': pa.int64(),
142  'c': pa.int64(),
143  }
144  )
145  )
146  assert table
147  engine.importArrowTable("test_null_col", table)
148  assert bool(engine.closed) == False
149  cursor = engine.executeDML("select * from test_null_col")
150  assert cursor
151  batch = cursor.getArrowRecordBatch()
152  assert batch
153  assert batch.to_pydict() == target
154 
155 
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code to the
    # process, so a failing run is visible to whoever invoked the script.
    raise SystemExit(pytest.main(["-v", __file__]))
158 
def test_usecols_csv
Definition: test_readcsv.py:39
def test_time_parsing
Definition: test_readcsv.py:72
def test_santander
Definition: test_readcsv.py:29