@ -8,7 +8,7 @@ import unittest
from imports import import_xls
def _get_fixture ( filename ) :
return [ os . path . join ( os . path . dirname ( __file__ ) , " fixtures " , filename ) , filename ]
return [ os . path . join ( os . path . dirname ( __file__ ) , " fixtures " , filename ) ]
class TestImportXLS ( unittest . TestCase ) :
@ -42,7 +42,7 @@ class TestImportXLS(unittest.TestCase):
{ " type " : " Any " , " id " : " corner-cases " } )
self . assertEqual ( parsed_file [ 1 ] [ 0 ] [ " table_data " ] [ 3 ] ,
# The type is detected as text, so all values should be text.
[ u ' =function() ' , u ' 3 .0 ' , u ' two spaces after ' ,
[ u ' =function() ' , u ' 3 ' , u ' two spaces after ' ,
u ' two spaces before ' , u ' !@#$ ' , u ' €€€ ' , u ' √∫abc$$ ' , u ' line \n break ' ] )
# check that multiple tables are created when there are multiple sheets in a document
@ -53,12 +53,12 @@ class TestImportXLS(unittest.TestCase):
def test_excel_types ( self ) :
parsed_file = import_xls . parse_file ( * _get_fixture ( ' test_excel_types.xlsx ' ) )
sheet = parsed_file [ 1 ] [ 0 ]
self . _check_col ( sheet , 0 , " int1 " , " Numeric " , [ - 1234123 , ' ' , ' ' ] )
self . _check_col ( sheet , 1 , " int2 " , " Numeric " , [ 5 , ' ' , ' ' ] )
self . _check_col ( sheet , 0 , " int1 " , " Numeric " , [ - 1234123 , None , None ] )
self . _check_col ( sheet , 1 , " int2 " , " Numeric " , [ 5 , None , None ] )
self . _check_col ( sheet , 2 , " textint " , " Any " , [ " 12345678902345689 " , ' ' , ' ' ] )
self . _check_col ( sheet , 3 , " bigint " , " Any " , [ " 320150170634561830 " , ' ' , ' ' ] )
self . _check_col ( sheet , 4 , " num2 " , " Numeric " , [ 123456789.123456 , ' ' , ' ' ] )
self . _check_col ( sheet , 5 , " bignum " , " Numeric " , [ math . exp ( 200 ) , ' ' , ' ' ] )
self . _check_col ( sheet , 4 , " num2 " , " Numeric " , [ 123456789.123456 , None , None ] )
self . _check_col ( sheet , 5 , " bignum " , " Numeric " , [ math . exp ( 200 ) , None , None ] )
self . _check_col ( sheet , 6 , " date1 " , " DateTime " ,
[ calendar . timegm ( datetime . datetime ( 2015 , 12 , 22 , 11 , 59 , 00 ) . timetuple ( ) ) , None , None ] )
self . _check_col ( sheet , 7 , " date2 " , " Date " ,
@ -78,7 +78,7 @@ class TestImportXLS(unittest.TestCase):
None , 1452038400.0 , 1451549340.0 , 1483214940.0 , None ,
1454544000.0 , 1199577600.0 , 1451692800.0 , 1451549340.0 , 1483214940.0 ] )
self . _check_col ( sheet , 1 , " float_not_int " , " Numeric " ,
[ 1 , 2 , 3 , 4 , 5 , " " , 6 , 7 , 8 , 9 , 10 , 10.25 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 ] )
[ 1 , 2 , 3 , 4 , 5 , None , 6 , 7 , 8 , 9 , 10 , 10.25 , 11 , 12 , 13 , 14 , 15 , 16 , 17 , 18 ] )
self . _check_col ( sheet , 2 , " int_not_bool " , " Any " ,
[ 0 , 0 , 1 , 0 , 1 , 0 , 0 , 1 , 0 , 2 , 0 , 0 , 1 , 0 , 0 , 1 , 0 , 0 , 1 , 0 ] )
self . _check_col ( sheet , 3 , " float_not_bool " , " Any " ,
@ -91,12 +91,11 @@ class TestImportXLS(unittest.TestCase):
[ 4.0 , 6.0 , 4.0 , 4.0 , 6.0 , 4.0 , ' -- ' , 6.0 , 4.0 , 4.0 , 4.0 , 4.0 , 4.0 , 6.0 , 6.0 ,
4.0 , 6.0 , ' 3-4 ' , 4.0 , 6.5 ] )
self . _check_col ( sheet , 7 , " float_not_text " , " Numeric " ,
[ - 10.25 , - 8.00 , - 5.75 , - 3.50 , " n/a " , ' 1. ' , " ??? " , 5.50 , " " , " - " ,
12.25 , 0.00 , " " , 0.00 , " -- " , 23.50 , " NA " , 28.00 , 30.25 , 32.50 ] )
[ - 10.25 , - 8.00 , - 5.75 , - 3.50 , " n/a " , ' 1. ' , " ??? " , 5.50 , None , " - " ,
12.25 , 0.00 , None , 0.00 , " -- " , 23.50 , " NA " , 28.00 , 30.25 , 32.50 ] )
def test_excel_single_merged_cell ( self ) :
# An older version of xlrd had a bug where a single cell marked as 'merged' would cause an
# exception.
# An older version had a bug where a single cell marked as 'merged' would cause an exception.
parsed_file = import_xls . parse_file ( * _get_fixture ( ' test_single_merged_cell.xlsx ' ) )
tables = parsed_file [ 1 ]
self . assertEqual ( tables , [ {
@ -110,21 +109,19 @@ class TestImportXLS(unittest.TestCase):
] ,
' table_data ' : [
[ u ' SINGLE MERGED ' , u ' The End ' ] ,
[ 1637384.52 , u ' ' ] ,
[ 2444344.06 , u ' ' ] ,
[ 2444344.06 , u ' ' ] ,
[ 1637384.52 , None ] ,
[ 2444344.06 , None ] ,
[ 2444344.06 , None ] ,
[ u ' ' , u ' ' ] ,
] ,
} ] )
def test_excel_strange_dates ( self ) :
# TODO fails with xlrd.xldate.XLDateAmbiguous: 4.180902777777778
# Check that we don't fail when encountering unusual dates and times (e.g. 0 or 38:00:00).
parsed_file = import_xls . parse_file ( * _get_fixture ( ' strange_dates.xlsx ' ) )
tables = parsed_file [ 1 ]
# We test non-failure, but the result is not really what we want. E.g. "1:10" and "100:20:30"
# would be best left as text, but here become "01:10:00" (after xlrd parses the first as
# datetime.time), and as 4.18... (after xlrd fails and we resort to the numerical value).
# would be best left as text.
self . assertEqual ( tables , [ {
' table_name ' : u ' Sheet1 ' ,
' column_metadata ' : [
@ -132,22 +129,41 @@ class TestImportXLS(unittest.TestCase):
{ ' id ' : ' b ' , ' type ' : ' Date ' } ,
{ ' id ' : ' c ' , ' type ' : ' Any ' } ,
{ ' id ' : ' d ' , ' type ' : ' Any ' } ,
{ ' id ' : ' e ' , ' type ' : ' Numeric ' } ,
{ ' id ' : ' f ' , ' type ' : ' Numeric ' } ,
{ ' id ' : ' e ' , ' type ' : ' DateTime ' } ,
{ ' id ' : ' f ' , ' type ' : ' Date ' } ,
{ ' id ' : ' g ' , ' type ' : ' Any ' } ,
{ ' id ' : ' h ' , ' type ' : ' Date ' } ,
{ ' id ' : ' i ' , ' type ' : ' Numeric ' } ,
{ ' id ' : ' i ' , ' type ' : ' Date ' } ,
] ,
' table_data ' : [
[ u ' 21:14:00 ' ] ,
[ 1568851200.0 ] ,
[ u ' 01:10:00 ' ] ,
[ u ' 10:20:30 ' ] ,
[ 4.180902777777778 ] ,
[ 2 0] ,
[ - 2208713970.0 ] ,
[ - 2207347200. 0] ,
[ u ' 7/4/1776 ' ] ,
[ 205286400.0 ] ,
[ 0 ] ,
[ - 2209161600.0 ] ,
] ,
} ] )
def test_empty_rows ( self ) :
# Check that empty rows aren't imported,
# and that files with lots of empty rows are imported quickly.
# The fixture file is mostly empty but has data in the last row,
# with over a million empty rows in between.
parsed_file = import_xls . parse_file ( * _get_fixture ( ' test_empty_rows.xlsx ' ) )
tables = parsed_file [ 1 ]
self . assertEqual ( tables , [ {
' table_name ' : u ' Sheet1 ' ,
' column_metadata ' : [
{ ' id ' : ' a ' , ' type ' : ' Numeric ' } ,
{ ' id ' : ' b ' , ' type ' : ' Numeric ' } ,
] ,
' table_data ' : [
[ 0 , None , 1 ] ,
[ None , 0 , 2 ] ,
] ,
} ] )