mirror of
				https://github.com/gristlabs/grist-core.git
				synced 2025-06-13 20:53:59 +00:00 
			
		
		
		
	(core) Handle importing xls files with invalid dimensions
Summary: This addresses a rare bug where xls files with invalid dimensions could not be imported into Grist due to how openpyxl handles parsing them. Test Plan: Server test. Reviewers: alexmojaki Reviewed By: alexmojaki Differential Revision: https://phab.getgrist.com/D3485
This commit is contained in:
		
							parent
							
								
									561d9696aa
								
							
						
					
					
						commit
						9b08666f96
					
				
							
								
								
									
										
											BIN
										
									
								
								sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								sandbox/grist/imports/fixtures/test_invalid_dimensions.xlsx
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -39,6 +39,10 @@ def parse_open_file(file_obj):
 | 
			
		||||
  export_list = []
 | 
			
		||||
  # A table set is a collection of tables:
 | 
			
		||||
  for sheet in workbook:
 | 
			
		||||
    # openpyxl fails to read xlsx files with incorrect dimensions; we reset here as a precaution.
 | 
			
		||||
    # See https://openpyxl.readthedocs.io/en/stable/optimized.html#worksheet-dimensions.
 | 
			
		||||
    sheet.reset_dimensions()
 | 
			
		||||
 | 
			
		||||
    table_name = sheet.title
 | 
			
		||||
    rows = [
 | 
			
		||||
      list(row)
 | 
			
		||||
@ -50,7 +54,9 @@ def parse_open_file(file_obj):
 | 
			
		||||
    sample = [
 | 
			
		||||
      # Create messytables.Cells for the sake of messytables.headers_guess
 | 
			
		||||
      [messytables.Cell(cell) for cell in row]
 | 
			
		||||
      for row in rows[:1000]
 | 
			
		||||
      # Resetting dimensions via openpyxl causes rows to not be padded. Make sure
 | 
			
		||||
      # sample rows are padded; get_table_data will handle padding the rest.
 | 
			
		||||
      for row in _with_padding(rows[:1000])
 | 
			
		||||
    ]
 | 
			
		||||
    offset, headers = messytables.headers_guess(sample)
 | 
			
		||||
    data_offset = offset + 1  # Add the header line
 | 
			
		||||
@ -100,3 +106,14 @@ def parse_open_file(file_obj):
 | 
			
		||||
 | 
			
		||||
  parse_options = {}
 | 
			
		||||
  return parse_options, export_list
 | 
			
		||||
 | 
			
		||||
def _with_padding(rows):
 | 
			
		||||
  if not rows:
 | 
			
		||||
    return []
 | 
			
		||||
  max_width = max(len(row) for row in rows)
 | 
			
		||||
  min_width = min(len(row) for row in rows)
 | 
			
		||||
  if min_width == max_width:
 | 
			
		||||
    return rows
 | 
			
		||||
  for row in rows:
 | 
			
		||||
    row.extend([""] * (max_width - len(row)))
 | 
			
		||||
  return rows
 | 
			
		||||
 | 
			
		||||
@ -163,7 +163,27 @@ class TestImportXLS(unittest.TestCase):
 | 
			
		||||
      ],
 | 
			
		||||
      'table_data': [
 | 
			
		||||
        [0, None, 1],
 | 
			
		||||
        [None, 0, 2],
 | 
			
		||||
        [u'', 0, 2],
 | 
			
		||||
      ],
 | 
			
		||||
    }])
 | 
			
		||||
 | 
			
		||||
  def test_invalid_dimensions(self):
    # Files whose stored dimensions are invalid (typically written that way by
    # buggy software) must still import with all of their rows. Before the fix,
    # no rows were imported from such files because of how openpyxl parses them.
    fixture_args = _get_fixture('test_invalid_dimensions.xlsx')
    actual_tables = import_xls.parse_file(*fixture_args)[1]

    expected_columns = [
      {'id': col_id, 'type': 'Numeric'}
      for col_id in (u'A', u'B', u'C')
    ]
    expected_tables = [{
      'table_name': 'Sheet1',
      'column_metadata': expected_columns,
      'table_data': [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
      ],
    }]
    self.assertEqual(actual_tables, expected_tables)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user