mirror of
				https://github.com/gristlabs/grist-core.git
				synced 2025-06-13 20:53:59 +00:00 
			
		
		
		
	(core) updates from grist-core
This commit is contained in:
		
						commit
						9e681677a3
					
				@ -4,6 +4,7 @@ Helper functions for import plugins
 | 
				
			|||||||
import itertools
 | 
					import itertools
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
 | 
					from collections import defaultdict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import six
 | 
					import six
 | 
				
			||||||
from six.moves import zip
 | 
					from six.moves import zip
 | 
				
			||||||
@ -14,6 +15,20 @@ if six.PY2:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
log = logging.getLogger(__name__)
 | 
					log = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def column_count_modal(rows):
 | 
				
			||||||
 | 
					  """ Return the modal value of columns in the row_set's
 | 
				
			||||||
 | 
					  sample. This can be assumed to be the number of columns
 | 
				
			||||||
 | 
					  of the table. """
 | 
				
			||||||
 | 
					  counts = defaultdict(int)
 | 
				
			||||||
 | 
					  for row in rows:
 | 
				
			||||||
 | 
					    length = len([c for c in row if not empty(c)])
 | 
				
			||||||
 | 
					    if length > 1:
 | 
				
			||||||
 | 
					      counts[length] += 1
 | 
				
			||||||
 | 
					  if not len(counts):
 | 
				
			||||||
 | 
					    return 0
 | 
				
			||||||
 | 
					  return max(list(counts.items()), key=lambda k_v: k_v[1])[0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def empty(value):
 | 
					def empty(value):
 | 
				
			||||||
  """ Stringify the value and check that it has a length. """
 | 
					  """ Stringify the value and check that it has a length. """
 | 
				
			||||||
@ -33,7 +48,7 @@ def capitalize(word):
 | 
				
			|||||||
  return word[0].capitalize() + word[1:]
 | 
					  return word[0].capitalize() + word[1:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def _is_numeric(text):
 | 
					def _is_numeric(text):
 | 
				
			||||||
  for t in six.integer_types + (float, complex):
 | 
					  for t in six.integer_types + (float,):
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
      t(text)
 | 
					      t(text)
 | 
				
			||||||
      return True
 | 
					      return True
 | 
				
			||||||
@ -54,7 +69,6 @@ def _is_header(header, data_rows):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  # If it's all text, see if the values in the first row repeat in other rows. That's uncommon for
 | 
					  # If it's all text, see if the values in the first row repeat in other rows. That's uncommon for
 | 
				
			||||||
  # a header.
 | 
					  # a header.
 | 
				
			||||||
  count_repeats = [0 for cell in header]
 | 
					 | 
				
			||||||
  for row in data_rows:
 | 
					  for row in data_rows:
 | 
				
			||||||
    for cell, header_cell in zip(row, header):
 | 
					    for cell, header_cell in zip(row, header):
 | 
				
			||||||
      if cell and cell == header_cell:
 | 
					      if cell and cell == header_cell:
 | 
				
			||||||
@ -78,8 +92,11 @@ def find_first_non_empty_row(rows):
 | 
				
			|||||||
  Returns (data_offset, header) of the first row with non-empty fields
 | 
					  Returns (data_offset, header) of the first row with non-empty fields
 | 
				
			||||||
  or (0, []) if there are no non-empty rows.
 | 
					  or (0, []) if there are no non-empty rows.
 | 
				
			||||||
  """
 | 
					  """
 | 
				
			||||||
 | 
					  tolerance = 1
 | 
				
			||||||
 | 
					  modal = column_count_modal(rows)
 | 
				
			||||||
  for i, row in enumerate(rows):
 | 
					  for i, row in enumerate(rows):
 | 
				
			||||||
    if _count_nonempty(row) > 0:
 | 
					    length = _count_nonempty(row)
 | 
				
			||||||
 | 
					    if length >= modal - tolerance:
 | 
				
			||||||
      return i + 1, row
 | 
					      return i + 1, row
 | 
				
			||||||
  # No non-empty rows.
 | 
					  # No non-empty rows.
 | 
				
			||||||
  return 0, []
 | 
					  return 0, []
 | 
				
			||||||
 | 
				
			|||||||
@ -4,7 +4,6 @@ and returns a object formatted so that it can be used by grist for a bulk add re
 | 
				
			|||||||
"""
 | 
					"""
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import messytables
 | 
					 | 
				
			||||||
import six
 | 
					import six
 | 
				
			||||||
import openpyxl
 | 
					import openpyxl
 | 
				
			||||||
from openpyxl.utils.datetime import from_excel
 | 
					from openpyxl.utils.datetime import from_excel
 | 
				
			||||||
@ -66,15 +65,10 @@ def parse_open_file(file_obj):
 | 
				
			|||||||
      # `if not any(row)` would be slightly faster, but would count `0` as empty.
 | 
					      # `if not any(row)` would be slightly faster, but would count `0` as empty.
 | 
				
			||||||
      if not set(row) <= {None, ""}
 | 
					      if not set(row) <= {None, ""}
 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    sample = [
 | 
					    # Resetting dimensions via openpyxl causes rows to not be padded. Make sure
 | 
				
			||||||
      # Create messytables.Cells for the sake of messytables.headers_guess
 | 
					    # sample rows are padded; get_table_data will handle padding the rest.
 | 
				
			||||||
      [messytables.Cell(cell) for cell in row]
 | 
					    sample = _with_padding(rows[:1000])
 | 
				
			||||||
      # Resetting dimensions via openpyxl causes rows to not be padded. Make sure
 | 
					    data_offset, headers = import_utils.headers_guess(sample)
 | 
				
			||||||
      # sample rows are padded; get_table_data will handle padding the rest.
 | 
					 | 
				
			||||||
      for row in _with_padding(rows[:1000])
 | 
					 | 
				
			||||||
    ]
 | 
					 | 
				
			||||||
    offset, headers = messytables.headers_guess(sample)
 | 
					 | 
				
			||||||
    data_offset = offset + 1  # Add the header line
 | 
					 | 
				
			||||||
    rows = rows[data_offset:]
 | 
					    rows = rows[data_offset:]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Make sure all header values are strings.
 | 
					    # Make sure all header values are strings.
 | 
				
			||||||
 | 
				
			|||||||
@ -13,6 +13,8 @@ def _get_fixture(filename):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class TestImportXLS(unittest.TestCase):
 | 
					class TestImportXLS(unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  maxDiff = None  # Display full diff if any.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def _check_col(self, sheet, index, name, typename, values):
 | 
					  def _check_col(self, sheet, index, name, typename, values):
 | 
				
			||||||
    self.assertEqual(sheet["column_metadata"][index]["id"], name)
 | 
					    self.assertEqual(sheet["column_metadata"][index]["id"], name)
 | 
				
			||||||
    self.assertEqual(sheet["column_metadata"][index]["type"], typename)
 | 
					    self.assertEqual(sheet["column_metadata"][index]["type"], typename)
 | 
				
			||||||
@ -103,17 +105,17 @@ class TestImportXLS(unittest.TestCase):
 | 
				
			|||||||
      'table_name': u'Transaction Report',
 | 
					      'table_name': u'Transaction Report',
 | 
				
			||||||
      'column_metadata': [
 | 
					      'column_metadata': [
 | 
				
			||||||
        {'type': 'Any', 'id': u''},
 | 
					        {'type': 'Any', 'id': u''},
 | 
				
			||||||
        {'type': 'Numeric', 'id': u'Start'},
 | 
					        {'type': 'Any', 'id': u''},
 | 
				
			||||||
        {'type': 'Numeric', 'id': u''},
 | 
					        {'type': 'Numeric', 'id': u''},
 | 
				
			||||||
        {'type': 'Numeric', 'id': u''},
 | 
					        {'type': 'Numeric', 'id': u''},
 | 
				
			||||||
        {'type': 'Any', 'id': u'Seek no easy ways'},
 | 
					        {'type': 'Any', 'id': u''},
 | 
				
			||||||
      ],
 | 
					      ],
 | 
				
			||||||
      'table_data': [
 | 
					      'table_data': [
 | 
				
			||||||
        [u'SINGLE MERGED', u'The End'],
 | 
					        ['', u'SINGLE MERGED', u'The End'],
 | 
				
			||||||
        [1637384.52, None],
 | 
					        ['Start', '1637384.52', ''],
 | 
				
			||||||
        [2444344.06, None],
 | 
					        [None, 2444344.06, None],
 | 
				
			||||||
        [2444344.06, None],
 | 
					        [None, 2444344.06, None],
 | 
				
			||||||
        [u'', u''],
 | 
					        ['Seek no easy ways', u'', u''],
 | 
				
			||||||
      ],
 | 
					      ],
 | 
				
			||||||
    }])
 | 
					    }])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -19,7 +19,6 @@ jdcal==1.4.1
 | 
				
			|||||||
json_table_schema==0.2.1
 | 
					json_table_schema==0.2.1
 | 
				
			||||||
lazy_object_proxy==1.6.0
 | 
					lazy_object_proxy==1.6.0
 | 
				
			||||||
lxml==4.6.3                # used in csv plugin only?
 | 
					lxml==4.6.3                # used in csv plugin only?
 | 
				
			||||||
messytables==0.15.2
 | 
					 | 
				
			||||||
python_dateutil==2.8.2
 | 
					python_dateutil==2.8.2
 | 
				
			||||||
openpyxl==3.0.10
 | 
					openpyxl==3.0.10
 | 
				
			||||||
python_magic==0.4.12
 | 
					python_magic==0.4.12
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user