diff --git a/app/server/lib/Export.ts b/app/server/lib/Export.ts index fb0beea1..d89c0644 100644 --- a/app/server/lib/Export.ts +++ b/app/server/lib/Export.ts @@ -10,6 +10,7 @@ import {TableData} from 'app/common/TableData'; import {ActiveDoc} from 'app/server/lib/ActiveDoc'; import {RequestWithLogin} from 'app/server/lib/Authorizer'; import {docSessionFromRequest} from 'app/server/lib/DocSession'; +import { integerParam, optJsonParam, stringParam} from 'app/server/lib/requestUtils'; import {ServerColumnGetters} from 'app/server/lib/ServerColumnGetters'; import * as express from 'express'; import * as _ from 'underscore'; @@ -68,17 +69,17 @@ export interface ExportParameters { tableId: string; viewSectionId: number; sortOrder: number[]; - filters: Filter[] + filters: Filter[]; } /** * Gets export parameters from a request. */ export function parseExportParameters(req: express.Request): ExportParameters { - const tableId = req.query.tableId; - const viewSectionId = parseInt(req.query.viewSection, 10); - const sortOrder = gutil.safeJsonParse(req.query.activeSortSpec, null) as number[]; - const filters: Filter[] = gutil.safeJsonParse(req.query.filters, []) || []; + const tableId = stringParam(req.query.tableId); + const viewSectionId = integerParam(req.query.viewSection); + const sortOrder = optJsonParam(req.query.activeSortSpec, []) as number[]; + const filters: Filter[] = optJsonParam(req.query.filters, []); return { tableId, diff --git a/app/server/lib/GoogleAuth.ts b/app/server/lib/GoogleAuth.ts index 31610837..0e94a213 100644 --- a/app/server/lib/GoogleAuth.ts +++ b/app/server/lib/GoogleAuth.ts @@ -3,7 +3,7 @@ import {ApiError} from 'app/common/ApiError'; import {parseSubdomain} from 'app/common/gristUrls'; import {expressWrap} from 'app/server/lib/expressWrap'; import * as log from 'app/server/lib/log'; -import {getOriginUrl} from 'app/server/lib/requestUtils'; +import {getOriginUrl, optStringParam, stringParam} from 'app/server/lib/requestUtils'; import * as express from 'express'; import {URL} from 'url'; @@ -93,13 +93,13 @@ export async function googleAuthTokenMiddleware( res: express.Response, next: express.NextFunction) { // If access token is in place, proceed - if (!req.query.code) { + if (!optStringParam(req.query.code)) { throw new ApiError("Google Auth endpoint requires a code parameter in the query string", 400); } else { try { const oAuth2Client = _googleAuthClient(); // Decrypt code that was send back from Google Auth service. Uses GOOGLE_CLIENT_SECRET key. - const tokenResponse = await oAuth2Client.getToken(req.query.code); + const tokenResponse = await oAuth2Client.getToken(stringParam(req.query.code)); // Get the access token (access token will be present in a default request configuration). const access_token = tokenResponse.tokens.access_token!; req.query.access_token = access_token; @@ -122,7 +122,7 @@ export function addGoogleAuthEndpoint( messagePage: (req: express.Request, res: express.Response, message: any) => any ) { if (!process.env.GOOGLE_CLIENT_SECRET) { - log.error("Failed to create GoogleAuth endpoint: GOOGLE_CLIENT_SECRET is not defined"); + log.warn("Failed to create GoogleAuth endpoint: GOOGLE_CLIENT_SECRET is not defined"); expressApp.get(authHandlerPath, expressWrap(async (req: express.Request, res: express.Response) => { throw new Error("Send to Google Drive is not configured."); })); @@ -136,20 +136,22 @@ export function addGoogleAuthEndpoint( // our request. It is encrypted (with CLIENT_SECRET) and signed with redirect url. 
// In state query parameter we will receive an url that was send as part of the request to Google. - if (req.query.code) { + if (optStringParam(req.query.code)) { log.debug("GoogleAuth - response from Google with valid code"); - messagePage(req, res, { code: req.query.code, origin: req.query.state }); - } else if (req.query.error) { - log.debug("GoogleAuth - response from Google with error code", req.query.error); - if (req.query.error === "access_denied") { - messagePage(req, res, { error: req.query.error, origin: req.query.state }); + messagePage(req, res, { code: stringParam(req.query.code), + origin: stringParam(req.query.state) }); + } else if (optStringParam(req.query.error)) { + log.debug("GoogleAuth - response from Google with error code", stringParam(req.query.error)); + if (stringParam(req.query.error) === "access_denied") { + messagePage(req, res, { error: stringParam(req.query.error), + origin: stringParam(req.query.state) }); } else { // This should not happen, either code or error is a mandatory query parameter. throw new ApiError("Error authenticating with Google", 500); } } else { const oAuth2Client = _googleAuthClient(); - const scope = req.query.scope || DRIVE_SCOPE; + const scope = optStringParam(req.query.scope) || DRIVE_SCOPE; // Create url for origin parameter for a popup window. const origin = getOriginUrl(req); const authUrl = oAuth2Client.generateAuthUrl({ diff --git a/app/server/lib/GoogleExport.ts b/app/server/lib/GoogleExport.ts index 5c94e319..54ba28d0 100644 --- a/app/server/lib/GoogleExport.ts +++ b/app/server/lib/GoogleExport.ts @@ -3,6 +3,7 @@ import {ActiveDoc} from 'app/server/lib/ActiveDoc'; import {RequestWithLogin} from 'app/server/lib/Authorizer'; import {makeXLSX} from 'app/server/lib/ExportXLSX'; import * as log from 'app/server/lib/log'; +import {optStringParam} from 'app/server/lib/requestUtils'; import {Request, Response} from 'express'; import {PassThrough} from 'stream'; @@ -16,7 +17,7 @@ export async function exportToDrive( res: Response ) { // Token should come from auth middleware - const access_token = req.query.access_token; + const access_token = optStringParam(req.query.access_token); if (!access_token) { throw new Error("No access token - Can't send file to Google Drive"); } @@ -78,6 +79,6 @@ async function sendFileToDrive(fileNameNoExt: string, data: ArrayBuffer, oauth_t // Makes excel file the same way as export to excel works. async function prepareFile(doc: ActiveDoc, req: Request) { const data = await makeXLSX(doc, req); - const name = (req.query.title || doc.docName); + const name = (optStringParam(req.query.title) || doc.docName); return { name, data }; } diff --git a/app/server/lib/NSandbox.ts b/app/server/lib/NSandbox.ts index 255c17c1..d9083cd7 100644 --- a/app/server/lib/NSandbox.ts +++ b/app/server/lib/NSandbox.ts @@ -12,21 +12,59 @@ import {ChildProcess, spawn} from 'child_process'; import * as path from 'path'; import {Stream, Writable} from 'stream'; import * as _ from 'lodash'; -import * as fs from "fs"; +import * as fs from 'fs'; +import * as which from 'which'; type SandboxMethod = (...args: any[]) => any; -export interface ISandboxOptions { +/** + * + * A collection of options for weird and wonderful ways to run Grist. + * The sandbox at heart is just python, but run in different ways + * (sandbox 'flavors': pynbox, docker, gvisor, and unsandboxed). + * + * The "command" is an external program/container to call to run the + * sandbox, and it depends on sandbox flavor. 
Pynbox is built into + * Grist and has a hard-wired command, so the command option should be + * empty. For gvisor and unsandboxed, command is the path to an + * external program to run. For docker, it is the name of an image. + * + * Once python is running, ordinarily some Grist code should be + * started by setting `useGristEntrypoint` (the only exception is + * in tests). + * + * The Grist code that runs is by default grist/main.py. For plugins, + * this is overridden, to run whatever is specified by plugin.script. + * + */ +interface ISandboxOptions { + command?: string; // External program or container to call to run the sandbox. args: string[]; // The arguments to pass to the python process. + + // When doing imports, the sandbox is started somewhat differently. + // Directories are shared with the sandbox that are not otherwise. + // Options for that that are collected in `plugin`. TODO: update + // ISandboxCreationOptions to talk about directories instead of + // mounts, since it may not be possible to remap directories as + // mounts (e.g. for unsandboxed operation). + plugin?: { + importDir: string; // a directory containing data file(s) to import. + pluginDir: string; // a directory containing code for running the import. + script: string; // an entrypoint, relative to pluginDir. + } + + docUrl?: string; // URL to the document, for SELF_HYPERLINK + minimalPipeMode?: boolean; // Whether to use newer 3-pipe operation + deterministicMode?: boolean; // Whether to override time + randomness + exports?: {[name: string]: SandboxMethod}; // Functions made available to the sandboxed process. logCalls?: boolean; // (Not implemented) Whether to log all system calls from the python sandbox. logTimes?: boolean; // Whether to log time taken by calls to python sandbox. - unsilenceLog?: boolean; // Don't silence the sel_ldr logging. - selLdrArgs?: string[]; // Arguments passed to selLdr, for instance the following sets an - // environment variable `{ ... selLdrArgs: ['-E', 'PYTHONPATH=grist'] ... }`. + unsilenceLog?: boolean; // Don't silence the sel_ldr logging (pynbox only). logMeta?: log.ILogMeta; // Log metadata (e.g. including docId) to report in all log messages. - command?: string; - env?: NodeJS.ProcessEnv; + + useGristEntrypoint?: boolean; // Should be set for everything except tests, which + // may want to pass arguments to python directly. } type ResolveRejectPair = [(value?: any) => void, (reason?: unknown) => void]; @@ -39,62 +77,6 @@ type MsgCode = null | true | false; const recordBuffersRoot = process.env.RECORD_SANDBOX_BUFFERS_DIR; export class NSandbox implements ISandbox { - /** - * Helper function to run the nacl sandbox. It takes care of most arguments, similarly to - * nacl/bin/run script, but without the reliance on bash. We can't use bash when -r/-w options - * because on Windows it doesn't pass along the open file descriptors. Bash is also unavailable - * when installing a standalone version on Windows. - */ - public static spawn(options: ISandboxOptions): ChildProcess { - const {command, args: pythonArgs, unsilenceLog, env} = options; - const spawnOptions = { - stdio: ['pipe', 'pipe', 'pipe'] as 'pipe'[], - env - }; - const selLdrArgs = []; - if (!NSandbox._useMinimalPipes(env)) { - // add two more pipes - spawnOptions.stdio.push('pipe', 'pipe'); - // We use these options to set up communication with the sandbox: - // -r 3:3 to associate a file descriptor 3 on the outside of the sandbox with FD 3 on the - // inside, for reading from the inside. 
This becomes `this._streamToSandbox`. - // -w 4:4 to associate FD 4 on the outside with FD 4 on the inside for writing from the inside. - // This becomes `this._streamFromSandbox` - selLdrArgs.push('-r', '3:3', '-w', '4:4'); - } - if (options.selLdrArgs) { selLdrArgs.push(...options.selLdrArgs); } - - if (command) { - return spawn(command, pythonArgs, - {cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions}); - } - - const noLog = unsilenceLog ? [] : - (process.env.OS === 'Windows_NT' ? ['-l', 'NUL'] : ['-l', '/dev/null']); - for (const [key, value] of _.toPairs(env)) { - selLdrArgs.push("-E"); - selLdrArgs.push(`${key}=${value}`); - } - return spawn('sandbox/nacl/bin/sel_ldr', [ - '-B', './sandbox/nacl/lib/irt_core.nexe', '-m', './sandbox/nacl/root:/:ro', - ...noLog, - ...selLdrArgs, - './sandbox/nacl/lib/runnable-ld.so', - '--library-path', '/slib', '/python/bin/python2.7.nexe', - ...pythonArgs - ], - spawnOptions, - ); - } - - // Check if environment is configured for minimal pipes. - private static _useMinimalPipes(env: NodeJS.ProcessEnv | undefined) { - if (!env?.PIPE_MODE) { return false; } - if (env.PIPE_MODE !== 'minimal') { - throw new Error(`unrecognized pipe mode: ${env.PIPE_MODE}`); - } - return true; - } public readonly childProc: ChildProcess; private _logTimes: boolean; @@ -119,16 +101,25 @@ export class NSandbox implements ISandbox { /* * Callers may listen to events from sandbox.childProc (a ChildProcess), e.g. 'close' and 'error'. * The sandbox listens for 'aboutToExit' event on the process, to properly shut down. + * + * Grist interacts with the sandbox via message passing through pipes to an isolated + * process. Some read-only shared code is made available to the sandbox. + * For plugins, read-only data files are made available. + * + * At the time of writing, Grist has been using an NaCl sandbox with python2.7 compiled + * for it for several years (pynbox), and we are now experimenting with other sandboxing + * options. Variants can be activated by passing in a non-default "spawner" function. + * */ - constructor(options: ISandboxOptions) { + constructor(options: ISandboxOptions, spawner: SpawnFn = pynbox) { this._logTimes = Boolean(options.logTimes || options.logCalls); this._exportedFunctions = options.exports || {}; - this.childProc = NSandbox.spawn(options); + this.childProc = spawner(options); this._logMeta = {sandboxPid: this.childProc.pid, ...options.logMeta}; - if (NSandbox._useMinimalPipes(options.env)) { + if (options.minimalPipeMode) { log.rawDebug("3-pipe Sandbox started", this._logMeta); this._streamToSandbox = this.childProc.stdin; this._streamFromSandbox = this.childProc.stdout; @@ -343,67 +334,365 @@ export class NSandbox implements ISandbox { } } +/** + * Functions for spawning all of the currently supported sandboxes. + */ +const spawners = { + pynbox, // Grist's "classic" sandbox - python2 within NaCl. + unsandboxed, // No sandboxing, straight to host python. + // This offers no protection to the host. + docker, // Run sandboxes in distinct docker containers. + gvisor, // Gvisor's runsc sandbox. +}; + +/** + * A sandbox factory. This doesn't do very much beyond remembering a default + * flavor of sandbox (which at the time of writing differs between hosted grist and + * grist-core), and trying to regularize creation options a bit. 
+ * + * The flavor of sandbox to use can be overridden by two environment variables: + * - GRIST_SANDBOX_FLAVOR: should be one of the spawners (pynbox, unsandboxed, docker, + * gvisor) + * - GRIST_SANDBOX: a program or image name to run as the sandbox. Not needed for + * pynbox (it is either built in or not avaiable). For unsandboxed, should be an + * absolute path to python within a virtualenv with all requirements installed. + * For docker, it should be `grist-docker-sandbox` (an image built via makefile + * in `sandbox/docker`) or a derived image. For gvisor, it should be the full path + * to `sandbox/gvisor/run.py` (if runsc available locally) or to + * `sandbox/gvisor/wrap_in_docker.sh` (if runsc should be run using the docker + * image built in that directory). Gvisor is not yet available in grist-core. + */ export class NSandboxCreator implements ISandboxCreator { - public constructor(private _flavor: 'pynbox' | 'unsandboxed') { + private _flavor: keyof typeof spawners; + private _command?: string; + + public constructor(options: {defaultFlavor: keyof typeof spawners}) { + const flavor = process.env.GRIST_SANDBOX_FLAVOR || options.defaultFlavor; + if (!Object.keys(spawners).includes(flavor)) { + throw new Error(`Unrecognized sandbox flavor: ${flavor}`); + } + this._flavor = flavor as keyof typeof spawners; + this._command = process.env.GRIST_SANDBOX; } public create(options: ISandboxCreationOptions): ISandbox { - const pynbox = this._flavor === 'pynbox'; - // Main script to run. - const defaultEntryPoint = pynbox ? 'grist/main.pyc' : 'grist/main.py'; - const args = [options.entryPoint || defaultEntryPoint]; + const args: string[] = []; if (!options.entryPoint && options.comment) { // When using default entry point, we can add on a comment as an argument - it isn't // used, but will show up in `ps` output for the sandbox process. Comment is intended // to be a document name/id. args.push(options.comment); } - const selLdrArgs: string[] = []; - if (options.sandboxMount) { - selLdrArgs.push( - // TODO: Only modules that we share with plugins should be mounted. They could be gathered in - // a "$APPROOT/sandbox/plugin" folder, only which get mounted. - '-m', `${options.sandboxMount}:/sandbox:ro`); - } - if (options.importMount) { - selLdrArgs.push('-m', `${options.importMount}:/importdir:ro`); - } - const pythonVersion = 'python2.7'; - const env: NodeJS.ProcessEnv = { - // Python library path is only configurable when flavor is unsandboxed. - // In this case, expect to find library files in a virtualenv built by core - // buildtools/prepare_python.sh - PYTHONPATH: pynbox ? 'grist:thirdparty' : - path.join(process.cwd(), 'sandbox', 'grist') + ':' + - path.join(process.cwd(), 'venv', 'lib', pythonVersion, 'site-packages'), - - DOC_URL: (options.docUrl || '').replace(/[^-a-zA-Z0-9_:/?&.]/, ''), - - // use stdin/stdout/stderr only. - PIPE_MODE: 'minimal', - - // Making time and randomness act deterministically for testing purposes. - // See test/utils/recordPyCalls.ts - ...(process.env.LIBFAKETIME_PATH ? 
{ // path to compiled binary - DETERMINISTIC_MODE: '1', // tells python to seed the random module - FAKETIME: "2020-01-01 00:00:00", // setting for libfaketime - - // For Linux - LD_PRELOAD: process.env.LIBFAKETIME_PATH, - - // For Mac (https://github.com/wolfcw/libfaketime/blob/master/README.OSX) - DYLD_INSERT_LIBRARIES: process.env.LIBFAKETIME_PATH, - DYLD_FORCE_FLAT_NAMESPACE: '1', - } : {}), - }; - return new NSandbox({ + const translatedOptions: ISandboxOptions = { + minimalPipeMode: true, + deterministicMode: Boolean(process.env.LIBFAKETIME_PATH), + docUrl: options.docUrl, args, logCalls: options.logCalls, - logMeta: options.logMeta, + logMeta: {flavor: this._flavor, command: this._command, + entryPoint: options.entryPoint || '(default)', + ...options.logMeta}, logTimes: options.logTimes, - selLdrArgs, - env, - ...(pynbox ? {} : {command: pythonVersion}), - }); + command: this._command, + useGristEntrypoint: true, + }; + if (options.entryPoint) { + translatedOptions.plugin = { + script: options.entryPoint, + pluginDir: options.sandboxMount || '', + importDir: options.importMount || '', + }; + } + return new NSandbox(translatedOptions, spawners[this._flavor]); } } + +// A function that takes sandbox options and starts a sandbox process. +type SpawnFn = (options: ISandboxOptions) => ChildProcess; + +/** + * Helper function to run a nacl sandbox. It takes care of most arguments, similarly to + * nacl/bin/run script, but without the reliance on bash. We can't use bash when -r/-w options + * because on Windows it doesn't pass along the open file descriptors. Bash is also unavailable + * when installing a standalone version on Windows. + * + * This is quite old code, with attention to Windows support that is no longer tested. + * I've done my best to avoid changing behavior by not touching it too much. + */ +function pynbox(options: ISandboxOptions): ChildProcess { + const {command, args: pythonArgs, unsilenceLog, plugin} = options; + if (command) { + throw new Error("NaCl can only run the specific python2.7 package built for it"); + } + if (options.useGristEntrypoint) { + pythonArgs.unshift(plugin?.script || 'grist/main.pyc'); + } + const spawnOptions = { + stdio: ['pipe', 'pipe', 'pipe'] as 'pipe'[], + env: getWrappingEnv(options) + }; + const wrapperArgs = new FlagBag({env: '-E', mount: '-m'}); + if (plugin) { + + // TODO: Only modules that we share with plugins should be mounted. They could be gathered in + // a "$APPROOT/sandbox/plugin" folder, only which get mounted. + wrapperArgs.addMount(`${plugin.pluginDir}:/sandbox:ro`); + wrapperArgs.addMount(`${plugin.importDir}:/importdir:ro`); + } + + if (!options.minimalPipeMode) { + // add two more pipes + spawnOptions.stdio.push('pipe', 'pipe'); + // We use these options to set up communication with the sandbox: + // -r 3:3 to associate a file descriptor 3 on the outside of the sandbox with FD 3 on the + // inside, for reading from the inside. This becomes `this._streamToSandbox`. + // -w 4:4 to associate FD 4 on the outside with FD 4 on the inside for writing from the inside. + // This becomes `this._streamFromSandbox` + wrapperArgs.push('-r', '3:3', '-w', '4:4'); + } + wrapperArgs.addAllEnv(getInsertedEnv(options)); + wrapperArgs.addEnv('PYTHONPATH', 'grist:thirdparty'); + + const noLog = unsilenceLog ? [] : + (process.env.OS === 'Windows_NT' ? 
['-l', 'NUL'] : ['-l', '/dev/null']); + return spawn('sandbox/nacl/bin/sel_ldr', [ + '-B', './sandbox/nacl/lib/irt_core.nexe', '-m', './sandbox/nacl/root:/:ro', + ...noLog, + ...wrapperArgs.get(), + './sandbox/nacl/lib/runnable-ld.so', + '--library-path', '/slib', '/python/bin/python2.7.nexe', + ...pythonArgs + ], spawnOptions); +} + +/** + * Helper function to run python without sandboxing. GRIST_SANDBOX should have + * been set with an absolute path to a version of python within a virtualenv that + * has all the dependencies installed (e.g. the sandbox_venv3 virtualenv created + * by `./build python3`. Using system python works too, if all dependencies have + * been installed globally. + */ +function unsandboxed(options: ISandboxOptions): ChildProcess { + const {args: pythonArgs, plugin} = options; + const paths = getAbsolutePaths(options); + if (options.useGristEntrypoint) { + pythonArgs.unshift(paths.plugin?.script || paths.main); + } + const spawnOptions = { + stdio: ['pipe', 'pipe', 'pipe'] as 'pipe'[], + env: { + PYTHONPATH: paths.engine, + IMPORTDIR: plugin?.importDir, + ...getInsertedEnv(options), + ...getWrappingEnv(options), + } + }; + if (!options.minimalPipeMode) { + spawnOptions.stdio.push('pipe', 'pipe'); + } + let command = options.command; + if (!command) { + // No command specified. In this case, grist-core looks for a "venv" + // virtualenv; a python3 virtualenv would be in "sandbox_venv3". + // TODO: rationalize this, it is a product of haphazard growth. + for (const venv of ['sandbox_venv3', 'venv']) { + const pythonPath = path.join(process.cwd(), venv, 'bin', 'python'); + if (fs.existsSync(pythonPath)) { + command = pythonPath; + break; + } + } + // Fall back on system python. + if (!command) { + command = which.sync('python'); + } + } + return spawn(command, pythonArgs, + {cwd: path.join(process.cwd(), 'sandbox'), ...spawnOptions}); +} + +/** + * Helper function to run python in gvisor's runsc, with multiple + * sandboxes run within the same container. GRIST_SANDBOX should + * point to `sandbox/gvisor/run.py` (to map call onto gvisor's runsc + * directly) or `wrap_in_docker.sh` (to use runsc within a container). + * Be sure to read setup instructions in that directory. + */ +function gvisor(options: ISandboxOptions): ChildProcess { + const {command, args: pythonArgs} = options; + if (!command) { throw new Error("gvisor operation requires GRIST_SANDBOX"); } + if (!options.minimalPipeMode) { + throw new Error("gvisor only supports 3-pipe operation"); + } + const paths = getAbsolutePaths(options); + const wrapperArgs = new FlagBag({env: '-E', mount: '-m'}); + wrapperArgs.addEnv('PYTHONPATH', paths.engine); + wrapperArgs.addAllEnv(getInsertedEnv(options)); + wrapperArgs.addMount(paths.sandboxDir); + if (paths.plugin) { + wrapperArgs.addMount(paths.plugin.pluginDir); + wrapperArgs.addMount(paths.plugin.importDir); + wrapperArgs.addEnv('IMPORTDIR', paths.plugin.importDir); + pythonArgs.unshift(paths.plugin.script); + } else if (options.useGristEntrypoint) { + pythonArgs.unshift(paths.main); + } + if (options.deterministicMode) { + wrapperArgs.push('--faketime', FAKETIME); + } + return spawn(command, [...wrapperArgs.get(), 'python', '--', ...pythonArgs]); +} + +/** + * Helper function to run python in a container. Each sandbox run in a + * distinct container. GRIST_SANDBOX should be the name of an image where + * `python` can be run and all Grist dependencies are installed. See + * `sandbox/docker` for more. 
+ */ +function docker(options: ISandboxOptions): ChildProcess { + const {args: pythonArgs, command} = options; + if (options.useGristEntrypoint) { + pythonArgs.unshift(options.plugin?.script || 'grist/main.py'); + } + if (!options.minimalPipeMode) { + throw new Error("docker only supports 3-pipe operation (although runc has --preserve-file-descriptors)"); + } + const paths = getAbsolutePaths(options); + const plugin = paths.plugin; + const wrapperArgs = new FlagBag({env: '--env', mount: '-v'}); + if (plugin) { + wrapperArgs.addMount(`${plugin.pluginDir}:/sandbox:ro`); + wrapperArgs.addMount(`${plugin.importDir}:/importdir:ro`); + } + wrapperArgs.addMount(`${paths.engine}:/grist:ro`); + wrapperArgs.addAllEnv(getInsertedEnv(options)); + wrapperArgs.addEnv('PYTHONPATH', 'grist:thirdparty'); + const commandParts: string[] = ['python']; + if (options.deterministicMode) { + // DETERMINISTIC_MODE is already set by getInsertedEnv(). We also take + // responsibility here for running faketime around python. + commandParts.unshift('faketime', '-f', FAKETIME); + } + const dockerPath = which.sync('docker'); + return spawn(dockerPath, [ + 'run', '--rm', '-i', '--network', 'none', + ...wrapperArgs.get(), + command || 'grist-docker-sandbox', // this is the docker image to use + ...commandParts, + ...pythonArgs, + ]); +} + +/** + * Collect environment variables that should end up set within the sandbox. + */ +function getInsertedEnv(options: ISandboxOptions) { + const env: NodeJS.ProcessEnv = { + DOC_URL: (options.docUrl || '').replace(/[^-a-zA-Z0-9_:/?&.]/, ''), + + // use stdin/stdout/stderr only. + PIPE_MODE: options.minimalPipeMode ? 'minimal' : 'classic', + }; + + if (options.deterministicMode) { + // Making time and randomness act deterministically for testing purposes. + // See test/utils/recordPyCalls.ts + // tells python to seed the random module + env.DETERMINISTIC_MODE = '1'; + } + return env; +} + +/** + * Collect environment variables to activate faketime if needed. The paths + * here only make sense for unsandboxed operation, or for pynbox. For gvisor, + * faketime doesn't work, and must be done inside the sandbox. For docker, + * likewise wrapping doesn't make sense. In those cases, LIBFAKETIME_PATH can + * just be set to ON to activate faketime in a sandbox dependent manner. + */ +function getWrappingEnv(options: ISandboxOptions) { + const env: NodeJS.ProcessEnv = options.deterministicMode ? { + // Making time and randomness act deterministically for testing purposes. + // See test/utils/recordPyCalls.ts + FAKETIME, // setting for libfaketime + // For Linux + LD_PRELOAD: process.env.LIBFAKETIME_PATH, + + // For Mac (https://github.com/wolfcw/libfaketime/blob/master/README.OSX) + DYLD_INSERT_LIBRARIES: process.env.LIBFAKETIME_PATH, + DYLD_FORCE_FLAT_NAMESPACE: '1', + } : {}; + return env; +} + +/** + * Extract absolute paths from options. By sticking with the directory + * structure on the host rather than remapping, we can simplify nesting + * wrappers, or cases where remapping isn't possible. It does leak the names + * of the host directories though, and there could be silly complications if the + * directories have spaces or other idiosyncracies. When committing to a sandbox + * technology, for stand-alone Grist, it would be worth rethinking this. + */ +function getAbsolutePaths(options: ISandboxOptions) { + // Get path to sandbox directory - this is a little idiosyncratic to work well + // in grist-core. 
It is important to use real paths since we may be viewing + // the file system through a narrow window in a container. + const sandboxDir = path.join(fs.realpathSync(path.join(process.cwd(), 'sandbox', 'grist')), + '..'); + // Copy plugin options, and then make them absolute. + const plugin = options.plugin && { ...options.plugin }; + if (plugin) { + plugin.pluginDir = fs.realpathSync(plugin.pluginDir); + plugin.importDir = fs.realpathSync(plugin.importDir); + // Plugin dir is ..../sandbox, and entry point is sandbox/... + // This may not be a general rule, it may be just for the "core" plugin, but + // that suffices for now. + plugin.script = path.join(plugin.pluginDir, '..', plugin.script); + } + return { + sandboxDir, + plugin, + main: path.join(sandboxDir, 'grist/main.py'), + engine: path.join(sandboxDir, 'grist'), + }; +} + +/** + * A tiny abstraction to make code setting up command line arguments a bit + * easier to read. The sandboxes are quite similar in spirit, but differ + * a bit in exact flags used. + */ +class FlagBag { + private _args: string[] = []; + + constructor(private _options: {env: '--env'|'-E', mount: '-m'|'-v'}) { + } + + // channel env variables for sandbox via -E / --env + public addEnv(key: string, value: string|undefined) { + this._args.push(this._options.env, key + '=' + (value || '')); + } + + // Channel all of the supplied env variables + public addAllEnv(env: NodeJS.ProcessEnv) { + for (const [key, value] of _.toPairs(env)) { + this.addEnv(key, value); + } + } + + // channel shared directory for sandbox via -m / -v + public addMount(share: string) { + this._args.push(this._options.mount, share); + } + + // add some ad-hoc arguments + public push(...args: string[]) { + this._args.push(...args); + } + + // get the final list of arguments + public get() { return this._args; } +} + +// Standard time to default to if faking time. 
+const FAKETIME = '2020-01-01 00:00:00'; diff --git a/app/server/lib/requestUtils.ts b/app/server/lib/requestUtils.ts index 303a78c0..6248f782 100644 --- a/app/server/lib/requestUtils.ts +++ b/app/server/lib/requestUtils.ts @@ -233,6 +233,10 @@ export function optIntegerParam(p: any): number|undefined { return undefined; } +export function optJsonParam(p: any, defaultValue: any): any { + if (typeof p !== 'string') { return defaultValue; } + return gutil.safeJsonParse(p, defaultValue); +} export interface RequestWithGristInfo extends Request { gristInfo?: string; diff --git a/app/server/serverMethods.ts b/app/server/serverMethods.ts index 61fe06a3..80d481a8 100644 --- a/app/server/serverMethods.ts +++ b/app/server/serverMethods.ts @@ -3,6 +3,7 @@ import {parseExportFileName, parseExportParameters} from 'app/server/lib/Export' import {makeCSV} from 'app/server/lib/ExportCSV'; import {makeXLSX} from 'app/server/lib/ExportXLSX'; import * as log from 'app/server/lib/log'; +import {integerParam, stringParam} from 'app/server/lib/requestUtils'; import * as contentDisposition from 'content-disposition'; import * as express from 'express'; @@ -14,8 +15,8 @@ export async function generateCSV(req: express.Request, res: express.Response, c sortOrder } = parseExportParameters(req); - const clientId = req.query.clientId; - const docFD = parseInt(req.query.docFD, 10); + const clientId = stringParam(req.query.clientId); + const docFD = integerParam(req.query.docFD); const client = comm.getClient(clientId); const docSession = client.getDocSession(docFD); const activeDoc = docSession.activeDoc; @@ -41,8 +42,8 @@ export async function generateCSV(req: express.Request, res: express.Response, c export async function generateXLSX(req: express.Request, res: express.Response, comm: Comm) { log.debug(`Generating .xlsx file`); - const clientId = req.query.clientId; - const docFD = parseInt(req.query.docFD, 10); + const clientId = stringParam(req.query.clientId); + const docFD = integerParam(req.query.docFD); const client = comm.getClient(clientId); const docSession = client.getDocSession(docFD); const activeDoc = docSession.activeDoc; diff --git a/buildtools/prepare_python3.sh b/buildtools/prepare_python3.sh new file mode 100755 index 00000000..5e9aee44 --- /dev/null +++ b/buildtools/prepare_python3.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -e + +if [ ! -e sandbox_venv3 ]; then + virtualenv -ppython3 sandbox_venv3 +fi + +. sandbox_venv3/bin/activate + +pip install --no-deps -r sandbox/requirements3.txt diff --git a/package.json b/package.json index 40b5f2b9..ce0a4c2e 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "@types/sqlite3": "3.1.6", "@types/tmp": "0.0.33", "@types/uuid": "3.4.4", + "@types/which": "2.0.1", "catw": "1.0.1", "chai": "4.2.0", "chai-as-promised": "7.1.1", diff --git a/plugins/core/manifest.yml b/plugins/core/manifest.yml new file mode 100644 index 00000000..e1abb1c3 --- /dev/null +++ b/plugins/core/manifest.yml @@ -0,0 +1,24 @@ +name: core +version: 0.0.0 +description: Grist core features +components: + safePython: sandbox/main.py +contributions: + fileParsers: + - fileExtensions: ["csv"] + parseFile: + component: safePython + name: csv_parser + - fileExtensions: ["xls", "xlsx", "tsv", "txt", "xlsm"] + parseFile: + component: safePython + name: xls_parser + - fileExtensions: ["json"] + parseFile: + component: safePython + name: json_parser + +scripts: + build: + # Note that ${XUNIT:+xxx} inserts "xxx" when XUNIT is set, and nothing otherwise. 
+ test: $GRIST_PYTHON -m runtests discover -v -s /sandbox ${XUNIT:+--xunit} diff --git a/plugins/core/sandbox/backports/__init__.py b/plugins/core/sandbox/backports/__init__.py new file mode 100644 index 00000000..69e3be50 --- /dev/null +++ b/plugins/core/sandbox/backports/__init__.py @@ -0,0 +1 @@ +__path__ = __import__('pkgutil').extend_path(__path__, __name__) diff --git a/plugins/core/sandbox/backports/functools_lru_cache.py b/plugins/core/sandbox/backports/functools_lru_cache.py new file mode 100644 index 00000000..707c6c76 --- /dev/null +++ b/plugins/core/sandbox/backports/functools_lru_cache.py @@ -0,0 +1,184 @@ +from __future__ import absolute_import + +import functools +from collections import namedtuple +from threading import RLock + +_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) + + +@functools.wraps(functools.update_wrapper) +def update_wrapper(wrapper, + wrapped, + assigned = functools.WRAPPER_ASSIGNMENTS, + updated = functools.WRAPPER_UPDATES): + """ + Patch two bugs in functools.update_wrapper. + """ + # workaround for http://bugs.python.org/issue3445 + assigned = tuple(attr for attr in assigned if hasattr(wrapped, attr)) + wrapper = functools.update_wrapper(wrapper, wrapped, assigned, updated) + # workaround for https://bugs.python.org/issue17482 + wrapper.__wrapped__ = wrapped + return wrapper + + +class _HashedSeq(list): + __slots__ = 'hashvalue' + + def __init__(self, tup, hash=hash): + self[:] = tup + self.hashvalue = hash(tup) + + def __hash__(self): + return self.hashvalue + + +def _make_key(args, kwds, typed, + kwd_mark=(object(),), + fasttypes=set([int, str, frozenset, type(None)]), + sorted=sorted, tuple=tuple, type=type, len=len): + 'Make a cache key from optionally typed positional and keyword arguments' + key = args + if kwds: + sorted_items = sorted(kwds.items()) + key += kwd_mark + for item in sorted_items: + key += item + if typed: + key += tuple(type(v) for v in args) + if kwds: + key += tuple(type(v) for k, v in sorted_items) + elif len(key) == 1 and type(key[0]) in fasttypes: + return key[0] + return _HashedSeq(key) + + +def lru_cache(maxsize=100, typed=False): + """Least-recently-used cache decorator. + + If *maxsize* is set to None, the LRU features are disabled and the cache + can grow without bound. + + If *typed* is True, arguments of different types will be cached separately. + For example, f(3.0) and f(3) will be treated as distinct calls with + distinct results. + + Arguments to the cached function must be hashable. + + View the cache statistics named tuple (hits, misses, maxsize, currsize) with + f.cache_info(). Clear the cache and statistics with f.cache_clear(). + Access the underlying function with f.__wrapped__. + + See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used + + """ + + # Users should only access the lru_cache through its public API: + # cache_info, cache_clear, and f.__wrapped__ + # The internals of the lru_cache are encapsulated for thread safety and + # to allow the implementation to change (including a possible C version). 
+ + def decorating_function(user_function): + + cache = dict() + stats = [0, 0] # make statistics updateable non-locally + HITS, MISSES = 0, 1 # names for the stats fields + make_key = _make_key + cache_get = cache.get # bound method to lookup key or return None + _len = len # localize the global len() function + lock = RLock() # because linkedlist updates aren't threadsafe + root = [] # root of the circular doubly linked list + root[:] = [root, root, None, None] # initialize by pointing to self + nonlocal_root = [root] # make updateable non-locally + PREV, NEXT, KEY, RESULT = 0, 1, 2, 3 # names for the link fields + + if maxsize == 0: + + def wrapper(*args, **kwds): + # no caching, just do a statistics update after a successful call + result = user_function(*args, **kwds) + stats[MISSES] += 1 + return result + + elif maxsize is None: + + def wrapper(*args, **kwds): + # simple caching without ordering or size limit + key = make_key(args, kwds, typed) + result = cache_get(key, root) # root used here as a unique not-found sentinel + if result is not root: + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + cache[key] = result + stats[MISSES] += 1 + return result + + else: + + def wrapper(*args, **kwds): + # size limited caching that tracks accesses by recency + key = make_key(args, kwds, typed) if kwds or typed else args + with lock: + link = cache_get(key) + if link is not None: + # record recent use of the key by moving it to the front of the list + root, = nonlocal_root + link_prev, link_next, key, result = link + link_prev[NEXT] = link_next + link_next[PREV] = link_prev + last = root[PREV] + last[NEXT] = root[PREV] = link + link[PREV] = last + link[NEXT] = root + stats[HITS] += 1 + return result + result = user_function(*args, **kwds) + with lock: + root, = nonlocal_root + if key in cache: + # getting here means that this same key was added to the + # cache while the lock was released. since the link + # update is already done, we need only return the + # computed result and update the count of misses. + pass + elif _len(cache) >= maxsize: + # use the old root to store the new key and result + oldroot = root + oldroot[KEY] = key + oldroot[RESULT] = result + # empty the oldest link and make it the new root + root = nonlocal_root[0] = oldroot[NEXT] + oldkey = root[KEY] + root[KEY] = root[RESULT] = None + # now update the cache dictionary for the new links + del cache[oldkey] + cache[key] = oldroot + else: + # put result in a new link at the front of the list + last = root[PREV] + link = [last, root, key, result] + last[NEXT] = root[PREV] = cache[key] = link + stats[MISSES] += 1 + return result + + def cache_info(): + """Report cache statistics""" + with lock: + return _CacheInfo(stats[HITS], stats[MISSES], maxsize, len(cache)) + + def cache_clear(): + """Clear the cache and cache statistics""" + with lock: + cache.clear() + root = nonlocal_root[0] + root[:] = [root, root, None, None] + stats[:] = [0, 0] + + wrapper.__wrapped__ = user_function + wrapper.cache_info = cache_info + wrapper.cache_clear = cache_clear + return update_wrapper(wrapper, user_function) + + return decorating_function diff --git a/plugins/core/sandbox/dateguess.py b/plugins/core/sandbox/dateguess.py new file mode 100644 index 00000000..dcc13b56 --- /dev/null +++ b/plugins/core/sandbox/dateguess.py @@ -0,0 +1,479 @@ +"""This module guesses possible formats of dates which can be parsed using datetime.strptime +based on samples. 
+
+dateguesser.guess(sample)
+dateguesser.guess takes a sample date string and returns a set of
+datetime.strftime/strptime-compliant date format strings that will parse it correctly.
+
+dateguesser.guess_bulk(list_of_samples, error_rate=0)
+dateguesser.guess_bulk takes a list of sample date strings and an acceptable error rate,
+and returns a list of datetime.strftime/strptime-compliant date format strings,
+sorted by error rate, that will parse them correctly.
+
+Algorithm:
+
+  1. Tokenize the input string into chunks based on character type: digits, alphas, the rest.
+  2. Analyze each token independently in terms of what format codes it could represent.
+  3. For the given list of tokens, generate all permutations of format codes.
+  4. While generating permutations, check the validity of each generated format and skip invalid ones.
+  5. Use the rules listed below to decide whether a format is invalid:
+
+Invalid format checks:
+
+  Rule #1: Year MUST be in the date. Year is the minimum possible parsable date.
+  Rule #2: No holes (missing parts) in the format parts.
+  Rule #3: Time parts are neighbors to each other. No interleaving time with the date.
+  Rule #4: It is highly unlikely that minutes come before hours, millis before seconds, etc.
+  Rule #5: Pattern can't have some part of date/time defined more than once.
+  Rule #6: Separators between elements of the time group should be the same.
+  Rule #7: If am/pm is in the date we assume that only 12-hour dates are allowed. Otherwise it's 24-hour.
+  Rule #8: Year can't be between other date elements.
+
+Note:
+  dateguess doesn't support defaulting to the current year because parsing should be deterministic;
+  it's better to fail to guess the format than to guess it incorrectly.
+
+Examples:
+  >>> guess('2014/05/05 14:00:00 UTC')
+  set(['%Y/%d/%m %H:%M:%S %Z', '%Y/%m/%d %H:%M:%S %Z'])
+  >>> guess('12/12/12')
+  set(['%y/%m/%d', '%d/%m/%y', '%m/%d/%y', '%y/%d/%m'])
+  >>> guess_bulk(['12-11-2014', '12-25-2014'])
+  ['%m-%d-%Y']
+  >>> guess_bulk(['12-11-2014', '25-25-2014'])
+  []
+  >>> guess_bulk(['12-11-2013', '13-8-2013', '05-25-2013', '12-25-2013'], error_rate=0.5)
+  ['%m-%d-%Y']
+"""
+
+
+import calendar
+import itertools
+import logging
+import re
+from collections import defaultdict
+
+from backports.functools_lru_cache import lru_cache
+import moment
+
+
+MONTH_NAME = calendar.month_name
+MONTH_ABBR = calendar.month_abbr
+TZ_VALID_NAMES = {z[0] for z in moment.get_tz_data().items()}
+AM_PM = {'am', 'pm'}
+DAYS_OF_WEEK_NAME = calendar.day_name
+DAYS_OF_WEEK_ABBR = calendar.day_abbr
+
+DATE_ELEMENTS = [
+  # Name              Pattern  Predicate                          Group (mutually exclusive)  Consumes N prev elements
+  ("Year", "%Y", lambda x, p, v: x.isdigit() and len(x) == 4, "Y", 0),
+  ("Year short", "%y", lambda x, p, v: x.isdigit() and len(x) == 2, "Y", 0),
+  ("Month", "%m", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 < int(x) <= 12, "m", 0),
+  ("Month name full", "%B", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_NAME, "m", 0),
+  ("Month name abbr", "%b", lambda x, p, v: x.isalpha() and x.capitalize() in MONTH_ABBR, "m", 0),
+  ("Day", "%d", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 < int(x) <= 31, "d", 0),
+  ("Day of week", "%A", lambda x, p, v: x.isalpha()
+                                        and x.capitalize() in DAYS_OF_WEEK_NAME, "a", 0),
+  ("Day of week abbr", "%a", lambda x, p, v: x.isalpha()
+                                             and x.capitalize() in DAYS_OF_WEEK_ABBR, "a", 0),
+
+  ("Compound HHMMSS", "%H%M%S", lambda x, p, v: x.isdigit() and len(x) == 6
+                                                and 0 <= int(x[0:2]) < 24
+                                                and 0 <= int(x[2:4]) < 60
+                                                and 0 <= int(x[4:6]) < 60, "HMS", 0),
+
("Hour", "%H", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 23, "H", 0), + ("Hour in 12hr mode", "%I", lambda x, p, v: x.isdigit() and len(x) <= 2 + and 0 <= int(x) <= 11, "H", 0), + ("AM/PM", "%p", lambda x, p, v: x.isalpha() and len(x) == 2 and x.lower() in AM_PM, "p", 0), + ("Minutes", "%M", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 59, "M", 0), + ("Seconds", "%S", lambda x, p, v: x.isdigit() and len(x) <= 2 and 0 <= int(x) <= 59, "S", 0), + ("Fraction of second", "%f", lambda x, p, v: x.isdigit() and p is not None + and p.val == '.', "f", 0), + ("Timezone name", "%Z", lambda x, p, v: x.isalpha() and len(x) > 2 + and x in TZ_VALID_NAMES, "Z", 0), + ("Timezone +HHMM", "%z", lambda x, p, v: x.isdigit() and len(x) == 4 and 0 <= int(x[0:2]) < 15 + and 0 <= int(x[2:4]) < 60 and p is not None + and p.val == '+', "Z", 1), + ("Timezone -HHMM", "%z", lambda x, p, v: x.isdigit() and len(x) == 4 and 0 <= int(x[0:2]) < 15 + and 0 <= int(x[2:4]) < 60 and p is not None + and p.val == '-', "Z", 1), +] + + +class Token(object): + """Represents a part of a date string that's being parsed. + Note that __hash__ and __eq__ are overridden in order + to compare only meaningful parts of an object. + """ + def __init__(self, val, length): + self.val = val + self.length = length + self.compatible_types = () + + def __hash__(self): + h = hash(self.length) + hash(self.compatible_types) + if not self.compatible_types: + h += hash(self.val) + return hash(h) + + def __eq__(self, other): + """ + Two tokens are equal when these both are true: + a) length and compatible types are equal + b) if it is separator (no compatible types), separator values must be equal + """ + if self.length != other.length or self.compatible_types != other.compatible_types: + return False + if not other.compatible_types and self.val != other.val: + return False + return True + + +def _check_rule_1(pattern, types_used): + """Rule #1: Year MUST be in the date. Year is the minimum possible parsable date. + + Examples: + >>> _check_rule_1('%Y/%m/%d', 'Ymd') + True + >>> _check_rule_1('%m/%d', 'md') + False + """ + if 'Y' not in types_used: + logging.debug("Rule #1 is violated for pattern %s. Types used: %s", pattern, types_used) + return False + return True + + +def _check_rule_2(pattern, types_used): + """Rule #2: No holes (missing parts) in the format parts. + + Examples: + >>> _check_rule_2('%Y:%H', 'YH') + False + >>> _check_rule_2('%Y/%m/%d %H', 'YmdH') + True + """ + priorities = 'YmdHMSf' + seen_parts = [p in types_used for p in priorities] + if sorted(seen_parts, reverse=True) != seen_parts: + logging.debug("Rule #2 is violated for pattern %s. Types used: %s", pattern, types_used) + return False + return True + + +def _check_rule_3(pattern, types_used): + """Rule #3: Time parts are neighbors to time only. No interleaving time with the date. + + Examples: + >>> _check_rule_3('%m/%d %H:%M %Y', 'mdHMY') + True + >>> _check_rule_3('%m/%d %H:%Y:%M', 'mdHYM') + False + """ + time_parts = 'HMSf' + time_parts_highlighted = [t in time_parts for t in types_used] + time_parts_deduplicated = [a[0] for a in itertools.groupby(time_parts_highlighted)] + if len(list(filter(lambda x: x, time_parts_deduplicated))) > 1: + logging.debug("Rule #3 is violated for pattern %s. Types used: %s", pattern, types_used) + return False + return True + + +def _check_rule_4(pattern, types_used): + """Rule #4: It's highly impossible that minutes coming before hours, + millis coming before seconds etc. 
+ + Examples: + >>> _check_rule_4('%H:%M', 'HM') + True + >>> _check_rule_4('%S:%M', 'SM') + False + """ + time_parts_priority = 'HMSf' + time_parts_indexes = list(filter(lambda x: x >= 0, + [time_parts_priority.find(t) for t in types_used])) + if sorted(time_parts_indexes) != time_parts_indexes: + logging.debug("Rule #4 is violated for pattern %s. Types used: %s", pattern, types_used) + return False + return True + + +def _check_rule_5(pattern, types_used): + """Rule #5: Pattern can't have some part of date/time defined more than once. + + Examples: + >>> _check_rule_5('%Y/%Y', 'YY') + False + >>> _check_rule_5('%m/%b', 'mm') + False + >>> _check_rule_5('%Y/%m', 'Ym') + True + """ + if len(types_used) != len(set(types_used)): + logging.debug("Rule #5 is violated for pattern %s. Types used: %s", pattern, types_used) + return False + return True + + +def _check_rule_6(tokens_chosen, pattern, types_used): + """Rule #6: Separators between elements of the time group should be the same. + + Examples: + _check_rule_5(tokens_chosen_1, '%Y-%m-%dT%H:%M:%S', 'YmdHMS') => True + _check_rule_5(tokens_chosen_2, '%Y-%m-%dT%H %M %S', 'YmdHMS') => True + _check_rule_5(tokens_chosen_3, '%Y-%m-%dT%H-%M:%S', 'YmdHMS') => False (different separators + ('-' and ':') in time group) + """ + time_parts = 'HMS' + num_of_time_parts_used = len(list(filter(lambda x: x in time_parts, types_used))) + time_parts_seen = 0 + separators_seen = [] + previous_was_a_separator = False + + for token in tokens_chosen: + if token[1] is not None and token[1][3] in time_parts: + # This rule doesn't work for separator-less time group so when we found the type + # and it's three letters then it's (see type "Compound HHMMSS") then stop iterating + if len(token[1][3]) == 3: + break + # If not a first time then + if time_parts_seen > 0 and not previous_was_a_separator: + separators_seen.append(None) + time_parts_seen += 1 + if time_parts_seen == num_of_time_parts_used: + break + previous_was_a_separator = False + else: + if time_parts_seen > 0: + separators_seen.append(token[0].val) + previous_was_a_separator = True + + if len(set(separators_seen)) > 1: + logging.debug("Rule #6 is violated for pattern %s. Seen separators: %s", + pattern, separators_seen) + return False + return True + + +def _check_rule_7a(pattern): + """Rule #7a: If am/pm is in date we assume that 12-hour dates are allowed only. + Otherwise it's 24-hour. + + Examples: + >>> _check_rule_7a('%Y/%m/%d %H:%M %p') + False + >>> _check_rule_7a('%Y/%m/%d %I:%M %p') + True + """ + if '%p' in pattern and '%H' in pattern: + logging.debug("Rule #7a is violated for pattern %s", pattern) + return False + return True + + +def _check_rule_7b(pattern): + """Rule #7b: If am/pm is in date we assume that 12-hour dates are allowed only. + Otherwise it's 24-hour. + + Examples: + >>> _check_rule_7b('%Y/%m/%d %I:%M') + False + >>> _check_rule_7b('%Y/%m/%d %I:%M %p') + True + """ + if '%I' in pattern and '%p' not in pattern: + logging.debug("Rule #7b is violated for pattern %s", pattern) + return False + return True + + +def _check_rule_8(pattern, types_used): + """Rule #9: Year can't be between other date elements + + Examples: + >>> _check_rule_8('%m/%Y/%d %I:%M', 'mYdIM') + False + """ + if 'mYd' in types_used or 'dYm' in types_used: + logging.debug("Rule #8 is violated for pattern %s", pattern) + return False + return True + + +def _tokenize_by_character_class(s): + """Return a list of strings by splitting s (tokenizing) by character class. 
+
+  Example:
+    >>> t = _tokenize_by_character_class('Thu, May 14th, 2014 1:15 pm +0000')
+    >>> [i.val for i in t]
+    ['Thu', ',', ' ', 'May', ' ', '14', 'th', ',', ' ', '2014', ' ', '1', ':', '15', ' ', 'pm', ' ', '+', '0000']
+
+    >>> t = _tokenize_by_character_class('5/14/2014')
+    >>> [i.val for i in t]
+    ['5', '/', '14', '/', '2014']
+  """
+  res = re.split(r'(\d+)|(\W)|(_)', s)
+  return [Token(i, len(i)) for i in res if i]
+
+
+def _sliding_triplets(tokens):
+  for idx, t in enumerate(tokens):
+    yield (t, tokens[idx-1] if idx > 0 else None, tokens[idx+1] if idx < len(tokens)-1 else None)
+
+
+def _analyze_tokens(tokens):
+  """Analyze each token and find out the compatible types for it."""
+  for token, prev, nxt in _sliding_triplets(tokens):
+    token.compatible_types = tuple([t for t in DATE_ELEMENTS if t[2](token.val, prev, nxt)])
+
+
+@lru_cache()
+def _generate_all_permutations(tokens):
+  """Generate all permutations of format codes for the given list of tokens.
+
+  Brute-forcing all possible permutations and checking the rules eats most of the time of date
+  parsing. But since the input is expected to be highly uniform, we can expect that
+  memoization of this step will be very efficient.
+
+  A Token holds the values of date parts, but because __eq__ and __hash__ are overridden,
+  two tokens with the same length and the same possible formats are treated as equal
+  (separators must also have equal values).
+  """
+  all_patterns = set()
+  _generate_all_permutations_recursive(tokens, 0, [], "", all_patterns, "")
+
+  return all_patterns
+
+
+def _check_is_pattern_valid_quick_fail_rules(pattern, types_used):
+  """Apply rules which are applicable to partially constructed patterns.
+
+  Example: duplicates of a date part in a pattern.
+  """
+  return _check_rule_5(pattern, types_used) \
+    and _check_rule_4(pattern, types_used) \
+    and _check_rule_7a(pattern)
+
+
+def _check_is_pattern_valid_full_pattern_rules(tokens_chosen, pattern, types_used):
+  """Apply rules which are applicable to the full pattern only.
+
+  Example: existence of the Year part in the pattern.
+  """
+  return _check_rule_1(pattern, types_used) \
+    and _check_rule_2(pattern, types_used) \
+    and _check_rule_3(pattern, types_used) \
+    and _check_rule_6(tokens_chosen, pattern, types_used) \
+    and _check_rule_7b(pattern) \
+    and _check_rule_8(pattern, types_used)
+
+
+def _generate_all_permutations_recursive(tokens, token_idx, tokens_chosen, pattern, found_patterns,
+                                         types_used):
+  """Generate all format element permutations recursively.
+
+  Args:
+    tokens (list[Token]): List of tokens.
+    token_idx (int): Index of the token being processed this cycle.
+    tokens_chosen (list[(Token, Token.compatible_type)]): List of tuples
+      containing a token and a compatible type.
+    pattern (str): String containing the format for parsing.
+    found_patterns (set): Set of guessed patterns.
+    types_used (str): String of types used to build the pattern.
+ + Returns: + list: List of permutations + """ + if not _check_is_pattern_valid_quick_fail_rules(pattern, types_used): + return + + if token_idx < len(tokens): + t = tokens[token_idx] + if t.compatible_types: + for ct in t.compatible_types: + _generate_all_permutations_recursive(tokens, token_idx+1, tokens_chosen[:] + [(t, ct)], + (pattern if ct[4] == 0 else pattern[:-ct[4]]) + ct[1], + found_patterns, types_used + ct[3]) + else: + # if no compatible types it should be separator, add it to the pattern + _generate_all_permutations_recursive(tokens, token_idx+1, + tokens_chosen[:] + [(t, None)], pattern + t.val, + found_patterns, types_used) + else: + if _check_is_pattern_valid_full_pattern_rules(tokens_chosen, pattern, types_used): + found_patterns.add(pattern) + + +def guess(date): + """Guesses datetime.strftime/strptime-compliant date formats for date string. + + Args: + date (str): Date string. + + Returns: + set: Set of datetime.strftime/strptime-compliant date format strings + + Examples: + >>> guess('2014/05/05 14:00:00 UTC') + set(['%Y/%d/%m %H:%M:%S %Z', '%Y/%m/%d %H:%M:%S %Z']) + >>> guess('12/12/12') + set(['%y/%m/%d', '%d/%m/%y', '%m/%d/%y', '%y/%d/%m']) + """ + tokens = _tokenize_by_character_class(date) + _analyze_tokens(tokens) + return _generate_all_permutations(tuple(tokens)) + + +def guess_bulk(dates, error_rate=0): + """Guesses datetime.strftime/strptime-compliant date formats for list of the samples. + + Args: + dates (list): List of samples date strings. + error_rate (float): Acceptable error rate (default 0.0) + + Returns: + list: List of datetime.strftime/strptime-compliant date format strings sorted by error rate + + Examples: + >>> guess_bulk(['12-11-2014', '12-25-2014']) + ['%m-%d-%Y'] + >>> guess_bulk(['12-11-2014', '25-25-2014']) + [] + >>> guess_bulk(['12-11-2013', '13-8-2013', '05-25-2013', '12-25-2013'], error_rate=0.5) + ['%m-%d-%Y'] + """ + if error_rate == 0.0: + patterns = None + for date in dates: + guesses_patterns = guess(date) + if patterns is None: + patterns = guesses_patterns + else: + patterns = patterns.intersection(guesses_patterns) + if not patterns: + break # No need to iterate more if zero patterns found + return list(patterns) + else: + found_dates = 0 + pattern_counters = defaultdict(lambda: 0) + num_dates = len(dates) + min_num_dates_to_be_found = num_dates - num_dates * error_rate + + for idx, date in enumerate(dates): + patterns = guess(date) + if patterns: + found_dates += 1 + for pattern in patterns: + pattern_counters[pattern] = pattern_counters[pattern] + 1 + + # Early return if number of strings that can't be date is already over error rate + cells_left = num_dates - idx - 1 + cannot_be_found = float(found_dates + cells_left) < min_num_dates_to_be_found + if cannot_be_found: + return [] + + patterns = [(v, k) for k, v in pattern_counters.items() + if v > min_num_dates_to_be_found] + patterns.sort(reverse=True) + return [k for (v, k) in patterns] diff --git a/plugins/core/sandbox/import_csv.py b/plugins/core/sandbox/import_csv.py new file mode 100644 index 00000000..0bd7528f --- /dev/null +++ b/plugins/core/sandbox/import_csv.py @@ -0,0 +1,197 @@ +""" +Plugin for importing CSV files +""" +import os +import logging + +import chardet +import messytables +import six +from six.moves import zip + +import parse_data +import import_utils + + +log = logging.getLogger(__name__) + +SCHEMA = [ + { + 'name': 'lineterminator', + 'label': 'Line terminator', + 'type': 'string', + 'visible': True, + }, + { + 'name': 'include_col_names_as_headers', + 
'label': 'First row contains headers', + 'type': 'boolean', + 'visible': True, + }, + { + 'name': 'delimiter', + 'label': 'Field separator', + 'type': 'string', + 'visible': True, + }, + { + 'name': 'skipinitialspace', + 'label': 'Skip leading whitespace', + 'type': 'boolean', + 'visible': True, + }, + { + 'name': 'quotechar', + 'label': 'Quote character', + 'type': 'string', + 'visible': True, + }, + { + 'name': 'doublequote', + 'label': 'Quotes in fields are doubled', + 'type': 'boolean', + 'visible': True, + }, + + { + 'name': 'quoting', + 'label': 'Convert quoted fields', + 'type': 'number', + 'visible': False, # Not supported by messytables + }, + { + 'name': 'escapechar', + 'label': 'Escape character', + 'type': 'string', + 'visible': False, # Not supported by messytables + }, + { + 'name': 'start_with_row', + 'label': 'Start with row', + 'type': 'number', + 'visible': False, # Not yet implemented + }, + { + 'name': 'NUM_ROWS', + 'label': 'Number of rows', + 'type': 'number', + 'visible': False, + }] + +def parse_file_source(file_source, options): + parsing_options, export_list = parse_file(import_utils.get_path(file_source["path"]), options) + return {"parseOptions": parsing_options, "tables": export_list} + +def parse_file(file_path, parse_options=None): + """ + Reads a file path and parse options that are passed in using ActiveDoc.importFile() + and returns a tuple with parsing options (users' or guessed) and an object formatted so that + it can be used by grist for a bulk add records action. + """ + parse_options = parse_options or {} + + with open(file_path, "rb") as f: + parsing_options, export_list = _parse_open_file(f, parse_options=parse_options) + return parsing_options, export_list + + +def _parse_open_file(file_obj, parse_options=None): + options = {} + csv_keys = ['delimiter', 'quotechar', 'lineterminator', 'doublequote', 'skipinitialspace'] + csv_options = {k: parse_options.get(k) for k in csv_keys} + if six.PY2: + csv_options = {k: v.encode('utf8') if isinstance(v, six.text_type) else v + for k, v in csv_options.items()} + + table_set = messytables.CSVTableSet(file_obj, + delimiter=csv_options['delimiter'], + quotechar=csv_options['quotechar'], + lineterminator=csv_options['lineterminator'], + doublequote=csv_options['doublequote'], + skipinitialspace=csv_options['skipinitialspace']) + + num_rows = parse_options.get('NUM_ROWS', 0) + + # Messytable's encoding detection uses too small a sample, so we override it here. + sample = file_obj.read(100000) + table_set.encoding = chardet.detect(sample)['encoding'] + # In addition, always prefer UTF8 over ASCII. + if table_set.encoding == 'ascii': + table_set.encoding = 'utf8' + + export_list = [] + # A table set is a collection of tables: + for row_set in table_set.tables: + table_name = None + sample_rows = list(row_set.sample) + # Messytables doesn't guess whether headers are present, so we need to step in. + data_offset, headers = import_utils.headers_guess(sample_rows) + + # Make sure all header values are strings. 
+ for i, header in enumerate(headers): + if not isinstance(header, six.string_types): + headers[i] = six.text_type(header) + + log.info("Guessed data_offset as %s", data_offset) + log.info("Guessed headers as: %s", headers) + + have_guessed_headers = any(headers) + include_col_names_as_headers = parse_options.get('include_col_names_as_headers', + have_guessed_headers) + + if include_col_names_as_headers and not have_guessed_headers: + # use first line as headers + data_offset, first_row = import_utils.find_first_non_empty_row(sample_rows) + headers = import_utils.expand_headers(first_row, data_offset, sample_rows) + + elif not include_col_names_as_headers and have_guessed_headers: + # move guessed headers to data + data_offset -= 1 + headers = [''] * len(headers) + + row_set.register_processor(messytables.offset_processor(data_offset)) + + table_data_with_types = parse_data.get_table_data(row_set, len(headers), num_rows) + + # Identify and remove empty columns, and populate separate metadata and data lists. + column_metadata = [] + table_data = [] + for col_data, header in zip(table_data_with_types, headers): + if not header and all(val == "" for val in col_data["data"]): + continue # empty column + data = col_data.pop("data") + col_data["id"] = header + column_metadata.append(col_data) + table_data.append(data) + + if not table_data: + # Don't add tables with no columns. + continue + + guessed = row_set._dialect + quoting = parse_options.get('quoting') + options = {"delimiter": parse_options.get('delimiter', guessed.delimiter), + "doublequote": parse_options.get('doublequote', guessed.doublequote), + "lineterminator": parse_options.get('lineterminator', guessed.lineterminator), + "quotechar": parse_options.get('quotechar', guessed.quotechar), + "skipinitialspace": parse_options.get('skipinitialspace', guessed.skipinitialspace), + "include_col_names_as_headers": include_col_names_as_headers, + "start_with_row": 1, + "NUM_ROWS": num_rows, + "SCHEMA": SCHEMA + } + + log.info("Output table %r with %d columns", table_name, len(column_metadata)) + for c in column_metadata: + log.debug("Output column %s", c) + export_list.append({ + "table_name": table_name, + "column_metadata": column_metadata, + "table_data": table_data + }) + + return options, export_list + +def get_version(): + """ Return name and version of plug-in""" + pass diff --git a/plugins/core/sandbox/import_json.py b/plugins/core/sandbox/import_json.py new file mode 100644 index 00000000..51f899ad --- /dev/null +++ b/plugins/core/sandbox/import_json.py @@ -0,0 +1,257 @@ +""" +The import_json module converts json file into a list of grist tables. + +It supports data being structured as a list of record, turning each +object into a row and each object's key into a column. For +example: +``` +[{'a': 1, 'b': 'tree'}, {'a': 4, 'b': 'flowers'}, ... ] +``` +is turned into a table with two columns 'a' of type 'Int' and 'b' of +type 'Text'. + +Nested object are stored as references to a distinct table where the +nested object is stored. For example: +``` +[{'a': {'b': 4}}, ...] +``` +is turned into a column 'a' of type 'Ref:my_import_name.a', and into +another table 'my_import_name.a' with a column 'b' of type +'Int'. (Nested-nested objects are supported as well and the module +assumes no limit to the number of level of nesting you can do.) + +Each value which is not an object will be stored into a column with id +'' (empty string). For example: +``` +['apple', 'peach', ... 
] +``` +is turned into a table with an un-named column that stores the values. + +Arrays are stored as a list of references to a table where the content +of the array is stored. For example: +``` +[{'items': [{'a':'apple'}, {'a':'peach'}]}, {'items': [{'a':'cucumber'}, {'a':'carots'}, ...]}, ...] +``` +is turned into a column named 'items' of type +'RefList:my_import_name.items' which points to another table named +'my_import_name.items' which has a column 'a' of type Text. + +Data could be structured with an object at the root as well in which +case, the object is considered to represent a single row, and gets +turned into a table with one row. + +A column's type is defined by the type of its first value that is not +None (ie: if another value with different type is stored in the same +column, the column's type remains unchanged), 'Text' otherwise. + +Usage: +import import_json +# if you have a file to parse +import_json.parse_file(file_path) + +# if data is already encoded with python's standard containers (dict and list) +import_json.dumps(data, import_name) + + +TODO: + - references should map to appropriate column type ie: `Ref:{$colname}` and + `RefList:{$colname}` (which depends on T413). + - Allows user to set the uniqueValues options per table. + - User should be able to choose some objects to be imported as + indexes: for instance: +``` +{ + 'pink lady': {'type': 'apple', 'taste': 'juicy'}, + 'gala': {'type': 'apple', 'taste': 'tart'}, + 'comice': {'type': 'pear', 'taste': 'lemon'}, + ... +} +``` + could be mapped to columns 'type', 'taste' and a 3rd that holds the + property 'name'. + +""" +import os +import json +from collections import OrderedDict, namedtuple +from itertools import count, chain + +import six + +import import_utils + +Ref = namedtuple('Ref', ['table_name', 'rowid']) +Row = namedtuple('Row', ['values', 'parent', 'ref']) +Col = namedtuple('Col', ['type', 'values']) + +GRIST_TYPES={ + float: "Numeric", + bool: "Bool", +} + +for typ in six.integer_types: + GRIST_TYPES[typ] = "Int" + +for typ in six.string_types: + GRIST_TYPES[typ] = "Text" + +SCHEMA = [{ + 'name': 'includes', + 'label': 'Includes (list of tables seperated by semicolon)', + 'type': 'string', + 'visible': True +}, { + 'name': 'excludes', + 'label': 'Excludes (list of tables seperated by semicolon)', + 'type': 'string', + 'visible': True +}] + +DEFAULT_PARSE_OPTIONS = { + 'includes': '', + 'excludes': '', + 'SCHEMA': SCHEMA +} + +def parse_file(file_source, parse_options): + "Deserialize `file_source` into a python object and dumps it into jgrist form" + path = import_utils.get_path(file_source['path']) + name, ext = os.path.splitext(file_source['origName']) + if 'SCHEMA' not in parse_options: + parse_options.update(DEFAULT_PARSE_OPTIONS) + with open(path, 'r') as json_file: + data = json.loads(json_file.read()) + + return dumps(data, name, parse_options) + +def dumps(data, name = "", parse_options = DEFAULT_PARSE_OPTIONS): + " Serializes `data` to a jgrist formatted object. " + tables = Tables(parse_options) + if not isinstance(data, list): + # put simple record into a list + data = [data] + for val in data: + tables.add_row(name, val) + return { + 'tables': tables.dumps(), + 'parseOptions': parse_options + } + + +class Tables(object): + """ + Tables maintains the list of tables indexed by their name. Each table + is a list of row. A row is a dictionary mapping columns id to a value. 
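+ For example, a single call add_row('Fruits', {'name': 'apple'}) leaves the internal state as roughly:
+ {'Fruits': [Row(values={'name': 'apple'}, parent=None, ref=Ref('Fruits', 1))]}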
+ """ + + def __init__(self, parse_options): + self._tables = OrderedDict() + self._includes_opt = list(filter(None, parse_options['includes'].split(';'))) + self._excludes_opt = list(filter(None, parse_options['excludes'].split(';'))) + + + def dumps(self): + " Dumps tables in jgrist format " + return [_dump_table(name, rows) for name, rows in six.iteritems(self._tables)] + + def add_row(self, table, value, parent = None): + """ + Adds a row to `table` and fill it with the content of value, then + returns a Ref object pointing to this row. Returns None if the row + was excluded. Calls itself recursively to add nested object and + lists. + """ + row = None + if self._is_included(table): + rows = self._tables.setdefault(table, []) + row = Row(OrderedDict(), parent, Ref(table, len(rows)+1)) + rows.append(row) + + # we need a dictionary to map values to the row's columns + value = _dictify(value) + for (k, val) in sorted(six.iteritems(value)): + if isinstance(val, dict): + val = self.add_row(table + '_' + k, val) + if row and val: + row.values[k] = val.ref + elif isinstance(val, list): + for list_val in val: + self.add_row(table + '_' + k, list_val, row) + else: + if row and self._is_included(table + '_' + k): + row.values[k] = val + return row + + + def _is_included(self, property_path): + is_included = (any(property_path.startswith(inc) for inc in self._includes_opt) + if self._includes_opt else True) + is_excluded = (any(property_path.startswith(exc) for exc in self._excludes_opt) + if self._excludes_opt else False) + return is_included and not is_excluded + + +def first_available_key(dictionary, name): + """ + Returns the first of (name, name2, name3 ...) that is not a key of + dictionary. + """ + names = chain([name], ("{}{}".format(name, i) for i in count(2))) + return next(n for n in names if n not in dictionary) + + +def _dictify(value): + """ + Converts non-dictionary value to a dictionary with a single + empty-string key mapping to the given value. Or returns the value + itself if it's already a dictionary. This is useful to map values to + row's columns. + """ + return value if isinstance(value, dict) else {'': value} + + +def _dump_table(name, rows): + "Converts a list of rows into a jgrist table and set 'table_name' to name." + columns = _transpose([r.values for r in rows]) + # find ref to first parent + ref = next((r.parent.ref for r in rows if r.parent), None) + if ref: + # adds a column to store ref to parent + col_id = first_available_key(columns, ref.table_name) + columns[col_id] = Col(_grist_type(ref), + [row.parent.ref if row.parent else None for row in rows]) + return { + 'column_metadata': [{'id': key, 'type': col.type} for (key, col) in six.iteritems(columns)], + 'table_data': [[_dump_value(val) for val in col.values] for col in columns.values()], + 'table_name': name + } + +def _transpose(rows): + """ + Transposes a collection of dictionary mapping key to values into a + dictionary mapping key to values. Values are encoded into a tuple + made of the grist_type of the first value that is not None and the + collection of values. + """ + transpose = OrderedDict() + values = OrderedDict() + for row in reversed(rows): + values.update(row) + for key, val in six.iteritems(values): + transpose[key] = Col(_grist_type(val), [row.get(key, None) for row in rows]) + return transpose + + +def _dump_value(value): + " Serialize a value." + if isinstance(value, Ref): + return value.rowid + return value + + +def _grist_type(value): + " Returns the grist type for value. 
" + val_type = type(value) + if val_type == Ref: + return 'Ref:{}'.format(value.table_name) + return GRIST_TYPES.get(val_type, 'Text') diff --git a/plugins/core/sandbox/import_utils.py b/plugins/core/sandbox/import_utils.py new file mode 100644 index 00000000..07d83b77 --- /dev/null +++ b/plugins/core/sandbox/import_utils.py @@ -0,0 +1,120 @@ +""" +Helper functions for import plugins +""" +import sys +import itertools +import logging +import os + +# Include /thirdparty into module search paths, in particular for messytables. +sys.path.append('/thirdparty') + +import six +from six.moves import zip + +log = logging.getLogger(__name__) + +# Get path to an imported file. +def get_path(file_source): + importdir = os.environ.get('IMPORTDIR') or '/importdir' + return os.path.join(importdir, file_source) + +def capitalize(word): + """Capitalize the first character in the word (without lowercasing the rest).""" + return word[0].capitalize() + word[1:] + +def _is_numeric(text): + for t in six.integer_types + (float, complex): + try: + t(text) + return True + except (ValueError, OverflowError): + pass + return False + + +def _is_header(header, data_rows): + """ + Returns whether header can be considered a legitimate header for data_rows. + """ + # See if the row has any non-text values. + for cell in header: + if not isinstance(cell.value, six.string_types) or _is_numeric(cell.value): + return False + + + # If it's all text, see if the values in the first row repeat in other rows. That's uncommon for + # a header. + count_repeats = [0 for cell in header] + for row in data_rows: + for cell, header_cell in zip(row, header): + if cell.value and cell.value == header_cell.value: + return False + + return True + +def _count_nonempty(row): + """ + Returns the count of cells in row, ignoring trailing empty cells. + """ + count = 0 + for i, c in enumerate(row): + if not c.empty: + count = i + 1 + return count + + +def find_first_non_empty_row(rows): + """ + Returns (data_offset, header) of the first row with non-empty fields + or (0, []) if there are no non-empty rows. + """ + for i, row in enumerate(rows): + if _count_nonempty(row) > 0: + return i + 1, row + # No non-empty rows. + return 0, [] + + +def expand_headers(headers, data_offset, rows): + """ + Returns expanded header to have enough columns for all rows in the given sample. + """ + row_length = max(itertools.chain([len(headers)], + (_count_nonempty(r) for r in itertools.islice(rows, data_offset, + None)))) + header_values = [h.value.strip() for h in headers] + [u''] * (row_length - len(headers)) + return header_values + + +def headers_guess(rows): + """ + Our own smarter version of messytables.headers_guess, which also guesses as to whether one of + the first rows is in fact a header. Returns (data_offset, headers) where data_offset is the + index of the first line of data, and headers is the list of guessed headers (which will contain + empty strings if the file had no headers). + """ + # Messytables guesses at the length of data rows, and then assumes that the first row that has + # close to that many non-empty fields is the header, where by "close" it means 1 less. + # + # For Grist, it's better to mistake headers for data than to mistake data for headers. Note that + # there is csv.Sniffer().has_header(), which tries to be clever, but it's messes up too much. + # + # We only consider for the header the first row with non-empty cells. 
It is a header if + # - it has no non-text fields + # - none of the fields have a value that repeats in that column of data + + # Find the first row with non-empty fields. + data_offset, header = find_first_non_empty_row(rows) + if not header: + return data_offset, header + + # Let's see if row is really a header. + if not _is_header(header, itertools.islice(rows, data_offset, None)): + data_offset -= 1 + header = [] + + # Expand header to have enough columns for all rows in the given sample. + header_values = expand_headers(header, data_offset, rows) + + return data_offset, header_values diff --git a/plugins/core/sandbox/import_xls.py b/plugins/core/sandbox/import_xls.py new file mode 100644 index 00000000..c46df822 --- /dev/null +++ b/plugins/core/sandbox/import_xls.py @@ -0,0 +1,118 @@ +""" +This module reads a file path that is passed in using ActiveDoc.importFile() +and returns a object formatted so that it can be used by grist for a bulk add records action +""" +import os +import csv +import itertools +import logging + +import chardet +import messytables +import six +from six.moves import zip + +import parse_data +import import_utils + + +log = logging.getLogger(__name__) + + +def import_file(file_source, parse_options): + path = import_utils.get_path(file_source["path"]) + orig_name = file_source["origName"] + parse_options, tables = parse_file(path, orig_name, parse_options) + return {"parseOptions": parse_options, "tables": tables} + +# messytable is painfully un-extensible, so we have to jump through dumb hoops to override any +# behavior. +orig_dialect = messytables.CSVRowSet._dialect +def override_dialect(self): + if self.delimiter == '\t': + return csv.excel_tab + return orig_dialect.fget(self) +messytables.CSVRowSet._dialect = property(override_dialect) + +def parse_file(file_path, orig_name, parse_options=None, table_name_hint=None, num_rows=None): + # pylint: disable=unused-argument + with open(file_path, "rb") as f: + try: + return parse_open_file(f, orig_name, table_name_hint=table_name_hint) + except Exception as e: + # Log the full error, but simplify the thrown error to omit the unhelpful extra args. + log.info("import_xls parse_file failed: %s", e) + if six.PY2 and e.args and isinstance(e.args[0], six.string_types): + raise Exception(e.args[0]) + raise + + +def parse_open_file(file_obj, orig_name, table_name_hint=None): + file_root, file_ext = os.path.splitext(orig_name) + table_set = messytables.any.any_tableset(file_obj, extension=file_ext, auto_detect=False) + + # Messytable's encoding detection uses too small a sample, so we override it here. + if isinstance(table_set, messytables.CSVTableSet): + sample = file_obj.read(100000) + table_set.encoding = chardet.detect(sample)['encoding'] + # In addition, always prefer UTF8 over ASCII. + if table_set.encoding == 'ascii': + table_set.encoding = 'utf8' + + export_list = [] + # A table set is a collection of tables: + for row_set in table_set.tables: + table_name = row_set.name + + if isinstance(row_set, messytables.CSVRowSet): + # For csv files, we can do better for table_name by using the filename. + table_name = import_utils.capitalize(table_name_hint or + os.path.basename(file_root.decode('utf8'))) + + # Messytables doesn't guess whether headers are present, so we need to step in. + data_offset, headers = import_utils.headers_guess(list(row_set.sample)) + else: + # Let messytables guess header names and the offset of the header. 
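+ # headers_guess returns the index of the header row itself, so the first data row is one past it.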
+ offset, headers = messytables.headers_guess(row_set.sample) + data_offset = offset + 1 # Add the header line + + # Make sure all header values are strings. + for i, header in enumerate(headers): + if not isinstance(header, six.string_types): + headers[i] = six.text_type(header) + + log.debug("Guessed data_offset as %s", data_offset) + log.debug("Guessed headers as: %s", headers) + + row_set.register_processor(messytables.offset_processor(data_offset)) + + + table_data_with_types = parse_data.get_table_data(row_set, len(headers)) + + # Identify and remove empty columns, and populate separate metadata and data lists. + column_metadata = [] + table_data = [] + for col_data, header in zip(table_data_with_types, headers): + if not header and all(val == "" for val in col_data["data"]): + continue # empty column + data = col_data.pop("data") + col_data["id"] = header + column_metadata.append(col_data) + table_data.append(data) + + if not table_data: + # Don't add tables with no columns. + continue + + log.info("Output table %r with %d columns", table_name, len(column_metadata)) + for c in column_metadata: + log.debug("Output column %s", c) + export_list.append({ + "table_name": table_name, + "column_metadata": column_metadata, + "table_data": table_data + }) + + parse_options = {} + + return parse_options, export_list diff --git a/plugins/core/sandbox/main.py b/plugins/core/sandbox/main.py new file mode 100644 index 00000000..e5f88e12 --- /dev/null +++ b/plugins/core/sandbox/main.py @@ -0,0 +1,25 @@ +import logging +import sandbox + +import import_csv +import import_xls +import import_json + +def main(): + s = logging.StreamHandler() + s.setFormatter(logging.Formatter(fmt='%(asctime)s.%(msecs)03d %(message)s', + datefmt='%Y-%m-%d %H:%M:%S')) + rootLogger = logging.getLogger() + rootLogger.addHandler(s) + rootLogger.setLevel(logging.INFO) + + # Todo: Grist should expose a register method accepting arguments as + # follow: register('csv_parser', 'canParse', can_parse) + sandbox.register("csv_parser.parseFile", import_csv.parse_file_source) + sandbox.register("xls_parser.parseFile", import_xls.import_file) + sandbox.register("json_parser.parseFile", import_json.parse_file) + + sandbox.run() + +if __name__ == "__main__": + main() diff --git a/plugins/core/sandbox/parse_data.py b/plugins/core/sandbox/parse_data.py new file mode 100644 index 00000000..4426d585 --- /dev/null +++ b/plugins/core/sandbox/parse_data.py @@ -0,0 +1,299 @@ +""" +This module implements a way to detect and convert types that's better than messytables (at least +in some relevant cases). + +It has a simple interface: get_table_data(row_set) which returns a list of columns, each a +dictionary with "type" and "data" fields, where "type" is a Grist type string, and data is a list +of values. All "data" lists will have the same length. +""" + +import dateguess +import datetime +import logging +import re +import messytables +import moment # TODO grist internal libraries might not be available to plugins in the future. +import dateutil.parser as date_parser +import six +from six.moves import zip, xrange + +log = logging.getLogger(__name__) + + +# Typecheck using type(value) instead of isinstance(value, some_type) makes parsing 25% faster +# pylint:disable=unidiomatic-typecheck + + +# Our approach to type detection is different from that of messytables. +# We first go through each cell in a sample of rows, trying to convert it to each of the basic +# types, and keep a count of successes for each. 
We use the counts to decide the basic types (e.g. +# numeric vs text). Then we go through the full data set converting to the chosen basic type. +# During this process, we keep counts of suitable Grist types to consider (e.g. Int vs Numeric). +# We use those counts to produce the selected Grist type at the end. + + +class BaseConverter(object): + @classmethod + def test(cls, value): + try: + cls.convert(value) + return True + except Exception: + return False + + @classmethod + def convert(cls, value): + """Implement to convert imported value to a basic type.""" + raise NotImplementedError() + + @classmethod + def get_grist_column(cls, values): + """ + Given an array of values returned successfully by convert(), return a tuple of + (grist_type_string, grist_values), where grist_values is an array of values suitable for the + returned grist type. + """ + raise NotImplementedError() + + +class NumericConverter(BaseConverter): + """Handles numeric values, including Grist types Numeric and Int.""" + + # A number matching this is probably an identifier of some sort. Converting it to a float will + # lose precision, so it's better not to consider it numeric. + _unlikely_float = re.compile(r'\d{17}|^0\d') + + # Integers outside this range will be represented as floats. This is the limit for values that can + # be stored in a JS Int32Array. + _max_js_int = 1<<31 + + # The thousands separator. It should be locale-specific, but we don't currently have a way to + # detect locale from the data. (Also, the sandbox's locale module isn't fully functional.) + _thousands_sep = ',' + + @classmethod + def convert(cls, value): + if type(value) in six.integer_types + (float, complex): + return value + if type(value) in (str, six.text_type) and not cls._unlikely_float.search(value): + return float(value.strip().lstrip('$').replace(cls._thousands_sep, "")) + raise ValueError() + + @classmethod + def _is_integer(cls, value): + ttype = type(value) + if ttype == int or (ttype == float and value.is_integer()): + return -cls._max_js_int <= value < cls._max_js_int + return False + + @classmethod + def get_grist_column(cls, values): + if all(cls._is_integer(v) for v in values): + return ("Int", [int(v) for v in values]) + return ("Numeric", values) + + +class DateParserInfo(date_parser.parserinfo): + def validate(self, res): + # Avoid this bogus combination which accepts plain numbers. + if res.day and not res.month: + return False + return super(DateParserInfo, self).validate(res) + + +class SimpleDateTimeConverter(BaseConverter): + """Handles Date and DateTime values which are already instances of datetime.datetime.""" + + @classmethod + def convert(cls, value): + if type(value) is datetime.datetime: + return value + elif value == "": + return None + raise ValueError() + + @classmethod + def _is_date(cls, value): + return value is None or value.time() == datetime.time() + + @classmethod + def get_grist_column(cls, values): + grist_type = "Date" if all(cls._is_date(v) for v in values) else "DateTime" + grist_values = [(v if (v is None) else moment.dt_to_ts(v)) + for v in values] + return grist_type, grist_values + + +class DateTimeCoverter(BaseConverter): + """Handles dateformats by guessed format.""" + + def __init__(self, date_format): + self._format = date_format + + def convert(self, value): + if value == "": + return None + if type(value) in (str, six.text_type): + # datetime.strptime doesn't handle %z and %Z tags in Python 2. 
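+ # For timezone-aware formats, fall back to dateutil.parser, which understands offsets and zone names.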
+ if '%z' in self._format or '%Z' in self._format: + return date_parser.parse(value) + else: + try: + return datetime.datetime.strptime(value, self._format) + except ValueError: + return date_parser.parse(value) + + raise ValueError() + + def _is_date(self, value): + return value is None or value.time() == datetime.time() + + def get_grist_column(self, values): + grist_type = "Date" if all(self._is_date(v) for v in values) else "DateTime" + grist_values = [(v if (v is None) else moment.dt_to_ts(v)) + for v in values] + return grist_type, grist_values + + +class BoolConverter(BaseConverter): + """Handles Boolean type.""" + + _true_values = (1, '1', 'true', 'yes') + _false_values = (0, '0', 'false', 'no') + + @classmethod + def convert(cls, value): + v = value.strip().lower() if type(value) in (str, six.text_type) else value + if v in cls._true_values: + return True + elif v in cls._false_values: + return False + raise ValueError() + + @classmethod + def get_grist_column(cls, values): + return ("Bool", values) + + +class TextConverter(BaseConverter): + """Fallback converter that converts everything to strings.""" + @classmethod + def convert(cls, value): + return six.text_type(value) + + @classmethod + def get_grist_column(cls, values): + return ("Text", values) + + +class ColumnDetector(object): + """ + ColumnDetector accepts calls to `add_value()`, and keeps track of successful conversions to + different basic types. At the end `get_converter()` method returns the class of the most + suitable converter. + """ + # Converters are listed in the order of preference, which is only used if two converters succeed + # on the same exact number of values. Text is always a fallback. + converters = [SimpleDateTimeConverter, BoolConverter, NumericConverter] + + # If this many non-junk values or more can't be converted, fall back to text. + _text_threshold = 0.10 + + # Junk values: these aren't counted when deciding whether to fall back to text. + _junk_re = re.compile(r'^\s*(|-+|\?+|n/?a)\s*$', re.I) + + def __init__(self): + self._counts = [0] * len(self.converters) + self._count_nonjunk = 0 + self._count_total = 0 + self._data = [] + + def add_value(self, value): + self._count_total += 1 + if value is None or (type(value) in (str, six.text_type) and self._junk_re.match(value)): + return + + self._data.append(value) + + self._count_nonjunk += 1 + for i, conv in enumerate(self.converters): + if conv.test(value): + self._counts[i] += 1 + + def get_converter(self): + if sum(self._counts) == 0: + # if not already guessed as int, bool or datetime then we should try to guess date pattern + str_data = [d for d in self._data if isinstance(d, six.string_types)] + data_formats = dateguess.guess_bulk(str_data, error_rate=self._text_threshold) + data_format = data_formats[0] if data_formats else None + if data_format: + return DateTimeCoverter(data_format) + + # We find the max by count, and secondarily by minimum index in the converters list. 
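+ # Using (count, -index) as the key makes ties go to the earlier (more preferred) converter.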
+ count, neg_index = max((c, -i) for (i, c) in enumerate(self._counts)) + if count > 0 and count >= self._count_nonjunk * (1 - self._text_threshold): + return self.converters[-neg_index] + return TextConverter + + +def _guess_basic_types(rows, num_columns): + column_detectors = [ColumnDetector() for i in xrange(num_columns)] + for row in rows: + for cell, detector in zip(row, column_detectors): + detector.add_value(cell.value) + + return [detector.get_converter() for detector in column_detectors] + + +class ColumnConverter(object): + """ + ColumnConverter converts and collects values using the passed-in converter object. At the end + `get_grist_column()` method returns a column of converted data. + """ + def __init__(self, converter): + self._converter = converter + self._all_col_values = [] # Initially this has None's for converted values + self._converted_values = [] # A list of all converted values + self._converted_indices = [] # Indices of the converted values into self._all_col_values + + def convert_and_add(self, value): + # For some reason, we get 'str' type rather than 'unicode' for empty strings. + # Correct this, since all text should be unicode. + value = u"" if value == "" else value + try: + conv = self._converter.convert(value) + self._converted_values.append(conv) + self._converted_indices.append(len(self._all_col_values)) + self._all_col_values.append(None) + except Exception: + self._all_col_values.append(six.text_type(value)) + + def get_grist_column(self): + """ + Returns a dictionary {"type": grist_type, "data": grist_value_array}. + """ + grist_type, grist_values = self._converter.get_grist_column(self._converted_values) + for i, v in zip(self._converted_indices, grist_values): + self._all_col_values[i] = v + return {"type": grist_type, "data": self._all_col_values} + + +def get_table_data(row_set, num_columns, num_rows=0): + converters = _guess_basic_types(row_set.sample, num_columns) + col_converters = [ColumnConverter(c) for c in converters] + for num, row in enumerate(row_set): + if num_rows and num == num_rows: + break + + if num % 10000 == 0: + log.info("Processing row %d", num) + + # Make sure we have a value for every column. 
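+ # Short rows are padded with empty cells so that zip() below doesn't silently drop trailing columns.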
+ missing_values = len(converters) - len(row) + if missing_values > 0: + row.extend([messytables.Cell("")] * missing_values) + + for cell, conv in zip(row, col_converters): + conv.convert_and_add(cell.value) + + return [conv.get_grist_column() for conv in col_converters] diff --git a/plugins/core/sandbox/test/fixtures/strange_dates.xlsx b/plugins/core/sandbox/test/fixtures/strange_dates.xlsx new file mode 100644 index 00000000..8219e1fe Binary files /dev/null and b/plugins/core/sandbox/test/fixtures/strange_dates.xlsx differ diff --git a/plugins/core/sandbox/test/fixtures/test_excel.xlsx b/plugins/core/sandbox/test/fixtures/test_excel.xlsx new file mode 100644 index 00000000..8ce64289 Binary files /dev/null and b/plugins/core/sandbox/test/fixtures/test_excel.xlsx differ diff --git a/plugins/core/sandbox/test/fixtures/test_excel_types.csv b/plugins/core/sandbox/test/fixtures/test_excel_types.csv new file mode 100644 index 00000000..c6ba02be --- /dev/null +++ b/plugins/core/sandbox/test/fixtures/test_excel_types.csv @@ -0,0 +1 @@ +int1,int2,textint,bigint,num2,bignum,date1,date2,datetext,datetimetext -1234123,5,12345678902345689,320150170634561830,123456789.1234560000,7.22597E+86,12/22/15 11:59 AM,"December 20, 2015",12/22/2015,12/22/2015 00:00:00 ,,,,,,,,,12/22/2015 13:15:00 ,,,,,,,,,02/27/2018 16:08:39 \ No newline at end of file diff --git a/plugins/core/sandbox/test/fixtures/test_excel_types.xlsx b/plugins/core/sandbox/test/fixtures/test_excel_types.xlsx new file mode 100644 index 00000000..f8e216aa Binary files /dev/null and b/plugins/core/sandbox/test/fixtures/test_excel_types.xlsx differ diff --git a/plugins/core/sandbox/test/fixtures/test_import_csv.csv b/plugins/core/sandbox/test/fixtures/test_import_csv.csv new file mode 100644 index 00000000..5931fac8 --- /dev/null +++ b/plugins/core/sandbox/test/fixtures/test_import_csv.csv @@ -0,0 +1,5 @@ +FIRST_NAME,LAST_NAME,PHONE,VALUE,DATE +John,Moor,201-343-3434,45,2018-02-27 16:08:39 +0000 +Tim,Kale,201.343.3434,4545,2018-02-27 16:08:39 +0100 +Jenny,Jo,2013433434,0,2018-02-27 16:08:39 -0100 +Lily,Smit,(201)343-3434,4, diff --git a/plugins/core/sandbox/test/fixtures/test_single_merged_cell.xlsx b/plugins/core/sandbox/test/fixtures/test_single_merged_cell.xlsx new file mode 100644 index 00000000..5fc175e5 Binary files /dev/null and b/plugins/core/sandbox/test/fixtures/test_single_merged_cell.xlsx differ diff --git a/plugins/core/sandbox/test_dateguess.py b/plugins/core/sandbox/test_dateguess.py new file mode 100644 index 00000000..3c83203f --- /dev/null +++ b/plugins/core/sandbox/test_dateguess.py @@ -0,0 +1,102 @@ +import unittest +from dateguess import guess, guess_bulk + + +class TestGuesser(unittest.TestCase): + def assertDate(self, input_str, fmt_list): + guessed = guess(input_str) + self.assertEqual(set(guessed), set(fmt_list)) + + def assertDates(self, input_lst, error_rate, fmt_list): + guessed = guess_bulk(input_lst, error_rate=error_rate) + self.assertEqual(set(guessed), set(fmt_list)) + + def test_guess_dates(self): + self.assertDate('', []) + self.assertDate("2013-13-13", []) + self.assertDate("25/25/1911", []) + + self.assertDate("2014-01-11", ['%Y-%m-%d', '%Y-%d-%m']) + self.assertDate("2014-11-01", ['%Y-%m-%d', '%Y-%d-%m']) + self.assertDate("1990-05-05", ['%Y-%m-%d', '%Y-%d-%m']) + self.assertDate("2013-12-13", ['%Y-%m-%d']) + + self.assertDate("12/31/1999", ['%m/%d/%Y']) + self.assertDate("11/11/1911", ['%m/%d/%Y', '%d/%m/%Y']) + self.assertDate("5/9/1981", ['%m/%d/%Y', '%d/%m/%Y']) + self.assertDate("6/3/1985", ['%m/%d/%Y', 
'%d/%m/%Y']) + + self.assertDate("12/31/99", ['%m/%d/%y']) + self.assertDate("11/11/11", ['%y/%m/%d', '%y/%d/%m', '%m/%d/%y', '%d/%m/%y']) + self.assertDate("5/9/81", ['%m/%d/%y', '%d/%m/%y']) + self.assertDate("6/3/85", ['%m/%d/%y', '%d/%m/%y']) + + self.assertDate("31.12.91", ['%d.%m.%y']) + self.assertDate("4.4.87", ['%m.%d.%y', '%d.%m.%y']) + + self.assertDate("13.2.8", ['%y.%m.%d', '%y.%d.%m']) + self.assertDate("31.12.1991", ['%d.%m.%Y']) + self.assertDate("4.4.1987", ['%m.%d.%Y', '%d.%m.%Y']) + self.assertDate("13.2.2008", ['%d.%m.%Y']) + self.assertDate("31.12.91", ['%d.%m.%y']) + self.assertDate("4.4.87", ['%m.%d.%y', '%d.%m.%y']) + self.assertDate("13.2.8", ['%y.%m.%d', '%y.%d.%m']) + + self.assertDate("9 May 1981", ['%d %b %Y', '%d %B %Y']) + self.assertDate("31 Dec 1999", ['%d %b %Y']) + self.assertDate("1 Jan 2012", ['%d %b %Y']) + self.assertDate("3 August 2009", ['%d %B %Y']) + self.assertDate("2 May 1980", ['%d %B %Y', '%d %b %Y']) + + self.assertDate("13/1/2012", ['%d/%m/%Y']) + + self.assertDate("Aug 1st 2014", ['%b %dst %Y']) + self.assertDate("12/22/2015 00:00:00.10", ['%m/%d/%Y %H:%M:%S.%f']) + + def test_guess_datetimes(self): + self.assertDate("Thu Sep 25 10:36:28 2003", ['%a %b %d %H:%M:%S %Y']) + self.assertDate("Thu Sep 25 2003 10:36:28", ['%a %b %d %Y %H:%M:%S']) + self.assertDate("10:36:28 Thu Sep 25 2003", ['%H:%M:%S %a %b %d %Y']) + + self.assertDate("2014-01-11T12:21:05", ['%Y-%m-%dT%H:%M:%S', '%Y-%d-%mT%H:%M:%S']) + self.assertDate("2015-02-16T16:05:31", ['%Y-%m-%dT%H:%M:%S']) + # TODO remove all except first one + self.assertDate("2015-02-16T16:05", ['%Y-%m-%dT%H:%M', '%Y-%H-%MT%d:%m', + '%Y-%m-%HT%M:%d', '%Y-%d-%HT%M:%m']) + self.assertDate("2015-02-16T16", ['%Y-%m-%dT%H', '%Y-%m-%HT%d']) #TODO remove second one + + self.assertDate("Mon Jan 13 9:52:52 am MST 2014", ['%a %b %d %I:%M:%S %p %Z %Y']) + self.assertDate("Tue Jan 21 3:30:00 PM EST 2014", ['%a %b %d %I:%M:%S %p %Z %Y']) + self.assertDate("Mon Jan 13 09:52:52 MST 2014", ['%a %b %d %H:%M:%S %Z %Y']) + self.assertDate("Tue Jan 21 15:30:00 EST 2014", ['%a %b %d %H:%M:%S %Z %Y']) + self.assertDate("Mon Jan 13 9:52 am MST 2014", ['%a %b %d %I:%M %p %Z %Y']) + self.assertDate("Tue Jan 21 3:30 PM EST 2014", ['%a %b %d %I:%M %p %Z %Y']) + + self.assertDate("2014-01-11T12:21:05", ['%Y-%m-%dT%H:%M:%S', '%Y-%d-%mT%H:%M:%S']) + self.assertDate("2015-02-16T16:05:31", ['%Y-%m-%dT%H:%M:%S']) + self.assertDate("Thu Sep 25 10:36:28 2003", ['%a %b %d %H:%M:%S %Y']) + self.assertDate("10:36:28 Thu Sep 25 2003", ['%H:%M:%S %a %b %d %Y']) + + self.assertDate("2014-01-11T12:21:05+0000", ['%Y-%d-%mT%H:%M:%S%z', '%Y-%m-%dT%H:%M:%S%z']) + self.assertDate("2015-02-16T16:05:31-0400", ['%Y-%m-%dT%H:%M:%S%z']) + self.assertDate("Thu, 25 Sep 2003 10:49:41 -0300", ['%a, %d %b %Y %H:%M:%S %z']) + self.assertDate("Thu, 25 Sep 2003 10:49:41 +0300", ['%a, %d %b %Y %H:%M:%S %z']) + + self.assertDate("2003-09-25T10:49:41", ['%Y-%m-%dT%H:%M:%S']) + self.assertDate("2003-09-25T10:49", ['%Y-%m-%dT%H:%M']) + + def test_guess_bulk_dates(self): + self.assertDates(["11/11/1911", "25/11/1911", "11/11/1911", "11/11/1911"], 0.0, ['%d/%m/%Y']) + self.assertDates(["25/11/1911", "25/25/1911", "11/11/1911", "11/11/1911"], 0.0, []) + self.assertDates(["25/11/1911", "25/25/1911", "11/11/1911", "11/11/1911"], 0.5, ['%d/%m/%Y']) + + self.assertDates(["25/11/1911", "25/25/1911", "11/11/1911", "11/11/1911"], 0.1, []) + self.assertDates(["23/11/1911", '2004 May 12', "11/11/1911", "11/11/1911"], 0.5, ['%d/%m/%Y']) + + self.assertDates(['2004 May 12', 
"11/11/1911", "11/11/1911", "23/11/1911"], 0.5, ['%d/%m/%Y']) + self.assertDates(['2004 May 12', "11/11/1911", "11/11/1911", "23/11/1911"], 0.0, []) + self.assertDates(['12/22/2015', "12/22/2015 1:15pm", "2018-02-27 16:08:39 +0000"], 0.1, []) + + +if __name__ == "__main__": + unittest.main() diff --git a/plugins/core/sandbox/test_import_csv.py b/plugins/core/sandbox/test_import_csv.py new file mode 100644 index 00000000..9d3a63d6 --- /dev/null +++ b/plugins/core/sandbox/test_import_csv.py @@ -0,0 +1,341 @@ +# This Python file uses the following encoding: utf-8 +# Run tests with: +# +# ./sandbox/nacl/bin/sel_ldr -E PYTHONPATH=/grist:/thirdparty -B ./sandbox/nacl/lib/irt_core.nexe -l /dev/null -m ./sandbox/nacl/root:/:ro -m ./plugins/core/sandbox:/sandbox:ro ./sandbox/nacl/lib/runnable-ld.so --library-path /slib /python/bin/python2.7.nexe -m unittest discover -v -s /sandbox #pylint: disable=line-too-long +# +# +# TODO: run test automatically +# +import math +import os +import textwrap +import unittest +from six import BytesIO, text_type +import csv +import calendar +import datetime + +import import_csv + + +def _get_fixture(filename): + return os.path.join(os.path.dirname(__file__), "test/fixtures", filename) + + +def bytes_io_from_str(string): + if isinstance(string, text_type): + string = string.encode("utf8") + return BytesIO(string) + + +class TestImportCSV(unittest.TestCase): + + def _check_col(self, sheet, index, name, typename, values): + self.assertEqual(sheet["column_metadata"][index]["id"], name) + self.assertEqual(sheet["column_metadata"][index]["type"], typename) + self.assertEqual(sheet["table_data"][index], values) + + def _check_num_cols(self, sheet, exp_cols): + self.assertEqual(len(sheet["column_metadata"]), exp_cols) + self.assertEqual(len(sheet["table_data"]), exp_cols) + + + def test_csv_types(self): + parsed_file = import_csv.parse_file(_get_fixture('test_excel_types.csv'), parse_options='') + sheet = parsed_file[1][0] + + self._check_col(sheet, 0, "int1", "Int", [-1234123, '', '']) + self._check_col(sheet, 1, "int2", "Int", [5, '', '']) + self._check_col(sheet, 2, "textint", "Text", ["12345678902345689", '', '']) + self._check_col(sheet, 3, "bigint", "Text", ["320150170634561830", '', '']) + self._check_col(sheet, 4, "num2", "Numeric", [123456789.123456, '', '']) + self._check_col(sheet, 5, "bignum", "Numeric", [7.22597e+86, '', '']) + self._check_col(sheet, 6, "date1", "DateTime", + [calendar.timegm(datetime.datetime(2015, 12, 22, 11, 59, 00).timetuple()), None, None]) + self._check_col(sheet, 7, "date2", "Date", + [calendar.timegm(datetime.datetime(2015, 12, 20, 0, 0, 0).timetuple()), None, None]) + self._check_col(sheet, 8, "datetext", "Date", + [calendar.timegm(datetime.date(2015, 12, 22).timetuple()), None, None]) + self._check_col(sheet, 9, "datetimetext", "DateTime", + [calendar.timegm(datetime.datetime(2015, 12, 22, 0, 0, 0).timetuple()), + calendar.timegm(datetime.datetime(2015, 12, 22, 13, 15, 0).timetuple()), + calendar.timegm(datetime.datetime(2018, 2, 27, 16, 8, 39).timetuple())]) + + + def test_user_parse_options(self): + options = {u'parse_options': {"escapechar": None, "include_col_names_as_headers": True, + "lineterminator": "\n", "skipinitialspace": False, + "limit_rows": False, "quoting": 0, "start_with_row": 1, + "delimiter": ",", "NUM_ROWS":10, + "quotechar": "\"", "doublequote":True}} + parsed_file = import_csv.parse_file(_get_fixture('test_import_csv.csv'), + **options)[1][0] + self._check_num_cols(parsed_file, 5) + self._check_col(parsed_file, 0, 
"FIRST_NAME", "Text", ['John', 'Tim', 'Jenny', 'Lily']) + self._check_col(parsed_file, 1, "LAST_NAME", "Text", ['Moor', 'Kale', 'Jo', 'Smit']) + self._check_col(parsed_file, 2, "PHONE", "Text", ['201-343-3434', '201.343.3434', + '2013433434', '(201)343-3434']) + self._check_col(parsed_file, 3, "VALUE", "Int", [45, 4545, 0, 4]) + self._check_col(parsed_file, 4, "DATE", "DateTime", [1519747719.0, 1519744119.0, 1519751319.0, None]) + + def test_wrong_cols1(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1, name2, name3 + a1,b1,c1 + a2,b2 + a3 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "name2", "Text", ["b1", "b2", ""]) + self._check_col(parsed_file, 2, "name3", "Text", ["c1", "", ""]) + + def test_wrong_cols2(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1 + a1,b1 + a2,b2,c2 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2"]) + self._check_col(parsed_file, 1, "", "Text", ["b1", "b2"]) + self._check_col(parsed_file, 2, "", "Text", ["", "c2"]) + + def test_offset(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + ,,,,,,, + name1,name2,name3 + a1,b1,c1 + a2,b2,c2 + a3,b3,c3,d4 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 4) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "name2", "Text", ["b1", "b2", "b3"]) + self._check_col(parsed_file, 2, "name3", "Text", ["c1", "c2", "c3"]) + self._check_col(parsed_file, 3, "", "Text", ["", "", "d4"]) + + def test_offset_no_header(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + 4,b1,c1 + 4,b2,c2 + 4,b3,c3 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "", "Int", [4, 4, 4]) + self._check_col(parsed_file, 1, "", "Text", ["b1", "b2", "b3"]) + self._check_col(parsed_file, 2, "", "Text", ["c1", "c2", "c3"]) + + def test_empty_headers(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + ,,-,- + b,a,a,a,a + b,a,a,a,a + b,a,a,a,a + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 5) + self._check_col(parsed_file, 0, "", "Text", ["b", "b", "b"]) + self._check_col(parsed_file, 1, "", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 2, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 3, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 4, "", "Text", ["a", "a", "a"]) + + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + -,-,-,-,-,- + b,a,a,a,a + b,a,a,a,a + b,a,a,a,a + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 6) + self._check_col(parsed_file, 0, "-", "Text", ["b", "b", "b"]) + self._check_col(parsed_file, 1, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 2, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 3, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 4, "-", "Text", ["a", "a", "a"]) + self._check_col(parsed_file, 5, "-", "Text", ["", "", ""]) + + def test_guess_missing_user_option(self): + 
file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1,;name2,;name3 + a1,;b1,;c1 + a2,;b2,;c2 + a3,;b3,;c3 + """)) + parse_options = {"delimiter": ';', + "escapechar": None, + "lineterminator": '\r\n', + "quotechar": '"', + "quoting": csv.QUOTE_MINIMAL} + + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1,", "Text", ["a1,", "a2,", "a3,"]) + self._check_col(parsed_file, 1, "name2,", "Text", ["b1,", "b2,", "b3,"]) + self._check_col(parsed_file, 2, "name3", "Text", ["c1", "c2", "c3"]) + + # Sniffer detects delimiters in order [',', '\t', ';', ' ', ':'], + # so for this file_obj it will be ',' + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2", "a3"]) + self._check_col(parsed_file, 1, ";name2", "Text", [";b1", ";b2", ";b3"]) + self._check_col(parsed_file, 2, ";name3", "Text", [";c1", ";c2", ";c3"]) + + def test_one_line_file_no_header(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + 2,name2,name3 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "", "Int", [2]) + self._check_col(parsed_file, 1, "", "Text", ["name2"]) + self._check_col(parsed_file, 2, "", "Text", ["name3"]) + + def test_one_line_file_with_header(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1,name2,name3 + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={})[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", []) + self._check_col(parsed_file, 1, "name2", "Text", []) + self._check_col(parsed_file, 2, "name3", "Text", []) + + def test_empty_file(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + """)) + + parsed_file = import_csv._parse_open_file(file_obj, parse_options={}) + self.assertEqual(parsed_file, ({}, [])) + + def test_option_num_rows(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1,name2,name3 + a1,b1,c1 + a2,b2,c2 + a3,b3,c3 + """)) + + parse_options = {} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ['a1', 'a2', 'a3']) + self._check_col(parsed_file, 1, "name2", "Text", ['b1', 'b2', 'b3']) + self._check_col(parsed_file, 2, "name3", "Text", ['c1', 'c2', 'c3']) + + parse_options = {"NUM_ROWS": 2} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2"]) + self._check_col(parsed_file, 1, "name2", "Text", ["b1", "b2"]) + self._check_col(parsed_file, 2, "name3", "Text", ["c1", "c2"]) + + parse_options = {"NUM_ROWS": 10} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ['a1', 'a2', 'a3']) + self._check_col(parsed_file, 1, "name2", "Text", ['b1', 'b2', 'b3']) + self._check_col(parsed_file, 2, "name3", "Text", ['c1', 'c2', 'c3']) + + def test_option_num_rows_no_header(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + ,, + ,, + a1,1,c1 + a2,2,c2 + a3,3,c3 + """)) + + parse_options = {} + parsed_file = 
import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "", "Text", ['a1', 'a2', 'a3']) + self._check_col(parsed_file, 1, "", "Int", [1, 2, 3]) + self._check_col(parsed_file, 2, "", "Text", ['c1', 'c2', 'c3']) + + parse_options = {"NUM_ROWS": 2} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "", "Text", ['a1', 'a2']) + self._check_col(parsed_file, 1, "", "Int", [1, 2]) + self._check_col(parsed_file, 2, "", "Text", ['c1', 'c2']) + + def test_option_use_col_name_as_header(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + name1,name2,name3 + a1,1,c1 + a2,2,c2 + a3,3,c3 + """)) + + parse_options = {"include_col_names_as_headers": False} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "", "Text", ["name1", "a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "", "Text", ["name2", "1", "2", "3"]) + self._check_col(parsed_file, 2, "", "Text", ["name3", "c1", "c2", "c3"]) + + parse_options = {"include_col_names_as_headers": True} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 3) + self._check_col(parsed_file, 0, "name1", "Text", ["a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "name2", "Int", [1, 2, 3]) + self._check_col(parsed_file, 2, "name3", "Text", ["c1", "c2", "c3"]) + + def test_option_use_col_name_as_header_no_headers(self): + file_obj = bytes_io_from_str(textwrap.dedent( + """\ + ,,, + ,,, + n1,2,n3 + a1,1,c1,d1 + a2,4,c2 + a3,5,c3 + """)) + + parse_options = {"include_col_names_as_headers": False} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 4) + self._check_col(parsed_file, 0, "", "Text", ["n1", "a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "", "Int", [2, 1, 4, 5]) + self._check_col(parsed_file, 2, "", "Text", ["n3", "c1", "c2", "c3"]) + self._check_col(parsed_file, 3, "", "Text", ["", "d1", "", ""]) + + parse_options = {"include_col_names_as_headers": True} + parsed_file = import_csv._parse_open_file(file_obj, parse_options=parse_options)[1][0] + self._check_num_cols(parsed_file, 4) + self._check_col(parsed_file, 0, "n1", "Text", ["a1", "a2", "a3"]) + self._check_col(parsed_file, 1, "2", "Int", [1, 4, 5]) + self._check_col(parsed_file, 2, "n3", "Text", ["c1", "c2", "c3"]) + self._check_col(parsed_file, 3, "", "Text", [ "d1", "", ""]) + + +if __name__ == '__main__': + unittest.main() diff --git a/plugins/core/sandbox/test_import_json.py b/plugins/core/sandbox/test_import_json.py new file mode 100644 index 00000000..0101a3d4 --- /dev/null +++ b/plugins/core/sandbox/test_import_json.py @@ -0,0 +1,259 @@ +from unittest import TestCase +import import_json + +class TestImportJSON(TestCase): + + maxDiff = None + + def test_simple_json_array(self): + grist_tables = import_json.dumps([{'a': 1, 'b': 'baba'}, {'a': 4, 'b': 'abab'}], '') + self.assertEqual(grist_tables['tables'], [{ + 'column_metadata': [ + {'id': 'a', 'type': 'Int'}, {'id': 'b', 'type': 'Text'}], + 'table_data': [[1, 4], ['baba', 'abab']], + 'table_name': '' + }]) + + def test_missing_data(self): + grist_tables = import_json.dumps([{'a': 1}, {'b': 'abab'}, {'a': 4}]) + self.assertEqual(grist_tables['tables'], [{ + 'column_metadata': [ + 
{'id': 'a', 'type': 'Int'}, {'id': 'b', 'type': 'Text'}], + 'table_data': [[1, None, 4], [None, 'abab', None]], + 'table_name': '' + }]) + + def test_even_more_simple_array(self): + self.assertEqual( + import_json.dumps(['apple', 'pear', 'banana'], '')['tables'], + [{ + 'column_metadata': [ + {'id': '', 'type': 'Text'}], + 'table_data': [['apple', 'pear', 'banana']], + 'table_name': '' + }]) + + def test_mixing_simple_and_even_more_simple(self): + self.assertEqual( + import_json.dumps(['apple', 'pear', {'a': 'some cucumbers'}, 'banana'], '')['tables'], + [{ + 'column_metadata': [ + {'id': '', 'type': 'Text'}, + {'id': 'a', 'type': 'Text'}], + 'table_data': [['apple', 'pear', None, 'banana'], [None, None, 'some cucumbers', None]], + 'table_name': '' + }]) + + def test_array_with_reference(self): + # todo: reference should follow Grist's format + self.assertEqual( + import_json.dumps([{'a': {'b': 2}, 'c': 'foo'}], 'Hello')['tables'], + [{ + 'column_metadata': [ + {'id': 'a', 'type': 'Ref:Hello_a'}, {'id': 'c', 'type': 'Text'} + ], + 'table_data': [[1], ['foo']], + 'table_name': 'Hello' + }, { + 'column_metadata': [ + {'id': 'b', 'type': 'Int'} + ], + 'table_data': [[2]], + 'table_name': 'Hello_a' + }]) + + def test_nested_nested_object(self): + self.assertEqual( + import_json.dumps([{'a': {'b': 2, 'd': {'a': 'sugar'}}, 'c': 'foo'}], 'Hello')['tables'], + [{ + 'column_metadata': [ + {'id': 'a', 'type': 'Ref:Hello_a'}, {'id': 'c', 'type': 'Text'} + ], + 'table_data': [[1], ['foo']], + 'table_name': 'Hello' + }, { + 'column_metadata': [ + {'id': 'b', 'type': 'Int'}, {'id': 'd', 'type': 'Ref:Hello_a_d'} + ], + 'table_data': [[2], [1]], + 'table_name': 'Hello_a' + }, { + 'column_metadata': [ + {'id': 'a', 'type': 'Text'} + ], + 'table_data': [['sugar']], + 'table_name': 'Hello_a_d' + }]) + + + def test_array_with_list(self): + self.assertEqual( + import_json.dumps([{'a': ['ES', 'FR', 'US']}, {'a': ['FR']}], 'Hello')['tables'], + [{ + 'column_metadata': [], + 'table_data': [], + 'table_name': 'Hello' + }, { + 'column_metadata': [{'id': '', 'type': 'Text'}, {'id': 'Hello', 'type': 'Ref:Hello'}], + 'table_data': [['ES', 'FR', 'US', 'FR'], [1, 1, 1, 2]], + 'table_name': 'Hello_a' + }]) + + def test_array_with_list_of_dict(self): + self.assertEqual( + import_json.dumps([{'a': [{'b': 1}, {'b': 4}]}, {'c': 2}], 'Hello')['tables'], + [ { + 'column_metadata': [{'id': 'c', 'type': 'Int'}], + 'table_data': [[None, 2]], + 'table_name': 'Hello' + }, { + 'column_metadata': [ + {'id': 'b', 'type': 'Int'}, + {'id': 'Hello', 'type': 'Ref:Hello'} + ], + 'table_data': [[1, 4], [1, 1]], + 'table_name': 'Hello_a' + }]) + + + def test_array_of_array(self): + self.assertEqual( + import_json.dumps([['FR', 'US'], ['ES', 'CH']], 'Hello')['tables'], + [{ + 'column_metadata': [], + 'table_data': [], + 'table_name': 'Hello' + }, { + 'column_metadata': [{'id': '', 'type': 'Text'}, {'id': 'Hello', 'type': 'Ref:Hello'}], + 'table_data': [['FR', 'US', 'ES', 'CH'], [1, 1, 2, 2]], + 'table_name': 'Hello_' + }, ]) + + + def test_json_dict(self): + self.assertEqual( + import_json.dumps({ + 'foo': [{'a': 1, 'b': 'santa'}, {'a': 4, 'b': 'cats'}], + 'bar': [{'c': 2, 'd': 'ducks'}, {'c': 5, 'd': 'dogs'}], + 'status': {'success': True, 'time': '5s'} + }, 'Hello')['tables'], [{ + 'table_name': 'Hello', + 'column_metadata': [{'id': 'status', 'type': 'Ref:Hello_status'}], + 'table_data': [[1]] + }, { + 'table_name': 'Hello_bar', + 'column_metadata': [ + {'id': 'c', 'type': 'Int'}, + {'id': 'd', 'type': 'Text'}, + {'id': 'Hello', 'type': 
'Ref:Hello'} + ], + 'table_data': [[2, 5], ['ducks', 'dogs'], [1, 1]] + }, { + 'table_name': 'Hello_foo', + 'column_metadata': [ + {'id': 'a', 'type': 'Int'}, + {'id': 'b', 'type': 'Text'}, + {'id': 'Hello', 'type': 'Ref:Hello'}], + 'table_data': [[1, 4], ['santa', 'cats'], [1, 1]] + }, { + 'table_name': 'Hello_status', + 'column_metadata': [ + {'id': 'success', 'type': 'Bool'}, + {'id': 'time', 'type': 'Text'} + ], + 'table_data': [[True], ['5s']] + }]) + + def test_json_types(self): + self.assertEqual(import_json.dumps({ + 'a': 3, 'b': 3.14, 'c': True, 'd': 'name', 'e': -4, 'f': '3.14', 'g': None + }, 'Hello')['tables'], + [{ + 'table_name': 'Hello', + 'column_metadata': [ + {'id': 'a', 'type': 'Int'}, + {'id': 'b', 'type': 'Numeric'}, + {'id': 'c', 'type': 'Bool'}, + {'id': 'd', 'type': 'Text'}, + {'id': 'e', 'type': 'Int'}, + {'id': 'f', 'type': 'Text'}, + {'id': 'g', 'type': 'Text'} + ], + 'table_data': [[3], [3.14], [True], ['name'], [-4], ['3.14'], [None]] + }]) + + def test_type_is_defined_with_first_value(self): + tables = import_json.dumps([{'a': 'some text'}, {'a': 3}], '') + self.assertIsNotNone(tables['tables']) + self.assertIsNotNone(tables['tables'][0]) + self.assertIsNotNone(tables['tables'][0]['column_metadata']) + self.assertIsNotNone(tables['tables'][0]['column_metadata'][0]) + self.assertEqual(tables['tables'][0]['column_metadata'][0]['type'], 'Text') + + def test_first_unique_key(self): + self.assertEqual(import_json.first_available_key({'a': 1}, 'a'), 'a2') + self.assertEqual(import_json.first_available_key({'a': 1}, 'b'), 'b') + self.assertEqual(import_json.first_available_key({'a': 1, 'a2': 1}, 'a'), 'a3') + + +def dump_tables(options): + data = { + "foos": [ + {'foo': 1, 'link': [1, 2]}, + {'foo': 2, 'link': [1, 2]} + ], + "bar": {'hi': 'santa'} + } + return [t for t in import_json.dumps(data, 'FooBar', options)['tables']] + + +class TestParseOptions(TestCase): + + maxDiff = None + + # helpers + def assertColInTable(self, tables, **kwargs): + table = next(t for t in tables if t['table_name'] == kwargs['table_name']) + self.assertEqual(any(col['id'] == kwargs['col_id'] for col in table['column_metadata']), + kwargs['present']) + + def assertTableNamesEqual(self, tables, expected_table_names): + table_names = [t['table_name'] for t in tables] + self.assertEqual(sorted(table_names), sorted(expected_table_names)) + + def test_including_empty_string_includes_all(self): + tables = dump_tables({'includes': '', 'excludes': ''}) + self.assertTableNamesEqual(tables, ['FooBar', 'FooBar_bar', 'FooBar_foos', 'FooBar_foos_link']) + + def test_including_foos_includes_nested_object_and_removes_ref_to_table_not_included(self): + tables = dump_tables({'includes': 'FooBar_foos', 'excludes': ''}) + self.assertTableNamesEqual(tables, ['FooBar_foos', 'FooBar_foos_link']) + self.assertColInTable(tables, table_name='FooBar_foos', col_id='FooBar', present=False) + tables = dump_tables({'includes': 'FooBar_foos_link', 'excludes': ''}) + self.assertTableNamesEqual(tables, ['FooBar_foos_link']) + self.assertColInTable(tables, table_name='FooBar_foos_link', col_id='FooBar_foos', + present=False) + + def test_excluding_foos_excludes_nested_object_and_removes_link_to_excluded_table(self): + tables = dump_tables({'includes': '', 'excludes': 'FooBar_foos'}) + self.assertTableNamesEqual(tables, ['FooBar', 'FooBar_bar']) + self.assertColInTable(tables, table_name='FooBar', col_id='foos', present=False) + + def test_excludes_works_on_nested_object_that_are_included(self): + tables = 
dump_tables({'includes': 'FooBar_foos', 'excludes': 'FooBar_foos_link'})
+    self.assertTableNamesEqual(tables, ['FooBar_foos'])
+
+  def test_excludes_works_on_property(self):
+    tables = dump_tables({'includes': '', 'excludes': 'FooBar_foos_foo'})
+    self.assertTableNamesEqual(tables, ['FooBar', 'FooBar_foos', 'FooBar_foos_link', 'FooBar_bar'])
+    self.assertColInTable(tables, table_name='FooBar_foos', col_id='foo', present=False)
+
+  def test_works_with_multiple_includes(self):
+    tables = dump_tables({'includes': 'FooBar_foos_link', 'excludes': ''})
+    self.assertTableNamesEqual(tables, ['FooBar_foos_link'])
+    tables = dump_tables({'includes': 'FooBar_foos_link;FooBar_bar', 'excludes': ''})
+    self.assertTableNamesEqual(tables, ['FooBar_bar', 'FooBar_foos_link'])
+
+  def test_works_with_multiple_excludes(self):
+    tables = dump_tables({'includes': '', 'excludes': 'FooBar_foos_link;FooBar_bar'})
+    self.assertTableNamesEqual(tables, ['FooBar', 'FooBar_foos'])
diff --git a/plugins/core/sandbox/test_import_xls.py b/plugins/core/sandbox/test_import_xls.py
new file mode 100644
index 00000000..88a4d96e
--- /dev/null
+++ b/plugins/core/sandbox/test_import_xls.py
@@ -0,0 +1,160 @@
+# This Python file uses the following encoding: utf-8
+import calendar
+import datetime
+import math
+import os
+import unittest
+
+import import_xls
+
+def _get_fixture(filename):
+  return [os.path.join(os.path.dirname(__file__), "test/fixtures", filename), filename]
+
+
+class TestImportXLS(unittest.TestCase):
+
+  def _check_col(self, sheet, index, name, typename, values):
+    self.assertEqual(sheet["column_metadata"][index]["id"], name)
+    self.assertEqual(sheet["column_metadata"][index]["type"], typename)
+    self.assertEqual(sheet["table_data"][index], values)
+
+  def test_excel(self):
+    parsed_file = import_xls.parse_file(*_get_fixture('test_excel.xlsx'))
+
+    # check that column type was correctly set to int and values are properly parsed
+    self.assertEqual(parsed_file[1][0]["column_metadata"][0], {"type": "Int", "id": "numbers"})
+    self.assertEqual(parsed_file[1][0]["table_data"][0], [1, 2, 3, 4, 5, 6, 7, 8])
+
+    # check that column type was correctly set to text and values are properly parsed
+    self.assertEqual(parsed_file[1][0]["column_metadata"][1], {"type": "Text", "id": "letters"})
+    self.assertEqual(parsed_file[1][0]["table_data"][1],
+                     ["a", "b", "c", "d", "e", "f", "g", "h"])
+
+    # check that column type was correctly set to bool and values are properly parsed
+    self.assertEqual(parsed_file[1][0]["column_metadata"][2], {"type": "Bool", "id": "boolean"})
+    self.assertEqual(parsed_file[1][0]["table_data"][2],
+                     [True, False, True, False, True, False, True, False])
+
+    # check that column type was correctly set to text and values are properly parsed
+    self.assertEqual(parsed_file[1][0]["column_metadata"][3],
+                     {"type": "Text", "id": "corner-cases"})
+    self.assertEqual(parsed_file[1][0]["table_data"][3],
+                     # The type is detected as text, so all values should be text.
+ [u'=function()', '3.0', u'two spaces after ', + u' two spaces before', u'!@#$', u'€€€', u'√∫abc$$', u'line\nbreak']) + + # check that multiple tables are created when there are multiple sheets in a document + self.assertEqual(parsed_file[1][0]["table_name"], u"Sheet1") + self.assertEqual(parsed_file[1][1]["table_name"], u"Sheet2") + self.assertEqual(parsed_file[1][1]["table_data"][0], ["a", "b", "c", "d"]) + + def test_excel_types(self): + parsed_file = import_xls.parse_file(*_get_fixture('test_excel_types.xlsx')) + sheet = parsed_file[1][0] + self._check_col(sheet, 0, "int1", "Int", [-1234123, '', '']) + self._check_col(sheet, 1, "int2", "Int", [5, '', '']) + self._check_col(sheet, 2, "textint", "Text", ["12345678902345689", '', '']) + self._check_col(sheet, 3, "bigint", "Text", ["320150170634561830", '', '']) + self._check_col(sheet, 4, "num2", "Numeric", [123456789.123456, '', '']) + self._check_col(sheet, 5, "bignum", "Numeric", [math.exp(200), '', '']) + self._check_col(sheet, 6, "date1", "DateTime", + [calendar.timegm(datetime.datetime(2015, 12, 22, 11, 59, 00).timetuple()), None, None]) + self._check_col(sheet, 7, "date2", "Date", + [calendar.timegm(datetime.datetime(2015, 12, 20, 0, 0, 0).timetuple()), None, None]) + self._check_col(sheet, 8, "datetext", "Date", + [calendar.timegm(datetime.date(2015, 12, 22).timetuple()), None, None]) + # TODO: all dates have different format + # self._check_col(sheet, 9, "datetimetext", "DateTime", + # [calendar.timegm(datetime.datetime(2015, 12, 22, 0, 0, 0).timetuple()), + # calendar.timegm(datetime.datetime(2015, 12, 22, 13, 15, 0).timetuple()), + # calendar.timegm(datetime.datetime(2018, 02, 27, 16, 8, 39).timetuple())]) + + def test_excel_type_detection(self): + # This tests goes over the second sheet of the fixture doc, which has multiple rows that try + # to throw off the type detection. + parsed_file = import_xls.parse_file(*_get_fixture('test_excel_types.xlsx')) + sheet = parsed_file[1][1] + self._check_col(sheet, 0, "date_with_other", "DateTime", + [1467676800.0, 1451606400.0, 1451692800.0, 1454544000.0, 1199577600.0, + 1467732614.0, u'n/a', 1207958400.0, 1451865600.0, 1451952000.0, + None, 1452038400.0, 1451549340.0, 1483214940.0, None, + 1454544000.0, 1199577600.0, 1451692800.0, 1451549340.0, 1483214940.0]) + self._check_col(sheet, 1, "float_not_int", "Numeric", + [1,2,3,4,5,"",6,7,8,9,10,10.25,11,12,13,14,15,16,17,18]) + self._check_col(sheet, 2, "int_not_bool", "Int", + [0, 0, 1, 0, 1, 0, 0, 1, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]) + self._check_col(sheet, 3, "float_not_bool", "Numeric", + [0, 0, 1, 0, 1, 0, 0, 1, 0, 0.5, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]) + self._check_col(sheet, 4, "text_as_bool", "Bool", + [0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]) + self._check_col(sheet, 5, "int_as_bool", "Bool", + [0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0]) + self._check_col(sheet, 6, "float_not_date", "Numeric", + [4.0, 6.0, 4.0, 4.0, 6.0, 4.0, '--', 6.0, 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 6.0, + 4.0, 6.0, '3-4', 4.0, 6.5]) + self._check_col(sheet, 7, "float_not_text", "Numeric", + [-10.25, -8.00, -5.75, -3.50, "n/a", 1.00, " ??? 
", 5.50, "", "-", + 12.25, 0.00, "", 0.00, "--", 23.50, "NA", 28.00, 30.25, 32.50]) + self._check_col(sheet, 8, "dollar_amts", "Numeric", + [0.00, 0.75, 1.50, '', 3.00, 0.00, 0.75, 1.50, '--', 3.00, 1234.56, 1000, + 1001.50, '-', 3000000.000, 0000.00, 1234.56, 1000, 1001.50, 1000.01]) + + def test_excel_single_merged_cell(self): + # An older version of xlrd had a bug where a single cell marked as 'merged' would cause an + # exception. + parsed_file = import_xls.parse_file(*_get_fixture('test_single_merged_cell.xlsx')) + tables = parsed_file[1] + self.assertEqual(tables, [{ + 'table_name': u'Transaction Report', + 'column_metadata': [ + {'type': 'Text', 'id': u''}, + {'type': 'Numeric', 'id': u'Start'}, + {'type': 'Numeric', 'id': u''}, + {'type': 'Numeric', 'id': u''}, + {'type': 'Text', 'id': u'Seek no easy ways'}, + ], + 'table_data': [ + [u'SINGLE MERGED', u'The End'], + [1637384.52, u''], + [2444344.06, u''], + [2444344.06, u''], + [u'', u''], + ], + }]) + + def test_excel_strange_dates(self): + # TODO fails with xlrd.xldate.XLDateAmbiguous: 4.180902777777778 + # Check that we don't fail when encountering unusual dates and times (e.g. 0 or 38:00:00). + parsed_file = import_xls.parse_file(*_get_fixture('strange_dates.xlsx')) + tables = parsed_file[1] + # We test non-failure, but the result is not really what we want. E.g. "1:10" and "100:20:30" + # would be best left as text, but here become "01:10:00" (after xlrd parses the first as + # datetime.time), and as 4.18... (after xlrd fails and we resort to the numerical value). + self.assertEqual(tables, [{ + 'table_name': u'Sheet1', + 'column_metadata': [ + {'id': 'a', 'type': 'Text'}, + {'id': 'b', 'type': 'Date'}, + {'id': 'c', 'type': 'Text'}, + {'id': 'd', 'type': 'Text'}, + {'id': 'e', 'type': 'Numeric'}, + {'id': 'f', 'type': 'Int'}, + {'id': 'g', 'type': 'Date'}, + {'id': 'h', 'type': 'Date'}, + {'id': 'i', 'type': 'Bool'}, + ], + 'table_data': [ + [u'21:14:00'], + [1568851200.0], + [u'01:10:00'], + [u'10:20:30'], + [4.180902777777778], + [20], + [-6106060800.0], + [205286400.0], + [False], # This is not great either, we should be able to distinguish 0 from FALSE. + ], + }]) + +if __name__ == '__main__': + unittest.main() diff --git a/sandbox/docker/Dockerfile b/sandbox/docker/Dockerfile new file mode 100644 index 00000000..eb8b7799 --- /dev/null +++ b/sandbox/docker/Dockerfile @@ -0,0 +1,14 @@ +FROM python:3.9 + +COPY requirements3.txt /tmp/requirements3.txt + +RUN \ + pip3 install -r /tmp/requirements3.txt + +RUN \ + apt-get update && \ + apt-get install -y faketime + +RUN useradd --shell /bin/bash sandbox +USER sandbox +WORKDIR / diff --git a/sandbox/docker/Makefile b/sandbox/docker/Makefile new file mode 100644 index 00000000..a9b5ed12 --- /dev/null +++ b/sandbox/docker/Makefile @@ -0,0 +1,3 @@ +image: + cp ../requirements3.txt . # docker build requires files to be present. + docker build -t grist-docker-sandbox . diff --git a/sandbox/requirements.txt b/sandbox/requirements.txt index e27ebaf4..45329f1e 100644 --- a/sandbox/requirements.txt +++ b/sandbox/requirements.txt @@ -7,6 +7,7 @@ html5lib==0.999999999 iso8601==0.1.12 json_table_schema==0.2.1 lazy_object_proxy==1.6.0 +lxml==4.6.3 # used in csv plugin only? 
messytables==0.15.2 python_dateutil==2.6.0 python_magic==0.4.12 diff --git a/sandbox/requirements3.txt b/sandbox/requirements3.txt new file mode 100644 index 00000000..13f3ac41 --- /dev/null +++ b/sandbox/requirements3.txt @@ -0,0 +1,21 @@ +astroid==2.5.7 # this is a difference between python 2 and 3, everything else is same +asttokens==2.0.5 +backports.functools-lru-cache==1.6.4 +chardet==2.3.0 +enum34==1.1.10 +html5lib==0.999999999 +iso8601==0.1.12 +json_table_schema==0.2.1 +lazy_object_proxy==1.6.0 +lxml==4.6.3 # used in csv plugin only? +messytables==0.15.2 +python_dateutil==2.6.0 +python_magic==0.4.12 +roman==2.0.0 +singledispatch==3.6.2 +six==1.16.0 +sortedcontainers==1.5.7 +webencodings==0.5 +wrapt==1.12.1 +xlrd==1.2.0 +unittest-xml-reporting==2.0.0 diff --git a/stubs/app/server/lib/create.ts b/stubs/app/server/lib/create.ts index 12864004..ec925076 100644 --- a/stubs/app/server/lib/create.ts +++ b/stubs/app/server/lib/create.ts @@ -4,7 +4,7 @@ import {ScopedSession} from 'app/server/lib/BrowserSession'; import {NSandboxCreator} from 'app/server/lib/NSandbox'; // Use raw python - update when pynbox or other solution is set up for core. -const sandboxCreator = new NSandboxCreator('unsandboxed'); +const sandboxCreator = new NSandboxCreator({defaultFlavor: 'unsandboxed'}); export const create: ICreate = { adjustSession(scopedSession: ScopedSession): void { diff --git a/yarn.lock b/yarn.lock index ddba023c..cf811807 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2,6 +2,45 @@ # yarn lockfile v1 +"@fast-csv/format@4.3.5": + version "4.3.5" + resolved "https://registry.yarnpkg.com/@fast-csv/format/-/format-4.3.5.tgz#90d83d1b47b6aaf67be70d6118f84f3e12ee1ff3" + integrity sha512-8iRn6QF3I8Ak78lNAa+Gdl5MJJBM5vRHivFtMRUWINdevNo00K7OXxS2PshawLKTejVwieIlPmK5YlLu6w4u8A== + dependencies: + "@types/node" "^14.0.1" + lodash.escaperegexp "^4.1.2" + lodash.isboolean "^3.0.3" + lodash.isequal "^4.5.0" + lodash.isfunction "^3.0.9" + lodash.isnil "^4.0.0" + +"@fast-csv/parse@4.3.6": + version "4.3.6" + resolved "https://registry.yarnpkg.com/@fast-csv/parse/-/parse-4.3.6.tgz#ee47d0640ca0291034c7aa94039a744cfb019264" + integrity sha512-uRsLYksqpbDmWaSmzvJcuApSEe38+6NQZBUsuAyMZKqHxH0g1wcJgsKUvN3WC8tewaqFjBMMGrkHmC+T7k8LvA== + dependencies: + "@types/node" "^14.0.1" + lodash.escaperegexp "^4.1.2" + lodash.groupby "^4.6.0" + lodash.isfunction "^3.0.9" + lodash.isnil "^4.0.0" + lodash.isundefined "^3.0.1" + lodash.uniq "^4.5.0" + +"@googleapis/drive@0.3.1": + version "0.3.1" + resolved "https://registry.yarnpkg.com/@googleapis/drive/-/drive-0.3.1.tgz#d37e53534562a3e7755611742daff545dea8b857" + integrity sha512-LYsxWMHFt2Z7kdO16EE6jVP07Qq141hmZHHuFvljWLVo7EBtiPCgzrkjJe03Q7wNs28rnFe0jlIDiBZYT0tnmA== + dependencies: + googleapis-common "^5.0.1" + +"@googleapis/oauth2@0.2.0": + version "0.2.0" + resolved "https://registry.yarnpkg.com/@googleapis/oauth2/-/oauth2-0.2.0.tgz#fd14232d8b01c27423e2b73b70638db9ec406309" + integrity sha512-QT+beWILtV1lVa//8+tDln3r7uz79EnrRsHf9hYOxZ/EAD61r0WaXo2/sODqQN0az2eR0Z1aldLRRZD7SLpTcQ== + dependencies: + googleapis-common "^5.0.1" + "@gristlabs/connect-sqlite3@0.9.11-grist.1": version "0.9.11-grist.1" resolved "https://registry.yarnpkg.com/@gristlabs/connect-sqlite3/-/connect-sqlite3-0.9.11-grist.1.tgz#a9da7789786e1e32b94cdfb9749360f9eacd79da" @@ -235,6 +274,11 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-10.17.56.tgz#010c9e047c3ff09ddcd11cbb6cf5912725cdc2b3" integrity sha512-LuAa6t1t0Bfw4CuSR0UITsm1hP17YL+u82kfHGrHUWdhlBtH7sa7jGY5z7glGaIj/WDYDkRtgGd+KCjCzxBW1w== 
+"@types/node@^14.0.1": + version "14.17.6" + resolved "https://registry.yarnpkg.com/@types/node/-/node-14.17.6.tgz#cc61c8361c89e70c468cda464d1fa3dd7e5ebd62" + integrity sha512-iBxsxU7eswQDGhlr3AiamBxOssaYxbM+NKXVil8jg9yFXvrfEFbDumLD/2dMTB+zYyg7w+Xjt8yuxfdbUHAtcQ== + "@types/node@^8.0.24": version "8.10.66" resolved "https://registry.yarnpkg.com/@types/node/-/node-8.10.66.tgz#dd035d409df322acc83dff62a602f12a5783bbb3" @@ -317,6 +361,11 @@ dependencies: "@types/node" "*" +"@types/which@2.0.1": + version "2.0.1" + resolved "https://registry.yarnpkg.com/@types/which/-/which-2.0.1.tgz#27ecd67f915b7c3d6ba552135bb1eecd66e63501" + integrity sha512-Jjakcv8Roqtio6w1gr0D7y6twbhx6gGgFGF5BLwajPpnOIOxFkakFhCq+LmyyeAz7BX6ULrjBOxdKaCDy+4+dQ== + "@webassemblyjs/ast@1.8.5": version "1.8.5" resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.8.5.tgz#51b1c5fe6576a34953bf4b253df9f0d490d9e359" @@ -486,6 +535,13 @@ abbrev@1: resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8" integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q== +abort-controller@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392" + integrity sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg== + dependencies: + event-target-shim "^5.0.0" + accepts@~1.3.5: version "1.3.7" resolved "https://registry.yarnpkg.com/accepts/-/accepts-1.3.7.tgz#531bc726517a3b2b41f850021c6cc15eaab507cd" @@ -660,6 +716,35 @@ aproba@^1.0.3, aproba@^1.1.1: resolved "https://registry.yarnpkg.com/aproba/-/aproba-1.2.0.tgz#6802e6264efd18c790a1b0d517f0f2627bf2c94a" integrity sha512-Y9J6ZjXtoYh8RnXVCMOU/ttDmk1aBjunq9vO0ta5x85WDQiQfUF9sIPBITdbiiIVcBo03Hi3jMxigBtsddlXRw== +archiver-utils@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/archiver-utils/-/archiver-utils-2.1.0.tgz#e8a460e94b693c3e3da182a098ca6285ba9249e2" + integrity sha512-bEL/yUb/fNNiNTuUz979Z0Yg5L+LzLxGJz8x79lYmR54fmTIb6ob/hNQgkQnIUDWIFjZVQwl9Xs356I6BAMHfw== + dependencies: + glob "^7.1.4" + graceful-fs "^4.2.0" + lazystream "^1.0.0" + lodash.defaults "^4.2.0" + lodash.difference "^4.5.0" + lodash.flatten "^4.4.0" + lodash.isplainobject "^4.0.6" + lodash.union "^4.6.0" + normalize-path "^3.0.0" + readable-stream "^2.0.0" + +archiver@^5.0.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/archiver/-/archiver-5.3.0.tgz#dd3e097624481741df626267564f7dd8640a45ba" + integrity sha512-iUw+oDwK0fgNpvveEsdQ0Ase6IIKztBJU2U0E9MzszMfmVVUyv1QJhS2ITW9ZCqx8dktAxVAjWWkKehuZE8OPg== + dependencies: + archiver-utils "^2.1.0" + async "^3.2.0" + buffer-crc32 "^0.2.1" + readable-stream "^3.6.0" + readdir-glob "^1.0.0" + tar-stream "^2.2.0" + zip-stream "^4.1.0" + are-we-there-yet@~1.1.2: version "1.1.5" resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-1.1.5.tgz#4b35c2944f062a8bfcda66410760350fe9ddfc21" @@ -732,6 +817,11 @@ array-unique@^0.3.2: resolved "https://registry.yarnpkg.com/array-unique/-/array-unique-0.3.2.tgz#a894b75d4bc4f6cd679ef3244a9fd8f46ae2d428" integrity sha1-qJS3XUvE9s1nnvMkSp/Y9Gri1Cg= +arrify@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/arrify/-/arrify-2.0.1.tgz#c9655e9331e0abcd588d2a7cad7e9956f66701fa" + integrity sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug== + asn1.js@^4.0.0: version "4.10.1" resolved 
"https://registry.yarnpkg.com/asn1.js/-/asn1.js-4.10.1.tgz#b9c2bf5805f1e64aadeed6df3a2bfafb5a73f5a0" @@ -795,6 +885,11 @@ async@^2.5.0: dependencies: lodash "^4.17.14" +async@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/async/-/async-3.2.0.tgz#b3a2685c5ebb641d3de02d161002c60fc9f85720" + integrity sha512-TR2mEZFVOj2pLStYxLht7TyfuRzaydfpxr3k9RpHIzMgw7A64dzsdqCxH1WJyQdoe8T10nDXd9wnEigmiuHIZw== + async@~1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/async/-/async-1.0.0.tgz#f8fc04ca3a13784ade9e1641af98578cfbd647a9" @@ -845,7 +940,7 @@ base64-js@^1.0.2: resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.3.1.tgz#58ece8cb75dd07e71ed08c736abc5fac4dbf8df1" integrity sha512-mLQ4i2QO1ytvGWFWmcngKO//JXAQueZvwEKtjgQFM4jIK0kU+ytMfplL8j+n5mspOfjHwoAg+9yhb7BwAHm36g== -base64-js@^1.3.1: +base64-js@^1.3.0, base64-js@^1.3.1: version "1.5.1" resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a" integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== @@ -877,11 +972,21 @@ bcrypt-pbkdf@^1.0.0: dependencies: tweetnacl "^0.14.3" +big-integer@^1.6.17: + version "1.6.48" + resolved "https://registry.yarnpkg.com/big-integer/-/big-integer-1.6.48.tgz#8fd88bd1632cba4a1c8c3e3d7159f08bb95b4b9e" + integrity sha512-j51egjPa7/i+RdiRuJbPdJ2FIUYYPhvYLjzoYbcMMm62ooO6F94fETG4MTs46zPAF9Brs04OajboA/qTGuz78w== + big.js@^5.2.2: version "5.2.2" resolved "https://registry.yarnpkg.com/big.js/-/big.js-5.2.2.tgz#65f0af382f578bcdc742bd9c281e9cb2d7768328" integrity sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ== +bignumber.js@^9.0.0: + version "9.0.1" + resolved "https://registry.yarnpkg.com/bignumber.js/-/bignumber.js-9.0.1.tgz#8d7ba124c882bfd8e43260c67475518d0689e4e5" + integrity sha512-IdZR9mh6ahOBv/hYGiXyVuyCetmGJhtYkqLBpTStdhEGjegpPlUawydyaF3pbIOFynJTpllEs+NP+CS9jKFLjA== + binary-extensions@^1.0.0: version "1.13.1" resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-1.13.1.tgz#598afe54755b2868a5330d2aff9d4ebb53209b65" @@ -892,6 +997,14 @@ binary-extensions@^2.0.0: resolved "https://registry.yarnpkg.com/binary-extensions/-/binary-extensions-2.0.0.tgz#23c0df14f6a88077f5f986c0d167ec03c3d5537c" integrity sha512-Phlt0plgpIIBOGTT/ehfFnbNlfsDEiqmzE2KRXoX1bLIlir4X/MR+zSyBEkL05ffWgnRSf/DXv+WrUAVr93/ow== +binary@~0.3.0: + version "0.3.0" + resolved "https://registry.yarnpkg.com/binary/-/binary-0.3.0.tgz#9f60553bc5ce8c3386f3b553cff47462adecaa79" + integrity sha1-n2BVO8XOjDOG87VTz/R0Yq3sqnk= + dependencies: + buffers "~0.1.1" + chainsaw "~0.1.0" + bindings@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df" @@ -899,11 +1012,25 @@ bindings@^1.5.0: dependencies: file-uri-to-path "1.0.0" +bl@^4.0.3: + version "4.1.0" + resolved "https://registry.yarnpkg.com/bl/-/bl-4.1.0.tgz#451535264182bec2fbbc83a62ab98cf11d9f7b3a" + integrity sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w== + dependencies: + buffer "^5.5.0" + inherits "^2.0.4" + readable-stream "^3.4.0" + bluebird@3.7.2, bluebird@^3.3.3, bluebird@^3.5.0, bluebird@^3.5.5: version "3.7.2" resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== +bluebird@~3.4.1: + version "3.4.7" + 
resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.4.7.tgz#f72d760be09b7f76d08ed8fae98b289a8d05fab3" + integrity sha1-9y12C+Cbf3bQjtj66Ysomo0F+rM= + bn.js@^4.0.0, bn.js@^4.1.0, bn.js@^4.4.0: version "4.11.8" resolved "https://registry.yarnpkg.com/bn.js/-/bn.js-4.11.8.tgz#2cde09eb5ee341f484746bb0309b3253b1b1442f" @@ -1139,16 +1266,26 @@ browserify@^14.4.0: vm-browserify "~0.0.1" xtend "^4.0.0" -buffer-crc32@~0.2.3: +buffer-crc32@^0.2.1, buffer-crc32@^0.2.13, buffer-crc32@~0.2.3: version "0.2.13" resolved "https://registry.yarnpkg.com/buffer-crc32/-/buffer-crc32-0.2.13.tgz#0d333e3f00eac50aa1454abd30ef8c2a5d9a7242" integrity sha1-DTM+PwDqxQqhRUq9MO+MKl2ackI= +buffer-equal-constant-time@1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz#f8e71132f7ffe6e01a5c9697a4c6f3e48d5cc819" + integrity sha1-+OcRMvf/5uAaXJaXpMbz5I1cyBk= + buffer-from@^1.0.0: version "1.1.1" resolved "https://registry.yarnpkg.com/buffer-from/-/buffer-from-1.1.1.tgz#32713bc028f75c02fdb710d7c7bcec1f2c6070ef" integrity sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A== +buffer-indexof-polyfill@~1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/buffer-indexof-polyfill/-/buffer-indexof-polyfill-1.0.2.tgz#d2732135c5999c64b277fcf9b1abe3498254729c" + integrity sha512-I7wzHwA3t1/lwXQh+A5PbNvJxgfo5r3xulgpYDB5zckTu/Z9oUK9biouBKQUjEqzaz3HnAT6TYoovmE+GqSf7A== + buffer-xor@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/buffer-xor/-/buffer-xor-1.0.3.tgz#26e61ed1422fb70dd42e6e36729ed51d855fe8d9" @@ -1163,7 +1300,7 @@ buffer@^4.3.0: ieee754 "^1.1.4" isarray "^1.0.0" -buffer@^5.0.2, buffer@^5.1.0: +buffer@^5.0.2, buffer@^5.1.0, buffer@^5.5.0: version "5.7.1" resolved "https://registry.yarnpkg.com/buffer/-/buffer-5.7.1.tgz#ba62e7c13133053582197160851a8f648e99eed0" integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== @@ -1171,6 +1308,11 @@ buffer@^5.0.2, buffer@^5.1.0: base64-js "^1.3.1" ieee754 "^1.1.13" +buffers@~0.1.1: + version "0.1.1" + resolved "https://registry.yarnpkg.com/buffers/-/buffers-0.1.1.tgz#b24579c3bed4d6d396aeee6d9a8ae7f5482ab7bb" + integrity sha1-skV5w77U1tOWru5tmorn9Ugqt7s= + builtin-status-codes@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/builtin-status-codes/-/builtin-status-codes-3.0.0.tgz#85982878e21b98e1c66425e03d0174788f569ee8" @@ -1317,6 +1459,13 @@ chai@^4.1.2: pathval "^1.1.1" type-detect "^4.0.5" +chainsaw@~0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/chainsaw/-/chainsaw-0.1.0.tgz#5eab50b28afe58074d0d58291388828b5e5fbc98" + integrity sha1-XqtQsor+WAdNDVgpE4iCi15fvJg= + dependencies: + traverse ">=0.3.0 <0.4" + chalk@^1.1.1: version "1.1.3" resolved "https://registry.yarnpkg.com/chalk/-/chalk-1.1.3.tgz#a8115c55e4a702fe4d150abd3872822a7e09fc98" @@ -1622,6 +1771,16 @@ components-jqueryui@1.12.1: resolved "https://registry.yarnpkg.com/components-jqueryui/-/components-jqueryui-1.12.1.tgz#617076f128f3be4c265f3e2db50471ef96cd9cee" integrity sha1-YXB28SjzvkwmXz4ttQRx75bNnO4= +compress-commons@^4.1.0: + version "4.1.1" + resolved "https://registry.yarnpkg.com/compress-commons/-/compress-commons-4.1.1.tgz#df2a09a7ed17447642bad10a85cc9a19e5c42a7d" + integrity sha512-QLdDLCKNV2dtoTorqgxngQCMA+gWXkM/Nwu7FpeBhk/RdkzimqC3jueb/FDmaZeXh+uby1jkBqE3xArsLBE5wQ== + dependencies: + buffer-crc32 "^0.2.13" + crc32-stream "^4.0.2" + normalize-path "^3.0.0" + readable-stream "^3.6.0" 
+ concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -1733,6 +1892,22 @@ core-util-is@1.0.2, core-util-is@~1.0.0: resolved "https://registry.yarnpkg.com/core-util-is/-/core-util-is-1.0.2.tgz#b5fd54220aa2bc5ab57aab7140c940754503c1a7" integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac= +crc-32@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/crc-32/-/crc-32-1.2.0.tgz#cb2db6e29b88508e32d9dd0ec1693e7b41a18208" + integrity sha512-1uBwHxF+Y/4yF5G48fwnKq6QsIXheor3ZLPT80yGBV1oEUwpPojlEhQbWKVw1VwcTQyMGHK1/XMmTjmlsmTTGA== + dependencies: + exit-on-epipe "~1.0.1" + printj "~1.1.0" + +crc32-stream@^4.0.2: + version "4.0.2" + resolved "https://registry.yarnpkg.com/crc32-stream/-/crc32-stream-4.0.2.tgz#c922ad22b38395abe9d3870f02fa8134ed709007" + integrity sha512-DxFZ/Hk473b/muq1VJ///PMNLj0ZMnzye9thBpmjpJKCc5eMgB95aK8zCGrGfQ90cWo561Te6HK9D+j4KPdM6w== + dependencies: + crc-32 "^1.2.0" + readable-stream "^3.4.0" + create-ecdh@^4.0.0: version "4.0.3" resolved "https://registry.yarnpkg.com/create-ecdh/-/create-ecdh-4.0.3.tgz#c9111b6f33045c4697f144787f9254cdc77c45ff" @@ -1860,6 +2035,11 @@ dashdash@^1.12.0: dependencies: assert-plus "^1.0.0" +dayjs@^1.8.34: + version "1.10.6" + resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.10.6.tgz#288b2aa82f2d8418a6c9d4df5898c0737ad02a63" + integrity sha512-AztC/IOW4L1Q41A86phW5Thhcrco3xuAA+YX/BLpLWWjRcTj5TOt/QImBLmCKlrF7u7k47arTnOyL6GnbG8Hvw== + debug@2.6.9, debug@^2.1.3, debug@^2.2.0, debug@^2.3.3, debug@^2.6.9: version "2.6.9" resolved "https://registry.yarnpkg.com/debug/-/debug-2.6.9.tgz#5d128515df134ff327e90a4c93f4e077a536341f" @@ -2097,7 +2277,7 @@ double-ended-queue@2.1.0-0, double-ended-queue@^2.1.0-0: resolved "https://registry.yarnpkg.com/double-ended-queue/-/double-ended-queue-2.1.0-0.tgz#103d3527fd31528f40188130c841efdd78264e5c" integrity sha1-ED01J/0xUo9AGIEwyEHv3XgmTlw= -duplexer2@^0.1.2, duplexer2@^0.1.4, duplexer2@~0.1.0, duplexer2@~0.1.2: +duplexer2@^0.1.2, duplexer2@^0.1.4, duplexer2@~0.1.0, duplexer2@~0.1.2, duplexer2@~0.1.4: version "0.1.4" resolved "https://registry.yarnpkg.com/duplexer2/-/duplexer2-0.1.4.tgz#8b12dab878c0d69e3e7891051662a32fc6bddcc1" integrity sha1-ixLauHjA1p4+eJEFFmKjL8a93ME= @@ -2132,6 +2312,13 @@ ecc-jsbn@~0.1.1: jsbn "~0.1.0" safer-buffer "^2.1.0" +ecdsa-sig-formatter@1.0.11, ecdsa-sig-formatter@^1.0.11: + version "1.0.11" + resolved "https://registry.yarnpkg.com/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz#ae0f0fa2d85045ef14a817daa3ce9acd0489e5bf" + integrity sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ== + dependencies: + safe-buffer "^5.0.1" + ee-first@1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/ee-first/-/ee-first-1.1.1.tgz#590c61156b0ae2f4f0255732a158b266bc56b21d" @@ -2194,7 +2381,7 @@ encodeurl@~1.0.2: resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" integrity sha1-rT/0yG7C0CkyL1oCw6mmBslbP1k= -end-of-stream@^1.0.0, end-of-stream@^1.1.0: +end-of-stream@^1.0.0, end-of-stream@^1.1.0, end-of-stream@^1.4.1: version "1.4.4" resolved "https://registry.yarnpkg.com/end-of-stream/-/end-of-stream-1.4.4.tgz#5ae64a5f45057baf3626ec14da0ca5e4b2431eb0" integrity sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q== @@ -2310,6 +2497,11 @@ etag@~1.8.1: resolved 
"https://registry.yarnpkg.com/etag/-/etag-1.8.1.tgz#41ae2eeb65efa62268aebfea83ac7d79299b0887" integrity sha1-Qa4u62XvpiJorr/qg6x9eSmbCIc= +event-target-shim@^5.0.0: + version "5.0.1" + resolved "https://registry.yarnpkg.com/event-target-shim/-/event-target-shim-5.0.1.tgz#5d4d3ebdf9583d63a5333ce2deb7480ab2b05789" + integrity sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ== + events@^1.1.1, events@~1.1.0: version "1.1.1" resolved "https://registry.yarnpkg.com/events/-/events-1.1.1.tgz#9ebdb7635ad099c70dcc4c2a1f5004288e8bd924" @@ -2328,6 +2520,21 @@ evp_bytestokey@^1.0.0, evp_bytestokey@^1.0.3: md5.js "^1.3.4" safe-buffer "^5.1.1" +exceljs@4.2.1: + version "4.2.1" + resolved "https://registry.yarnpkg.com/exceljs/-/exceljs-4.2.1.tgz#49d74babfcae74f61bcfc8e9964f0feca084d91b" + integrity sha512-EogoTdXH1X1PxqD9sV8caYd1RIfXN3PVlCV+mA/87CgdO2h4X5xAEbr7CaiP8tffz7L4aBFwsdMbjfMXi29NjA== + dependencies: + archiver "^5.0.0" + dayjs "^1.8.34" + fast-csv "^4.3.1" + jszip "^3.5.0" + readable-stream "^3.6.0" + saxes "^5.0.1" + tmp "^0.2.0" + unzipper "^0.10.11" + uuid "^8.3.0" + execa@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/execa/-/execa-1.0.0.tgz#c6236a5bb4df6d6f15e88e7f017798216749ddd8" @@ -2341,6 +2548,11 @@ execa@^1.0.0: signal-exit "^3.0.0" strip-eof "^1.0.0" +exit-on-epipe@~1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/exit-on-epipe/-/exit-on-epipe-1.0.1.tgz#0bdd92e87d5285d267daa8171d0eb06159689692" + integrity sha512-h2z5mrROTxce56S+pnvAV890uu7ls7f1kEvVGJbw1OlFH3/mlJ5bkXu0KRyW94v37zzHPiUd55iLn3DA7TjWpw== + expand-brackets@^2.1.4: version "2.1.4" resolved "https://registry.yarnpkg.com/expand-brackets/-/expand-brackets-2.1.4.tgz#b77735e315ce30f6b6eff0f83b04151a22449622" @@ -2412,7 +2624,7 @@ extend-shallow@^3.0.0, extend-shallow@^3.0.2: assign-symbols "^1.0.0" is-extendable "^1.0.1" -extend@~3.0.2: +extend@^3.0.2, extend@~3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/extend/-/extend-3.0.2.tgz#f8b1136b4071fbd8eb140aff858b1019ec2915fa" integrity sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g== @@ -2456,6 +2668,14 @@ eyes@0.1.x: resolved "https://registry.yarnpkg.com/eyes/-/eyes-0.1.8.tgz#62cf120234c683785d902348a800ef3e0cc20bc0" integrity sha1-Ys8SAjTGg3hdkCNIqADvPgzCC8A= +fast-csv@^4.3.1: + version "4.3.6" + resolved "https://registry.yarnpkg.com/fast-csv/-/fast-csv-4.3.6.tgz#70349bdd8fe4d66b1130d8c91820b64a21bc4a63" + integrity sha512-2RNSpuwwsJGP0frGsOmTb9oUF+VkFSM4SyLTDgwf2ciHWTarN0lQTC+F2f/t5J9QjW+c65VFIAAu85GsvMIusw== + dependencies: + "@fast-csv/format" "4.3.5" + "@fast-csv/parse" "4.3.6" + fast-deep-equal@^3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.1.tgz#545145077c501491e33b15ec408c294376e94ae4" @@ -2471,6 +2691,11 @@ fast-safe-stringify@^2.0.7: resolved "https://registry.yarnpkg.com/fast-safe-stringify/-/fast-safe-stringify-2.0.7.tgz#124aa885899261f68aedb42a7c080de9da608743" integrity sha512-Utm6CdzT+6xsDk2m8S6uL8VHxNwI6Jub+e9NYTcAms28T84pTa25GJQV9j0CY0N1rM8hK4x6grpF2BQf+2qwVA== +fast-text-encoding@^1.0.0: + version "1.0.3" + resolved "https://registry.yarnpkg.com/fast-text-encoding/-/fast-text-encoding-1.0.3.tgz#ec02ac8e01ab8a319af182dae2681213cfe9ce53" + integrity sha512-dtm4QZH9nZtcDt8qJiOH9fcQd1NAgi+K1O2DbE6GG1PPCK/BWfOH3idCTRQ4ImXRUOyopDEgDEnVEE7Y/2Wrig== + fd-slicer@~1.1.0: version "1.1.0" resolved 
"https://registry.yarnpkg.com/fd-slicer/-/fd-slicer-1.1.0.tgz#25c7c89cb1f9077f8891bbe61d8f390eae256f1e" @@ -2631,6 +2856,11 @@ from2@^2.1.0: inherits "^2.0.1" readable-stream "^2.0.0" +fs-constants@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad" + integrity sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow== + fs-extra@7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-7.0.0.tgz#8cc3f47ce07ef7b3593a11b9fb245f7e34c041d6" @@ -2705,6 +2935,16 @@ fsevents@~2.3.1: resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.2.tgz#8a526f78b8fdf4623b709e0b975c52c24c02fd1a" integrity sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA== +fstream@^1.0.12: + version "1.0.12" + resolved "https://registry.yarnpkg.com/fstream/-/fstream-1.0.12.tgz#4e8ba8ee2d48be4f7d0de505455548eae5932045" + integrity sha512-WvJ193OHa0GHPEL+AycEJgxvBEwyfRkN1vhjca23OaPVMCaLCXTd5qAu82AjTcgP1UJmytkOKb63Ypde7raDIg== + dependencies: + graceful-fs "^4.1.2" + inherits "~2.0.0" + mkdirp ">=0.5 0" + rimraf "2" + function-bind@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" @@ -2724,6 +2964,25 @@ gauge@~2.7.3: strip-ansi "^3.0.1" wide-align "^1.1.0" +gaxios@^4.0.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/gaxios/-/gaxios-4.3.0.tgz#ad4814d89061f85b97ef52aed888c5dbec32f774" + integrity sha512-pHplNbslpwCLMyII/lHPWFQbJWOX0B3R1hwBEOvzYi1GmdKZruuEHK4N9V6f7tf1EaPYyF80mui1+344p6SmLg== + dependencies: + abort-controller "^3.0.0" + extend "^3.0.2" + https-proxy-agent "^5.0.0" + is-stream "^2.0.0" + node-fetch "^2.3.0" + +gcp-metadata@^4.2.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/gcp-metadata/-/gcp-metadata-4.3.0.tgz#0423d06becdbfb9cbb8762eaacf14d5324997900" + integrity sha512-L9XQUpvKJCM76YRSmcxrR4mFPzPGsgZUH+GgHMxAET8qc6+BhRJq63RLhWakgEO2KKVgeSDVfyiNjkGSADwNTA== + dependencies: + gaxios "^4.0.0" + json-bigint "^1.0.0" + geckodriver@^1.19.1: version "1.22.2" resolved "https://registry.yarnpkg.com/geckodriver/-/geckodriver-1.22.2.tgz#e0904bed50a1d2abaa24597d4ae43eb6662f9d72" @@ -2902,6 +3161,40 @@ globwatcher@~1.2.2: minimatch "*" q "^1.0.1" +google-auth-library@^7.0.2: + version "7.3.0" + resolved "https://registry.yarnpkg.com/google-auth-library/-/google-auth-library-7.3.0.tgz#946a911c72425b05f02735915f03410604466657" + integrity sha512-MPeeMlnsYnoiiVFMwX3hgaS684aiXrSqKoDP+xL4Ejg4Z0qLvIeg4XsaChemyFI8ZUO7ApwDAzNtgmhWSDNh5w== + dependencies: + arrify "^2.0.0" + base64-js "^1.3.0" + ecdsa-sig-formatter "^1.0.11" + fast-text-encoding "^1.0.0" + gaxios "^4.0.0" + gcp-metadata "^4.2.0" + gtoken "^5.0.4" + jws "^4.0.0" + lru-cache "^6.0.0" + +google-p12-pem@^3.0.3: + version "3.1.1" + resolved "https://registry.yarnpkg.com/google-p12-pem/-/google-p12-pem-3.1.1.tgz#98fb717b722d12196a3e5b550c44517562269859" + integrity sha512-e9CwdD2QYkpvJsktki3Bm8P8FSGIneF+/42a9F9QHcQvJ73C2RoYZdrwRl6BhwksWtzl65gT4OnBROhUIFw95Q== + dependencies: + node-forge "^0.10.0" + +googleapis-common@^5.0.1: + version "5.0.3" + resolved "https://registry.yarnpkg.com/googleapis-common/-/googleapis-common-5.0.3.tgz#9580944e538029687a4e25726afea4a1a535ac6f" + integrity sha512-8khlXblLyT9UpB+NTZzrWfKQUW6U7gO6WnfJp51WrLgpzP7zkO+OshwtdArq8z2afj37jdrhbIT8eAxZLdwvwA== + dependencies: + extend "^3.0.2" + gaxios "^4.0.0" + 
google-auth-library "^7.0.2" + qs "^6.7.0" + url-template "^2.0.8" + uuid "^8.0.0" + got@5.6.0: version "5.6.0" resolved "https://registry.yarnpkg.com/got/-/got-5.6.0.tgz#bb1d7ee163b78082bbc8eb836f3f395004ea6fbf" @@ -2946,7 +3239,7 @@ graceful-fs@^4.1.11, graceful-fs@^4.1.15, graceful-fs@^4.1.2: resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.4.tgz#2256bde14d3632958c465ebc96dc467ca07a29fb" integrity sha512-WjKPNJF79dtJAVniUlGGWHYGz2jWxT6VhN/4m1NdkbZ2nOsEF+cI1Edgql5zCRhs/VsQYRvrXctxktVXZUkixw== -graceful-fs@^4.1.6, graceful-fs@^4.2.0: +graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.2: version "4.2.6" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.6.tgz#ff040b2b0853b23c3d31027523706f1885d76bee" integrity sha512-nTnJ528pbqxYanhpDYsi4Rd8MAeaBA67+RZ10CM1m3bTAVFEDcd5AuA4a6W5YkGZ1iNXHzZz8T6TBKLeBuNriQ== @@ -2969,6 +3262,15 @@ growl@1.10.5: resolved "https://registry.yarnpkg.com/growl/-/growl-1.10.5.tgz#f2735dc2283674fa67478b10181059355c369e5e" integrity sha512-qBr4OuELkhPenW6goKVXiv47US3clb3/IbuWF9KNKEijAy9oeHxU9IgzjvJhHkUzhaj7rOUD7+YGWqUjLp5oSA== +gtoken@^5.0.4: + version "5.3.0" + resolved "https://registry.yarnpkg.com/gtoken/-/gtoken-5.3.0.tgz#6536eb2880d9829f0b9d78f756795d4d9064b217" + integrity sha512-mCcISYiaRZrJpfqOs0QWa6lfEM/C1V9ASkzFmuz43XBb5s1Vynh+CZy1ECeeJXVGx2PRByjYzb4Y4/zr1byr0w== + dependencies: + gaxios "^4.0.0" + google-p12-pem "^3.0.3" + jws "^4.0.0" + har-schema@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/har-schema/-/har-schema-2.0.0.tgz#a94c2224ebcac04782a0d9035521f24735b7ec92" @@ -3160,7 +3462,7 @@ https-browserify@^1.0.0: resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73" integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM= -https-proxy-agent@5.0.0: +https-proxy-agent@5.0.0, https-proxy-agent@^5.0.0: version "5.0.0" resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.0.tgz#e2a90542abb68a762e0a0850f6c9edadfd8506b2" integrity sha512-EkYm5BcKUGiduxzSt3Eppko+PiNWNEpa4ySk9vTC6wDsQJW9rHSa+UhGNJoRYp7bz6Ht1eaRIa6QaJqO5rCFbA== @@ -3269,7 +3571,7 @@ inflight@^1.0.4: once "^1.3.0" wrappy "1" -inherits@2, inherits@2.0.4, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.1, inherits@~2.0.3: +inherits@2, inherits@2.0.4, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4, inherits@~2.0.0, inherits@~2.0.1, inherits@~2.0.3: version "2.0.4" resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== @@ -3595,6 +3897,11 @@ is-stream@^1.0.0, is-stream@^1.1.0: resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-1.1.0.tgz#12d4a3dd4e68e0b79ceb8dbc84173ae80d91ca44" integrity sha1-EtSj3U5o4Lec6428hBc66A2RykQ= +is-stream@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.0.tgz#bde9c32680d6fae04129d6ac9d921ce7815f78e3" + integrity sha512-XCoy+WlUr7d1+Z8GgSuXmpuUFC9fOhRXglJMx+dwLKTkL44Cjd4W1Z5P+BQZpr+cR93aGP4S/s7Ftw6Nd/kiEw== + is-string@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.5.tgz#40493ed198ef3ff477b8c7f92f644ec82a5cd3a6" @@ -3712,6 +4019,13 @@ jsbn@~0.1.0: resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-0.1.1.tgz#a5e654c2e5a2deb5f201d96cefbca80c0ef2f513" integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM= +json-bigint@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/json-bigint/-/json-bigint-1.0.0.tgz#ae547823ac0cad8398667f8cd9ef4730f5b01ff1" + integrity sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ== + dependencies: + bignumber.js "^9.0.0" + json-buffer@3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.0.tgz#5b1f397afc75d677bde8bcfc0e47e1f9a3d9a898" @@ -3788,6 +4102,23 @@ jszip@^3.1.3, jszip@^3.5.0: readable-stream "~2.3.6" set-immediate-shim "~1.0.1" +jwa@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/jwa/-/jwa-2.0.0.tgz#a7e9c3f29dae94027ebcaf49975c9345593410fc" + integrity sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA== + dependencies: + buffer-equal-constant-time "1.0.1" + ecdsa-sig-formatter "1.0.11" + safe-buffer "^5.0.1" + +jws@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/jws/-/jws-4.0.0.tgz#2d4e8cf6a318ffaa12615e9dec7e86e6c97310f4" + integrity sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg== + dependencies: + jwa "^2.0.0" + safe-buffer "^5.0.1" + keyv@^3.0.0: version "3.1.0" resolved "https://registry.yarnpkg.com/keyv/-/keyv-3.1.0.tgz#ecc228486f69991e49e9476485a5be1e8fc5c4d9" @@ -3839,6 +4170,13 @@ latest-version@^5.0.0: dependencies: package-json "^6.3.0" +lazystream@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/lazystream/-/lazystream-1.0.0.tgz#f6995fe0f820392f61396be89462407bb77168e4" + integrity sha1-9plf4PggOS9hOWvolGJAe7dxaOQ= + dependencies: + readable-stream "^2.0.5" + lcid@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/lcid/-/lcid-2.0.0.tgz#6ef5d2df60e52f82eb228a4c373e8d1f397253cf" @@ -3853,6 +4191,11 @@ lie@~3.3.0: dependencies: immediate "~3.0.5" +listenercount@~1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/listenercount/-/listenercount-1.0.1.tgz#84c8a72ab59c4725321480c975e6508342e70937" + integrity sha1-hMinKrWcRyUyFIDJdeZQg0LnCTc= + load-json-file@^1.0.0: version "1.1.0" resolved "https://registry.yarnpkg.com/load-json-file/-/load-json-file-1.1.0.tgz#956905708d58b4bab4c2261b04f59f31c99374c0" @@ -3886,21 +4229,81 @@ locate-path@^3.0.0: p-locate "^3.0.0" path-exists "^3.0.0" +lodash.defaults@^4.2.0: + version "4.2.0" + resolved "https://registry.yarnpkg.com/lodash.defaults/-/lodash.defaults-4.2.0.tgz#d09178716ffea4dde9e5fb7b37f6f0802274580c" + integrity sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw= + lodash.difference@^4.5.0: version "4.5.0" resolved "https://registry.yarnpkg.com/lodash.difference/-/lodash.difference-4.5.0.tgz#9ccb4e505d486b91651345772885a2df27fd017c" integrity sha1-nMtOUF1Ia5FlE0V3KIWi3yf9AXw= +lodash.escaperegexp@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/lodash.escaperegexp/-/lodash.escaperegexp-4.1.2.tgz#64762c48618082518ac3df4ccf5d5886dae20347" + integrity sha1-ZHYsSGGAglGKw99Mz11YhtriA0c= + +lodash.flatten@^4.4.0: + version "4.4.0" + resolved "https://registry.yarnpkg.com/lodash.flatten/-/lodash.flatten-4.4.0.tgz#f31c22225a9632d2bbf8e4addbef240aa765a61f" + integrity sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8= + lodash.get@~4.4.2: version "4.4.2" resolved "https://registry.yarnpkg.com/lodash.get/-/lodash.get-4.4.2.tgz#2d177f652fa31e939b4438d5341499dfa3825e99" integrity sha1-LRd/ZS+jHpObRDjVNBSZ36OCXpk= +lodash.groupby@^4.6.0: + version "4.6.0" + resolved "https://registry.yarnpkg.com/lodash.groupby/-/lodash.groupby-4.6.0.tgz#0b08a1dcf68397c397855c3239783832df7403d1" + integrity 
sha1-Cwih3PaDl8OXhVwyOXg4Mt90A9E= + +lodash.isboolean@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/lodash.isboolean/-/lodash.isboolean-3.0.3.tgz#6c2e171db2a257cd96802fd43b01b20d5f5870f6" + integrity sha1-bC4XHbKiV82WgC/UOwGyDV9YcPY= + +lodash.isequal@^4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/lodash.isequal/-/lodash.isequal-4.5.0.tgz#415c4478f2bcc30120c22ce10ed3226f7d3e18e0" + integrity sha1-QVxEePK8wwEgwizhDtMib30+GOA= + +lodash.isfunction@^3.0.9: + version "3.0.9" + resolved "https://registry.yarnpkg.com/lodash.isfunction/-/lodash.isfunction-3.0.9.tgz#06de25df4db327ac931981d1bdb067e5af68d051" + integrity sha512-AirXNj15uRIMMPihnkInB4i3NHeb4iBtNg9WRWuK2o31S+ePwwNmDPaTL3o7dTJ+VXNZim7rFs4rxN4YU1oUJw== + +lodash.isnil@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/lodash.isnil/-/lodash.isnil-4.0.0.tgz#49e28cd559013458c814c5479d3c663a21bfaa6c" + integrity sha1-SeKM1VkBNFjIFMVHnTxmOiG/qmw= + +lodash.isplainobject@^4.0.6: + version "4.0.6" + resolved "https://registry.yarnpkg.com/lodash.isplainobject/-/lodash.isplainobject-4.0.6.tgz#7c526a52d89b45c45cc690b88163be0497f550cb" + integrity sha1-fFJqUtibRcRcxpC4gWO+BJf1UMs= + +lodash.isundefined@^3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/lodash.isundefined/-/lodash.isundefined-3.0.1.tgz#23ef3d9535565203a66cefd5b830f848911afb48" + integrity sha1-I+89lTVWUgOmbO/VuDD4SJEa+0g= + lodash.memoize@~3.0.3: version "3.0.4" resolved "https://registry.yarnpkg.com/lodash.memoize/-/lodash.memoize-3.0.4.tgz#2dcbd2c287cbc0a55cc42328bd0c736150d53e3f" integrity sha1-LcvSwofLwKVcxCMovQxzYVDVPj8= +lodash.union@^4.6.0: + version "4.6.0" + resolved "https://registry.yarnpkg.com/lodash.union/-/lodash.union-4.6.0.tgz#48bb5088409f16f1821666641c44dd1aaae3cd88" + integrity sha1-SLtQiECfFvGCFmZkHETdGqrjzYg= + +lodash.uniq@^4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773" + integrity sha1-0CJTc662Uq3BvILklFM5qEJ1R3M= + lodash@4.17.15, lodash@^4.17.14, lodash@^4.17.15, lodash@^4.17.4: version "4.17.15" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.15.tgz#b447f6670a0455bbfeedd11392eff330ea097548" @@ -3943,6 +4346,13 @@ lru-cache@^5.1.1: dependencies: yallist "^3.0.2" +lru-cache@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-6.0.0.tgz#6d6fe6570ebd96aaf90fcad1dafa3b2566db3a94" + integrity sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA== + dependencies: + yallist "^4.0.0" + make-dir@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-2.1.0.tgz#5f0310e18b8be898cc07009295a30ae41e91e6f5" @@ -4219,7 +4629,7 @@ mkdirp@0.5.1: dependencies: minimist "0.0.8" -mkdirp@0.5.5, mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@^0.5.4: +mkdirp@0.5.5, "mkdirp@>=0.5 0", mkdirp@^0.5.0, mkdirp@^0.5.1, mkdirp@^0.5.4: version "0.5.5" resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.5.tgz#d91cefd62d1436ca0f41620e251288d420099def" integrity sha512-NKmAlESf6jMGym1++R0Ra7wvhV+wFW63FaSOFPwRahvea0gMUcGUhVeAg/0BC0wiv9ih5NYPB1Wn1UEI1/L+xQ== @@ -4462,6 +4872,16 @@ node-fetch@2.2.0: resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.2.0.tgz#4ee79bde909262f9775f731e3656d0db55ced5b5" integrity sha512-OayFWziIxiHY8bCUyLX6sTpDH8Jsbp4FfYd1j1f7vZyfgkcOnAyM4oQR16f8a0s7Gl/viMGRey8eScYk4V4EZA== +node-fetch@^2.3.0: + version "2.6.1" + resolved 
"https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052" + integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw== + +node-forge@^0.10.0: + version "0.10.0" + resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-0.10.0.tgz#32dea2afb3e9926f02ee5ce8794902691a676bf3" + integrity sha512-PPmu8eEeG9saEUvI97fm4OYxXVB6bFvyNTyiUOBichBpFG8A1Ljw3bY62+5oOjDEMHRnd0Y7HQ+x7uzxOzC6JA== + node-libs-browser@^2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/node-libs-browser/-/node-libs-browser-2.2.1.tgz#b64f513d18338625f90346d27b0d235e631f6425" @@ -5080,6 +5500,11 @@ pretty-bytes@^1.0.2: get-stdin "^4.0.1" meow "^3.1.0" +printj@~1.1.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/printj/-/printj-1.1.2.tgz#d90deb2975a8b9f600fb3a1c94e3f4c53c78a222" + integrity sha512-zA2SmoLaxZyArQTOPj5LXecR+RagfPSU5Kw1qP+jkWeNlrq+eJZyY2oS68SU1Z/7/myXM4lo9716laOFAVStCQ== + process-nextick-args@~1.0.6: version "1.0.7" resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-1.0.7.tgz#150e20b756590ad3f91093f25a4f2ad8bff30ba3" @@ -5200,6 +5625,13 @@ qs@6.5.2, qs@~6.5.2: resolved "https://registry.yarnpkg.com/qs/-/qs-6.5.2.tgz#cb3ae806e8740444584ef154ce8ee98d403f3e36" integrity sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA== +qs@^6.7.0: + version "6.10.1" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.10.1.tgz#4931482fa8d647a5aab799c5271d2133b981fb6a" + integrity sha512-M528Hph6wsSVOBiYUnGf+K/7w0hNshs/duGsNXPUCLH5XAqjEtiPGwNONLV0tBH8NoGb0mvD5JubnUTrujKDTg== + dependencies: + side-channel "^1.0.4" + querystring-es3@^0.2.0, querystring-es3@~0.2.0: version "0.2.1" resolved "https://registry.yarnpkg.com/querystring-es3/-/querystring-es3-0.2.1.tgz#9ec61f79049875707d69414596fd907a4d711e73" @@ -5305,7 +5737,7 @@ read-pkg@^1.0.0: string_decoder "~1.1.1" util-deprecate "~1.0.1" -readable-stream@^3.6.0: +readable-stream@^3.1.1, readable-stream@^3.4.0, readable-stream@^3.6.0: version "3.6.0" resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.0.tgz#337bbda3adc0706bd3e024426a286d4b4b2c9198" integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA== @@ -5341,6 +5773,13 @@ readable-web-to-node-stream@^2.0.0: resolved "https://registry.yarnpkg.com/readable-web-to-node-stream/-/readable-web-to-node-stream-2.0.0.tgz#751e632f466552ac0d5c440cc01470352f93c4b7" integrity sha512-+oZJurc4hXpaaqsN68GoZGQAQIA3qr09Or4fqEsargABnbe5Aau8hFn6ISVleT3cpY/0n/8drn7huyyEvTbghA== +readdir-glob@^1.0.0: + version "1.1.1" + resolved "https://registry.yarnpkg.com/readdir-glob/-/readdir-glob-1.1.1.tgz#f0e10bb7bf7bfa7e0add8baffdc54c3f7dbee6c4" + integrity sha512-91/k1EzZwDx6HbERR+zucygRFfiPl2zkIYZtv3Jjr6Mn7SkKcVct8aVO+sSRiGMc6fLf72du3d92/uY63YPdEA== + dependencies: + minimatch "^3.0.4" + readdirp@^2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/readdirp/-/readdirp-2.2.1.tgz#0e87622a3325aa33e892285caf8b4e846529a525" @@ -5550,7 +5989,7 @@ ret@~0.1.10: resolved "https://registry.yarnpkg.com/ret/-/ret-0.1.15.tgz#b8a4825d5bdb1fc3f6f53c2bc33f81388681c7bc" integrity sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg== -rimraf@^2.2.8, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.3, rimraf@^2.7.1: +rimraf@2, rimraf@^2.2.8, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.3, rimraf@^2.7.1: version "2.7.1" resolved 
"https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec" integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w== @@ -5611,6 +6050,13 @@ sax@>=0.6.0, sax@^1.2.4: resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9" integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw== +saxes@^5.0.1: + version "5.0.1" + resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d" + integrity sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw== + dependencies: + xmlchars "^2.2.0" + schema-utils@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/schema-utils/-/schema-utils-1.0.0.tgz#0b79a93204d7b600d4b2850d1f66c2a34951c770" @@ -5713,7 +6159,7 @@ set-value@^2.0.0, set-value@^2.0.1: is-plain-object "^2.0.3" split-string "^3.0.1" -setimmediate@^1.0.4: +setimmediate@^1.0.4, setimmediate@~1.0.4: version "1.0.5" resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285" integrity sha1-KQy7Iy4waULX1+qbg3Mqt4VvgoU= @@ -5791,6 +6237,15 @@ short-uuid@3.1.1: any-base "^1.1.0" uuid "^3.3.2" +side-channel@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.4.tgz#efce5c8fdc104ee751b25c58d4290011fa5ea2cf" + integrity sha512-q5XPytqFEIKHkGdiMIrY10mvLRvnQh42/+GoBlFW3b2LXLE2xxJpZFdm94we0BaoV3RwJyGqg5wS7epxTv0Zvw== + dependencies: + call-bind "^1.0.0" + get-intrinsic "^1.0.2" + object-inspect "^1.9.0" + sigmund@~1.0.0: version "1.0.1" resolved "https://registry.yarnpkg.com/sigmund/-/sigmund-1.0.1.tgz#3ff21f198cad2175f9f3b781853fd94d0d19b590" @@ -6256,6 +6711,17 @@ tapable@^1.0.0, tapable@^1.1.3: resolved "https://registry.yarnpkg.com/tapable/-/tapable-1.1.3.tgz#a1fccc06b58db61fd7a45da2da44f5f3a3e67ba2" integrity sha512-4WK/bYZmj8xLr+HUCODHGF1ZFzsYffasLUgEiMBY4fgtltdO6B4WJtlSbPaDTLpYTcGVwM2qLnFTICEcNxs3kA== +tar-stream@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287" + integrity sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ== + dependencies: + bl "^4.0.3" + end-of-stream "^1.4.1" + fs-constants "^1.0.0" + inherits "^2.0.3" + readable-stream "^3.1.1" + tar@6.0.2: version "6.0.2" resolved "https://registry.yarnpkg.com/tar/-/tar-6.0.2.tgz#5df17813468a6264ff14f766886c622b84ae2f39" @@ -6399,7 +6865,7 @@ tmp@0.0.33: dependencies: os-tmpdir "~1.0.2" -tmp@^0.2.1: +tmp@^0.2.0, tmp@^0.2.1: version "0.2.1" resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.2.1.tgz#8457fc3037dcf4719c251367a1af6500ee1ccf14" integrity sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ== @@ -6476,6 +6942,11 @@ tough-cookie@~2.5.0: psl "^1.1.28" punycode "^2.1.1" +"traverse@>=0.3.0 <0.4": + version "0.3.9" + resolved "https://registry.yarnpkg.com/traverse/-/traverse-0.3.9.tgz#717b8f220cc0bb7b44e40514c22b2e8bbc70d8b9" + integrity sha1-cXuPIgzAu3tE5AUUwisui7xw2Lk= + trim-newlines@^1.0.0: version "1.0.0" resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-1.0.0.tgz#5887966bb582a4503a41eb524f7d35011815a613" @@ -6672,6 +7143,22 @@ unzip-response@^1.0.0: resolved "https://registry.yarnpkg.com/unzip-response/-/unzip-response-1.0.2.tgz#b984f0877fc0a89c2c773cc1ef7b5b232b5b06fe" 
integrity sha1-uYTwh3/AqJwsdzzB73tbIytbBv4= +unzipper@^0.10.11: + version "0.10.11" + resolved "https://registry.yarnpkg.com/unzipper/-/unzipper-0.10.11.tgz#0b4991446472cbdb92ee7403909f26c2419c782e" + integrity sha512-+BrAq2oFqWod5IESRjL3S8baohbevGcVA+teAIOYWM3pDVdseogqbzhhvvmiyQrUNKFUnDMtELW3X8ykbyDCJw== + dependencies: + big-integer "^1.6.17" + binary "~0.3.0" + bluebird "~3.4.1" + buffer-indexof-polyfill "~1.0.0" + duplexer2 "~0.1.4" + fstream "^1.0.12" + graceful-fs "^4.2.2" + listenercount "~1.0.1" + readable-stream "~2.3.6" + setimmediate "~1.0.4" + upath@^1.1.1: version "1.2.0" resolved "https://registry.yarnpkg.com/upath/-/upath-1.2.0.tgz#8f66dbcd55a883acdae4408af8b035a5044c1894" @@ -6722,6 +7209,11 @@ url-parse-lax@^3.0.0: dependencies: prepend-http "^2.0.0" +url-template@^2.0.8: + version "2.0.8" + resolved "https://registry.yarnpkg.com/url-template/-/url-template-2.0.8.tgz#fc565a3cccbff7730c775f5641f9555791439f21" + integrity sha1-/FZaPMy/93MMd19WQflVV5FDnyE= + url@^0.11.0, url@~0.11.0: version "0.11.0" resolved "https://registry.yarnpkg.com/url/-/url-0.11.0.tgz#3838e97cfc60521eb73c525a8e55bfdd9e2e28f1" @@ -6776,6 +7268,11 @@ uuid@^3.3.2: resolved "https://registry.yarnpkg.com/uuid/-/uuid-3.4.0.tgz#b23e4358afa8a202fe7a100af1f5f883f02007ee" integrity sha512-HjSDRw6gZE5JMggctHBcjVak08+KEVhSIiDzFnT9S9aegmp85S/bReBVTb4QTFaRNptJ9kuYaNhnbNEOkbKb/A== +uuid@^8.0.0, uuid@^8.3.0: + version "8.3.2" + resolved "https://registry.yarnpkg.com/uuid/-/uuid-8.3.2.tgz#80d5b5ced271bb9af6c445f21a1a04c606cefbe2" + integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== + v8-compile-cache@^2.0.2: version "2.3.0" resolved "https://registry.yarnpkg.com/v8-compile-cache/-/v8-compile-cache-2.3.0.tgz#2de19618c66dc247dcfb6f99338035d8245a2cee" @@ -7021,6 +7518,11 @@ xmlbuilder@~11.0.0: resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-11.0.1.tgz#be9bae1c8a046e76b31127726347d0ad7002beb3" integrity sha512-fDlsI/kFEx7gLvbecc0/ohLG50fugQp8ryHzMTuW9vSa1GJ0XYWKnhsUx7oie3G98+r56aTQIUB4kht42R3JvA== +xmlchars@^2.2.0: + version "2.2.0" + resolved "https://registry.yarnpkg.com/xmlchars/-/xmlchars-2.2.0.tgz#060fe1bcb7f9c76fe2a17db86a9bc3ab894210cb" + integrity sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw== + xtend@^4.0.0, xtend@^4.0.1, xtend@^4.0.2, xtend@~4.0.1: version "4.0.2" resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54" @@ -7151,3 +7653,12 @@ yauzl@^2.10.0: dependencies: buffer-crc32 "~0.2.3" fd-slicer "~1.1.0" + +zip-stream@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/zip-stream/-/zip-stream-4.1.0.tgz#51dd326571544e36aa3f756430b313576dc8fc79" + integrity sha512-zshzwQW7gG7hjpBlgeQP9RuyPGNxvJdzR8SUM3QhxCnLjWN2E7j3dOvpeDcQoETfHx0urRS7EtmVToql7YpU4A== + dependencies: + archiver-utils "^2.1.0" + compress-commons "^4.1.0" + readable-stream "^3.6.0"