gristlabs_grist-core/app/server/lib/httpEncoding.ts

44 lines
1.0 KiB
TypeScript
Raw Permalink Normal View History

// Based on the source code of the Body.textConverted method in node-fetch
export function httpEncoding(header: string | null, content: Buffer): string | undefined {
let res: RegExpExecArray | null = null;
// header
if (header) {
res = /charset=([^;]*)/i.exec(header);
}
// no charset in content type, peek at response body for at most 1024 bytes
const str = content.slice(0, 1024).toString();
// html5
if (!res && str) {
res = /<meta.+?charset=(['"])(.+?)\1/i.exec(str);
}
// html4
if (!res && str) {
res = /<meta\s+?http-equiv=(['"])content-type\1\s+?content=(['"])(.+?)\2/i.exec(str);
if (res) {
res = /charset=(.*)/i.exec(res.pop()!);
}
}
// xml
if (!res && str) {
res = /<\?xml.+?encoding=(['"])(.+?)\1/i.exec(str);
}
// found charset
if (res) {
let charset = res.pop();
// prevent decode issues when sites use incorrect encoding
// ref: https://hsivonen.fi/encoding-menu/
if (charset === 'gb2312' || charset === 'gbk') {
charset = 'gb18030';
}
return charset;
}
}