const FIRST_BIT = 0x80;
const FIRST_TWO_BITS = 0xc0;
const FIRST_THREE_BITS = 0xe0;
const FIRST_FOUR_BITS = 0xf0;
const FIRST_FIVE_BITS = 0xf8;

const TWO_BIT_CHAR = 0xc0;
const THREE_BIT_CHAR = 0xe0;
const FOUR_BIT_CHAR = 0xf0;
const CONTINUING_CHAR = 0x80;

// Only the low 8 bits of each element are inspected.
const BYTE_MASK = 0xff;

// Default inclusive range of a continuation byte (10xxxxxx).
const CONTINUATION_MIN = 0x80;
const CONTINUATION_MAX = 0xbf;

// 0xC0 and 0xC1 could only start overlong encodings of U+0000..U+007F.
const MIN_TWO_BYTE_LEAD = 0xc2;
// After 0xE0 the second byte must be >= 0xA0, otherwise the sequence is an
// overlong encoding of a code point below U+0800.
const THREE_BYTE_OVERLONG_LEAD = 0xe0;
const THREE_BYTE_OVERLONG_MIN_SECOND = 0xa0;
// After 0xED the second byte must be <= 0x9F, otherwise the sequence encodes
// a UTF-16 surrogate (U+D800..U+DFFF).
const SURROGATE_LEAD = 0xed;
const SURROGATE_MAX_SECOND = 0x9f;
// After 0xF0 the second byte must be >= 0x90, otherwise the sequence is an
// overlong encoding of a code point below U+10000.
const FOUR_BYTE_OVERLONG_LEAD = 0xf0;
const FOUR_BYTE_OVERLONG_MIN_SECOND = 0x90;
// 0xF4 is the last usable lead byte; its second byte must be <= 0x8F so the
// code point does not exceed U+10FFFF. Leads 0xF5..0xF7 are always too large.
const MAX_FOUR_BYTE_LEAD = 0xf4;
const MAX_FOUR_BYTE_LEAD_MAX_SECOND = 0x8f;

/**
 * Determines whether a range of bytes is well-formed UTF-8.
 *
 * Besides checking lead/continuation byte structure, this rejects overlong
 * encodings, encoded UTF-16 surrogates (U+D800..U+DFFF) and code points
 * above U+10FFFF. A sequence that is cut off by `end` is invalid; an empty
 * range is valid.
 *
 * @param bytes - Number-indexable collection of byte values (for example a
 *   Uint8Array or a plain array). Only the low 8 bits of each element are used.
 * @param start - Index of the first byte to validate (inclusive)
 * @param end - Index at which validation stops (exclusive)
 * @returns `true` if `bytes[start]` up to `bytes[end - 1]` form valid UTF-8
 */
export function validateUtf8(
  bytes: { [index: number]: number },
  start: number,
  end: number
): boolean {
  // Number of continuation bytes still owed by the current sequence.
  let continuation = 0;
  // Inclusive range allowed for the next continuation byte. It is narrowed
  // only for the first continuation byte after certain lead bytes.
  let lower = CONTINUATION_MIN;
  let upper = CONTINUATION_MAX;

  for (let i = start; i < end; i += 1) {
    const byte = bytes[i] & BYTE_MASK;

    if (continuation) {
      if ((byte & FIRST_TWO_BITS) !== CONTINUING_CHAR || byte < lower || byte > upper) {
        return false;
      }
      continuation -= 1;
      // Any further continuation bytes of this sequence are unrestricted.
      lower = CONTINUATION_MIN;
      upper = CONTINUATION_MAX;
    } else if (byte & FIRST_BIT) {
      if ((byte & FIRST_THREE_BITS) === TWO_BIT_CHAR) {
        if (byte < MIN_TWO_BYTE_LEAD) {
          return false;
        }
        continuation = 1;
      } else if ((byte & FIRST_FOUR_BITS) === THREE_BIT_CHAR) {
        continuation = 2;
        if (byte === THREE_BYTE_OVERLONG_LEAD) {
          lower = THREE_BYTE_OVERLONG_MIN_SECOND;
        } else if (byte === SURROGATE_LEAD) {
          upper = SURROGATE_MAX_SECOND;
        }
      } else if ((byte & FIRST_FIVE_BITS) === FOUR_BIT_CHAR) {
        if (byte > MAX_FOUR_BYTE_LEAD) {
          return false;
        }
        continuation = 3;
        if (byte === FOUR_BYTE_OVERLONG_LEAD) {
          lower = FOUR_BYTE_OVERLONG_MIN_SECOND;
        } else if (byte === MAX_FOUR_BYTE_LEAD) {
          upper = MAX_FOUR_BYTE_LEAD_MAX_SECOND;
        }
      } else {
        // Stray continuation byte or a 0xF8..0xFF byte, neither may start a sequence.
        return false;
      }
    }
  }

  // A sequence still waiting for continuation bytes was truncated.
  return !continuation;
}