// recital/core/src/tools/split-preserve-whitespace.ts
/**
 * Copyright (c) 2022 Amorphous
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

import eol from 'eol'
import { TrimWhitespaceToken } from '../types'

/**
 * Describes one kind of section whose contents are kept verbatim.
 *
 * A section opens on a line matching `start` and closes on a line matching `end`.
 */
interface PreserveWhitespaceTag {
	/** Pattern tested against a line to detect the opening delimiter. */
	start: RegExp | string
	/** Pattern tested against a line to detect the closing delimiter. */
	end: RegExp | string
	/** Literal opening delimiter copied onto the emitted token, when there is one. */
	startTag?: string
	/** Literal closing delimiter copied onto the emitted token, when there is one. */
	endTag?: string
	/** Value copied into the `type` field of tokens produced for this section. */
	type: string
}

/**
 * Delimiters of the sections whose contents must be kept verbatim.
 *
 * Entries are tried in array order against each line, so earlier entries win
 * when more than one `start` pattern matches.
 *
 * The patterns deliberately omit the `g` flag: a global RegExp keeps state in
 * `lastIndex`, so sharing one at module level makes `test`/`exec` give
 * different answers on consecutive calls. Matching a single line never needs it.
 */
const preserveWhitespaceTags: readonly PreserveWhitespaceTag[] = [
	{
		// A line consisting solely of a triple-backtick fence.
		// `m` is kept on anchored patterns so `^`/`$` behave exactly as before.
		start: /^```$/m,
		end: /^```$/m,
		startTag: '```',
		endTag: '```',
		type: 'code',
	},
	{
		// Unanchored: matches a <pre> / </pre> tag anywhere in the line.
		start: /<pre>/,
		end: /<\/pre>/,
		startTag: '<pre>',
		endTag: '</pre>',
		type: 'pre',
	},
	{
		// A line consisting solely of `!==`; this entry carries no startTag/endTag.
		start: /^!==$/m,
		end: /^!==$/m,
		type: 'custom',
	},
]

/**
 * Splits a string into consecutive tokens, separating ordinary text from the
 * sections whose whitespace should be left untouched (code fences, `<pre>`
 * blocks and `!==` sections).
 *
 * The input is processed line by line:
 * - Text outside any section is collected into tokens of type `'none'`.
 * - Text inside a section is collected into a token carrying that section's
 *   `type`, `startTag` and `endTag`.
 * - Every collected line is re-terminated with `'\n'`.
 * - A delimiter line is discarded as a whole, including any other text on it.
 * - A `'none'` token is emitted every time a section opens, even when no text
 *   preceded it.
 * - Text left over at the end of the input (including an unterminated section)
 *   is emitted only when it is non-empty.
 *
 * @param raw - the string to chop up.
 * @returns the tokens in document order.
 */
export const createTrimWhitespaceTokens = (raw: string): TrimWhitespaceToken[] => {
	const result: TrimWhitespaceToken[] = []
	let buffer = ''
	let openTag: PreserveWhitespaceTag | null = null

	// Emits the buffered text as one token (plain when `tag` is null) and resets the buffer.
	const flush = (tag: PreserveWhitespaceTag | null): void => {
		if (tag === null) {
			result.push({ text: buffer, type: 'none' })
		} else {
			result.push({
				text: buffer,
				type: tag.type,
				startTag: tag.startTag,
				endTag: tag.endTag,
			})
		}
		buffer = ''
	}

	for (const line of eol.split(raw)) {
		if (openTag !== null) {
			// Inside a section: only its own closing delimiter can end it.
			if (line.match(openTag.end)) {
				flush(openTag)
				openTag = null
			} else {
				buffer += line + '\n'
			}
			continue
		}

		// Outside any section: the first tag whose opening delimiter matches wins.
		const opener = preserveWhitespaceTags.find((tag) => line.match(tag.start))
		if (opener === undefined) {
			buffer += line + '\n'
		} else {
			flush(null)
			openTag = opener
		}
	}

	// Emit whatever is left; an unterminated section keeps its tag metadata.
	if (buffer.length > 0) {
		flush(openTag)
	}
	return result
}