core/src/pipeline/04-tokens/index.ts · recital

/**
 * Copyright (c) 2022 Amorphous
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

import eol from 'eol'
import moo from 'moo'

import { pipe } from '../../tools/pipe'
import { createTrimWhitespaceTokens } from '../../tools/split-preserve-whitespace'
import {
	FragmentedSceneObject,
	ParagraphToken,
	RawEmptyObject,
	RawFragmentObject,
	RawTextToken,
	Token,
	TokenizedEmptyObject,
	TokenizedFragmentedSceneObject,
	TokenizedFragmentObject,
} from '../../types'
import { preprocessSoftLines } from './plugins/softlines'
import { createCommandObject } from '../../tools/command-shorthand'
import { separateAtShorthand } from '../../tools/at-shorthand'
import { exposeMeta } from '../../tools/expose-meta'

/**
 * Tokenizes every fragment of a scene.
 *
 * @param scene - Scene whose `fragments` have not been tokenized yet.
 * @returns A shallow copy of `scene` whose `fragments` are replaced by the
 * result of running `genTokenizedFragment` on each one, in the same order.
 */
export const genTokensForScene = (scene: FragmentedSceneObject): TokenizedFragmentedSceneObject => {
	const tokenizedFragments: (TokenizedFragmentObject | TokenizedEmptyObject)[] = Array.from(
		scene.fragments,
		(rawFragment) => genTokenizedFragment(rawFragment)
	)

	return { ...scene, fragments: tokenizedFragments }
}
/**
 * Builds the token list for a single fragment.
 *
 * The fragment's `raw` text is split by `createTrimWhitespaceTokens`. Sections
 * typed `'none'` are lexed into tokens by `createTokensForTextSection`; every
 * other section is kept verbatim as a single `rawText` token carrying the
 * section's text and its start/end tags.
 *
 * @param fragment - Fragment providing the `raw` text to tokenize.
 * @returns A shallow copy of `fragment` with an added `tokens` array.
 */
export const genTokenizedFragment = (
	fragment: RawFragmentObject | RawEmptyObject
): TokenizedFragmentObject | TokenizedEmptyObject => {
	const collected: Token[] = []

	for (const part of createTrimWhitespaceTokens(fragment.raw)) {
		if (section_isPlainText(part.type)) {
			// Plain text: let the lexer break it into paragraph/command tokens.
			collected.push(...createTokensForTextSection(part.text))
		} else {
			// Anything else is passed through untouched, tags included.
			const passthrough: RawTextToken = {
				type: 'rawText',
				text: part.text,
				startTag: part.startTag,
				endTag: part.endTag,
			}
			collected.push(passthrough)
		}
	}

	const tokenizedFragment = {
		...fragment,
		tokens: collected,
	}

	return tokenizedFragment

	/** True for sections that still need to be lexed (type `'none'`). */
	function section_isPlainText(sectionType: string): boolean {
		return sectionType === 'none'
	}
}

/** Minimal view of a lexed token: only these two fields are read below. */
type LexedToken = { type?: string; value: string }

/**
 * Lexer shared by every call to `createTokensForTextSection`.
 *
 * It is built once instead of once per text section; `reset()` is called
 * before each use. Sharing is safe because each call drains the lexer into an
 * array before doing anything else with the tokens.
 *
 * States:
 * - `main`: ordinary text; a line starting with `$` enters `command`, a line
 *   starting with `@` enters `at`, and `\n<!` (not preceded by another
 *   newline) pushes `meta`.
 * - `command`: everything up to the next newline.
 * - `meta`: everything up to the next `>`.
 * - `at`: everything up to the first `: `, `:`, space or tab.
 *
 * NOTE(review): no rule in `at` matches a newline and no `error` rule is
 * defined, so a line such as `@name` followed directly by a newline is
 * presumably reported by moo as a syntax error — confirm this is intended.
 */
const textSectionLexer = moo.states({
	main: {
		startCommand: { match: /^\$/, next: 'command' },
		startAt: { match: /^@/, next: 'at' },
		startMetaTag: {
			match: /(?<!\n)\n<!/,
			lineBreaks: true,
			push: 'meta',
		},
		emptyLine: { match: /\n/, lineBreaks: true },
		text: { match: /[^]+?/, lineBreaks: true },
	},
	command: {
		endCommand: { match: /\n/, lineBreaks: true, next: 'main' },
		commandText: { match: /[^]+?/, lineBreaks: true },
	},
	meta: {
		endMetaTag: {
			match: />/,
			pop: 1,
		},
		meta: {
			match: /[^]+?/,
			lineBreaks: true,
		},
	},
	at: {
		colonWithWS: { match: /: /, next: 'main' },
		colon: { match: /:/, next: 'main' },
		ws: { match: /[ \t]/, next: 'main' },
		shorthandText: { match: /.+?/ },
	},
})

/**
 * Lexes one plain-text section into paragraph and command tokens.
 *
 * Soft lines are preprocessed first and line endings are normalized to LF.
 * The lexer emits one token per character for free text, so characters are
 * accumulated in a buffer and flushed into a token whenever a delimiter
 * (newline, end of command, start/end of a meta tag, end of input) is seen.
 *
 * @param text - Raw text of the section.
 * @returns The tokens found in `text`, in source order.
 * @throws Error when a `<!...>` meta tag closes before any token exists for it
 * to attach to.
 */
const createTokensForTextSection = (text: string): Token[] => {
	// first let's preprocess soft lines, so they don't get separated out.
	const preprocessedString = pipe(text, [preprocessSoftLines])
	const tokens: Token[] = []

	textSectionLexer.reset(eol.lf(preprocessedString))

	let currentBuffer = ''
	// Paragraph pre-created by an @-shorthand, waiting for its text.
	let existingToken: ParagraphToken | null = null
	// True between `startCommand` and `endCommand`, so that a command left
	// open at end of input can still be emitted as a command.
	let inCommand = false

	/** Flushes the buffer into the pending @-shorthand paragraph, or a new one. */
	const createParagraphToken = () => {
		if (existingToken) {
			existingToken.text = currentBuffer
			tokens.push(existingToken)
			existingToken = null
		} else {
			// create a new paragraph token
			const newToken: ParagraphToken = {
				type: 'paragraph',
				text: currentBuffer,
			}
			tokens.push(newToken)
		}
	}

	// Drain the shared lexer completely before processing any token.
	const lexed = Array.from(textSectionLexer) as LexedToken[]

	for (const token of lexed) {
		switch (token.type) {
			case 'text':
				currentBuffer += token.value
				break
			case 'emptyLine':
				if (currentBuffer.length) {
					createParagraphToken()

					// reset the buffer
					currentBuffer = ''
				}
				break

			// @-shorthand cases
			case 'startAt':
				currentBuffer = '@'
				break
			case 'shorthandText':
				currentBuffer += token.value
				break

			case 'colonWithWS':
			case 'colon':
			case 'ws': {
				// The buffer holds the whole `@...` shorthand; turn it into meta
				// for a paragraph whose text is collected afterwards.
				const meta = separateAtShorthand(currentBuffer)
				currentBuffer = ''
				existingToken = {
					type: 'paragraph',
					meta,
					text: '',
				}
				exposeMeta(existingToken)
				break
			}

			// command cases
			case 'startCommand':
				currentBuffer = '$'
				inCommand = true
				break
			case 'commandText':
				currentBuffer += token.value
				break
			case 'endCommand': {
				// create a new command token from current buffer
				const commandToken = createCommandObject(currentBuffer)
				tokens.push(commandToken)
				currentBuffer = ''
				inCommand = false
				break
			}

			// meta cases
			case 'startMetaTag':
				if (currentBuffer.length) {
					createParagraphToken()
				}
				currentBuffer = ''
				break
			case 'meta':
				currentBuffer += token.value
				break
			case 'endMetaTag': {
				if (tokens.length === 0) {
					throw new Error("Created a post-paragraph meta tag that doesn't modify a paragraph")
				}
				// The tag's content is attached to the most recently emitted token.
				const modifiedToken = tokens[tokens.length - 1] as ParagraphToken
				if (!modifiedToken.meta) {
					modifiedToken.meta = {}
				}
				modifiedToken.meta._ = currentBuffer
				currentBuffer = ''
				break
			}
		}
	}

	// finish off the last token.
	if (inCommand) {
		// The input ended on a command line with no trailing newline, so
		// `endCommand` never fired; emit it as a command, not as a paragraph.
		tokens.push(createCommandObject(currentBuffer))
	} else if (currentBuffer.length) {
		createParagraphToken()
	}

	return tokens
}