// frontispiece/packages/line-parser/src/line.ts
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

import { AnyToken, LinkToken, TagToken, TextToken } from "./types"
import { states } from "moo"

/**
 * States of the token-building state machine used by `parseLineIntoTokens`.
 * Each value names what kind of line token is currently being accumulated.
 */
enum LINE_TOKEN_STATE {
	// Plain text outside of any tag, link or note; this is the initial state.
	TEXT = "text",
	// Inside an opening tag, i.e. after `<<` and before the matching `>>`.
	OPENING_TAG = "open",
	// Inside a closing tag, i.e. after `<</` and before the matching `>>`.
	CLOSING_TAG = "close",
	// Inside the bracketed text of a link, i.e. after `[`.
	LINK_TEXT = "link",
	// Inside the parenthesised target of a link, i.e. after `](`.
	LINK_URL = "url",
	// Inside the bracketed text of a note, i.e. after `![`.
	NOTE_TEXT = "note",
	// Inside the parenthesised target of a note, i.e. after `](` following `![`.
	NOTE_URL = "note_url",
	// Sentinel state entered once all input is consumed, to flush the pending token.
	END = "end",
}

/**
 * Splits a single line of markup into a flat list of tokens.
 *
 * Recognised syntax:
 * - `<<name>>` produces an opening tag token (`{ type: "tag", isClosingTag: false, param }`).
 * - `<</name>>` produces a closing tag token; an empty `<</>>` closes the most
 *   recently opened tag that is still open.
 * - `[text](target)` produces a link token (`{ type: "link", text, target }`).
 * - `![text](target)` produces a note token (`{ type: "note", text, target }`).
 * - Everything else is accumulated into text tokens (`{ type: "text", text }`).
 *
 * Brackets that never turn into a link or note (`[text]`, `![text]`, or a
 * bracket left open at the end of the line) are emitted back as plain text.
 * Malformed input never throws; problems are reported through `console.warn`.
 *
 * @param line The raw line to tokenize.
 * @returns The tokens in the order they appear in the line. Empty text tokens
 *   are omitted.
 */
export const parseLineIntoTokens = (line: string): AnyToken[] => {
	// Every catch-all rule is lazy (`+?`), so it yields one character at a time
	// and lets the delimiter rules listed before it win as soon as they match.
	const lexer = states({
		main: {
			endTagStart: { match: /<<\//, next: "endTagName" },
			startTagStart: { match: /<</, next: "startTagName" },
			noteStart: { match: /\!\[/, next: "noteText" },
			linkStart: { match: /\[/, next: "linkText" },
			text: { match: /[^]+?/, lineBreaks: true },
		},
		startTagName: {
			tagEnd: { match: />>/, next: "main" },
			tagName: { match: /[^]+?/, lineBreaks: true },
		},
		endTagName: {
			tagEnd: { match: />>/, next: "main" },
			tagName: { match: /[^]+?/, lineBreaks: true },
		},
		linkText: {
			endLink: { match: /\]\(/, next: "linkURL" },
			endNoLink: { match: /\]/, next: "main" },
			linkText: { match: /[^]+?/, lineBreaks: true },
		},
		noteText: {
			endNote: { match: /\]\(/, next: "linkURL" },
			// Named differently from `endNoLink` so the leading `!` can be restored.
			endNoNote: { match: /\]/, next: "main" },
			linkText: { match: /[^]+?/, lineBreaks: true },
		},
		linkURL: {
			endLinkURL: { match: /\)/, next: "main" },
			linkURL: { match: /[^]+?/, lineBreaks: true },
		},
	})

	lexer.reset(line)

	const lineTokens: AnyToken[] = []
	let state = LINE_TOKEN_STATE.TEXT
	let currentToken: AnyToken = {
		type: "text",
		text: "",
	}

	// Params of the tags opened so far on this line and not yet closed.
	const openTags: string[] = []

	/**
	 * Finishes the token for the state being left (pushing it to `lineTokens`
	 * where appropriate), then starts a fresh token for `newState`.
	 */
	const switchState = (newState: LINE_TOKEN_STATE) => {
		// 1. Flush the token that belongs to the state we are leaving.
		switch (state) {
			case LINE_TOKEN_STATE.TEXT: {
				const textToken = currentToken as TextToken
				// Skip empty text runs, e.g. between two adjacent tags.
				if (textToken.text) {
					lineTokens.push(textToken)
				}
				break
			}
			case LINE_TOKEN_STATE.OPENING_TAG: {
				const openingTagToken = currentToken as TagToken
				openingTagToken.param = openingTagToken.param.trim()
				lineTokens.push(openingTagToken)
				openTags.push(openingTagToken.param)
				break
			}
			case LINE_TOKEN_STATE.CLOSING_TAG: {
				const closingTagToken = currentToken as TagToken
				closingTagToken.param = closingTagToken.param.trim()

				if (closingTagToken.param) {
					// A named close matches the innermost open tag with that name,
					// so that a later `<</>>` still resolves to the right tag.
					const index = openTags.lastIndexOf(closingTagToken.param)
					if (index < 0) {
						console.warn("Trying to close a tag that was not opened")
					} else {
						openTags.splice(index, 1)
					}
					// The closing token is emitted even when nothing matched.
					lineTokens.push(closingTagToken)
					break
				}

				// if we have an empty close tag <</>>, we instead take the last open param
				const lastOpenTag = openTags.pop()
				if (lastOpenTag === undefined) {
					console.warn("Trying to close a tag when there are no open tags")
					break
				}
				closingTagToken.param = lastOpenTag
				lineTokens.push(closingTagToken)
				break
			}
			case LINE_TOKEN_STATE.LINK_URL:
			case LINE_TOKEN_STATE.NOTE_URL:
				// The link/note token was created when its text started and has been
				// carried through the URL state; it is complete now.
				lineTokens.push(currentToken)
				break
		}

		// 2. At the end of the line, report and recover from unfinished constructs.
		if (newState === LINE_TOKEN_STATE.END) {
			if (state === LINE_TOKEN_STATE.LINK_TEXT) {
				const linkToken = currentToken as AnyToken as LinkToken
				console.warn("Dangling left bracket. This may be intentional, but unlikely.")
				lineTokens.push({
					type: "text",
					text: `[${linkToken.text}`,
				})
			}
			if (state === LINE_TOKEN_STATE.NOTE_TEXT) {
				const linkToken = currentToken as AnyToken as LinkToken
				console.warn("Dangling left bracket. This may be intentional, but unlikely.")
				lineTokens.push({
					type: "text",
					text: `![${linkToken.text}`,
				})
			}
			if (
				state === LINE_TOKEN_STATE.LINK_URL ||
				state === LINE_TOKEN_STATE.NOTE_URL ||
				state === LINE_TOKEN_STATE.CLOSING_TAG ||
				state === LINE_TOKEN_STATE.OPENING_TAG
			) {
				// The partial token has already been pushed by step 1.
				console.warn("Tag was left unclosed at the end of the line.")
			}
		}

		state = newState

		// 3. Start the token for the new state. The URL states deliberately keep
		// the current link/note token so the target is added to the same object.
		switch (newState) {
			case LINE_TOKEN_STATE.TEXT:
				currentToken = { type: "text", text: "" }
				break
			case LINE_TOKEN_STATE.OPENING_TAG:
				currentToken = { type: "tag", isClosingTag: false, param: "" }
				break
			case LINE_TOKEN_STATE.CLOSING_TAG:
				currentToken = { type: "tag", isClosingTag: true, param: "" }
				break
			case LINE_TOKEN_STATE.LINK_TEXT:
				currentToken = { type: "link", target: "", text: "" }
				break
			case LINE_TOKEN_STATE.NOTE_TEXT:
				currentToken = { type: "note", target: "", text: "" }
				break
		}
	}

	for (const token of Array.from(lexer as any) as any[]) {
		// add the lexer token to the current line token
		// change state if need be.
		switch (token.type) {
			case "text":
			case "linkText":
				currentToken.text += token.value
				break
			case "startTagStart":
				switchState(LINE_TOKEN_STATE.OPENING_TAG)
				break
			case "endTagStart":
				switchState(LINE_TOKEN_STATE.CLOSING_TAG)
				break
			case "tagEnd":
				switchState(LINE_TOKEN_STATE.TEXT)
				break
			case "tagName": {
				const tagToken = currentToken as AnyToken as TagToken
				tagToken.param += token.value
				break
			}
			case "linkStart":
				switchState(LINE_TOKEN_STATE.LINK_TEXT)
				break
			case "noteStart":
				switchState(LINE_TOKEN_STATE.NOTE_TEXT)
				break
			case "endLink":
				switchState(LINE_TOKEN_STATE.LINK_URL)
				break
			case "endNote":
				switchState(LINE_TOKEN_STATE.NOTE_URL)
				break
			case "endNoLink":
			case "endNoNote": {
				// The brackets were not followed by `(`, so this is not a link/note:
				// put the consumed delimiters back and continue as plain text.
				const opening = token.type === "endNoNote" ? "![" : "["
				const existingText = `${opening}${currentToken.text}]`
				switchState(LINE_TOKEN_STATE.TEXT)
				currentToken.text = existingText
				break
			}
			case "linkURL": {
				const linkToken = currentToken as AnyToken as LinkToken
				linkToken.target += token.value
				break
			}
			case "endLinkURL":
				switchState(LINE_TOKEN_STATE.TEXT)
				break
		}
	}

	// Flush whatever token is still pending.
	switchState(LINE_TOKEN_STATE.END)

	return lineTokens
}