From 175fc0c3212b4af4d5f24482595823f2567f8f1e Mon Sep 17 00:00:00 2001
From: Tony Brix
Date: Tue, 28 Nov 2023 20:56:27 -0700
Subject: [PATCH] fix: clean up rules so they can be typed (#3087)

BREAKING CHANGE: Lexer.rules object has been changed so it can be properly
typed. Some intermediate rules have been removed.
---
 src/Instance.ts          |  36 ++--
 src/Lexer.ts             |   6 +-
 src/Tokenizer.ts         | 134 ++++++------
 src/helpers.ts           |  16 +-
 src/rules.ts             | 448 ++++++++++++++++++---------------------
 test/types/marked.ts     |   5 +-
 test/unit/marked.test.js |   2 +-
 7 files changed, 301 insertions(+), 346 deletions(-)

diff --git a/src/Instance.ts b/src/Instance.ts
index d2d52745cd..2ca489f297 100644
--- a/src/Instance.ts
+++ b/src/Instance.ts
@@ -142,11 +142,14 @@ export class Marked {
     if (pack.renderer) {
       const renderer = this.defaults.renderer || new _Renderer(this.defaults);
       for (const prop in pack.renderer) {
-        const rendererFunc = pack.renderer[prop as keyof MarkedExtension['renderer']] as GenericRendererFunction;
-        const rendererKey = prop as keyof _Renderer;
-        const prevRenderer = renderer[rendererKey] as GenericRendererFunction;
+        if (!(prop in renderer) || prop === 'options') {
+          throw new Error(`renderer '${prop}' does not exist`);
+        }
+        const rendererProp = prop as Exclude<keyof _Renderer, 'options'>;
+        const rendererFunc = pack.renderer[rendererProp] as GenericRendererFunction;
+        const prevRenderer = renderer[rendererProp] as GenericRendererFunction;
         // Replace renderer with func to run extension, but fall back if false
-        renderer[rendererKey] = (...args: unknown[]) => {
+        renderer[rendererProp] = (...args: unknown[]) => {
           let ret = rendererFunc.apply(renderer, args);
           if (ret === false) {
             ret = prevRenderer.apply(renderer, args);
@@ -159,11 +162,15 @@ export class Marked {
     if (pack.tokenizer) {
       const tokenizer = this.defaults.tokenizer || new _Tokenizer(this.defaults);
       for (const prop in pack.tokenizer) {
-        const tokenizerFunc = pack.tokenizer[prop as keyof MarkedExtension['tokenizer']] as UnknownFunction;
-        const tokenizerKey = prop as keyof _Tokenizer;
-        const prevTokenizer = tokenizer[tokenizerKey] as UnknownFunction;
+        if (!(prop in tokenizer) || ['options', 'rules', 'lexer'].includes(prop)) {
+          throw new Error(`tokenizer '${prop}' does not exist`);
+        }
+        const tokenizerProp = prop as Exclude<keyof _Tokenizer, 'options' | 'rules' | 'lexer'>;
+        const tokenizerFunc = pack.tokenizer[tokenizerProp] as UnknownFunction;
+        const prevTokenizer = tokenizer[tokenizerProp] as UnknownFunction;
         // Replace tokenizer with func to run extension, but fall back if false
-        tokenizer[tokenizerKey] = (...args: unknown[]) => {
+        // @ts-expect-error cannot type tokenizer function dynamically
+        tokenizer[tokenizerProp] = (...args: unknown[]) => {
           let ret = tokenizerFunc.apply(tokenizer, args);
           if (ret === false) {
             ret = prevTokenizer.apply(tokenizer, args);
@@ -178,11 +185,14 @@ export class Marked {
     if (pack.hooks) {
       const hooks = this.defaults.hooks || new _Hooks();
       for (const prop in pack.hooks) {
-        const hooksFunc = pack.hooks[prop as keyof MarkedExtension['hooks']] as UnknownFunction;
-        const hooksKey = prop as keyof _Hooks;
-        const prevHook = hooks[hooksKey] as UnknownFunction;
+        if (!(prop in hooks) || prop === 'options') {
+          throw new Error(`hook '${prop}' does not exist`);
+        }
+        const hooksProp = prop as Exclude<keyof _Hooks, 'options'>;
+        const hooksFunc = pack.hooks[hooksProp] as UnknownFunction;
+        const prevHook = hooks[hooksProp] as UnknownFunction;
         if (_Hooks.passThroughHooks.has(prop)) {
-          hooks[hooksKey as 'preprocess' | 'postprocess'] = (arg: string | undefined) => {
+          hooks[hooksProp] = (arg: string | undefined) => {
            if (this.defaults.async) {
              return Promise.resolve(hooksFunc.call(hooks, arg)).then(ret => {
                return prevHook.call(hooks, ret) as string;
@@ -193,7 +203,7 @@ export class Marked {
               return prevHook.call(hooks, ret) as string;
             };
           } else {
-          hooks[hooksKey] = (...args: unknown[]) => {
+          hooks[hooksProp] = (...args: unknown[]) => {
             let ret = hooksFunc.apply(hooks, args);
             if (ret === false) {
               ret = prevHook.apply(hooks, args);
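With this change an extension that targets a property the renderer does not actually have fails fast instead of silently installing a dead override. The following is a rough sketch of the observable behavior, not part of the patch; it assumes the public `marked.use()` API, and `headng` is a deliberate, hypothetical typo:

    import { marked } from 'marked';

    // Overriding a real renderer method still works, and returning false
    // falls back to the previous renderer as before.
    marked.use({
      renderer: {
        heading(text: string, level: number) {
          return level === 1 ? `<h1 class="title">${text}</h1>\n` : false;
        }
      }
    });

    // A misspelled property is now rejected up front at runtime (and the
    // typed Exclude<keyof _Renderer, 'options'> key rejects it at compile time).
    try {
      marked.use({ renderer: { headng: () => false } } as never);
    } catch (err) {
      console.error(err); // Error: renderer 'headng' does not exist
    }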
diff --git a/src/Lexer.ts b/src/Lexer.ts
index a645552024..b26a1cb407 100644
--- a/src/Lexer.ts
+++ b/src/Lexer.ts
@@ -3,7 +3,6 @@ import { _defaults } from './defaults.ts';
 import { block, inline } from './rules.ts';
 import type { Token, TokensList, Tokens } from './Tokens.ts';
 import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';
-import type { Rules } from './rules.ts';
 
 /**
  * Block Lexer
@@ -22,8 +21,7 @@ export class _Lexer {
 
   constructor(options?: MarkedOptions) {
     // TokenList cannot be created in one go
-    // @ts-expect-error
-    this.tokens = [];
+    this.tokens = [] as unknown as TokensList;
     this.tokens.links = Object.create(null);
     this.options = options || _defaults;
     this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
@@ -59,7 +57,7 @@ export class _Lexer {
   /**
    * Expose Rules
    */
-  static get rules(): Rules {
+  static get rules() {
     return {
       block,
       inline
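For reference, the static getter now infers its return type from the imported `block` and `inline` tables instead of the loose `Rules` index type, so individual rules come back as `RegExp` without casting. A small sketch (not part of the patch):

    import { Lexer } from 'marked';

    // Both groups are plain objects of RegExp values.
    const { block, inline } = Lexer.rules;

    console.log(block.gfm.table.source);           // the GFM table pattern
    console.log(inline.normal.escape.test('\\*')); // true — "\*" is an escapable sequence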
diff --git a/src/Tokenizer.ts b/src/Tokenizer.ts
index 94c2ed3b36..9daee0b6a4 100644
--- a/src/Tokenizer.ts
+++ b/src/Tokenizer.ts
@@ -5,6 +5,7 @@ import {
   escape,
   findClosingBracket
 } from './helpers.ts';
+import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
 import type { Links, Tokens } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
@@ -69,9 +70,8 @@ function indentCodeCompensation(raw: string, text: string) {
  */
 export class _Tokenizer {
   options: MarkedOptions;
-  // TODO: Fix this rules type
-  rules: any;
-  lexer!: _Lexer;
+  rules!: Rules; // set by the lexer
+  lexer!: _Lexer; // set by the lexer
 
   constructor(options?: MarkedOptions) {
     this.options = options || _defaults;
@@ -111,7 +111,7 @@ export class _Tokenizer {
       return {
         type: 'code',
         raw,
-        lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
+        lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
         text
       };
     }
@@ -182,7 +182,7 @@ export class _Tokenizer {
         ordered: isordered,
         start: isordered ? +bull.slice(0, -1) : '',
         loose: false,
-        items: [] as Tokens.ListItem[]
+        items: []
       };
 
       bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
@@ -207,10 +207,10 @@ export class _Tokenizer {
           break;
         }
 
-        raw = cap[0] as string;
+        raw = cap[0];
         src = src.substring(raw.length);
 
-        let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length)) as string;
+        let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
         let nextLine = src.split('\n', 1)[0];
 
         let indent = 0;
@@ -338,7 +338,7 @@ export class _Tokenizer {
 
       // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
       list.items[list.items.length - 1].raw = raw.trimEnd();
-      (list.items[list.items.length - 1] as Tokens.ListItem).text = itemContents.trimEnd();
+      list.items[list.items.length - 1].text = itemContents.trimEnd();
       list.raw = list.raw.trimEnd();
 
       // Item child tokens handled here at end because we needed to have the final item to trim it first
@@ -384,8 +384,8 @@ export class _Tokenizer {
     const cap = this.rules.block.def.exec(src);
     if (cap) {
       const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
-      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
-      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
+      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
+      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
       return {
         type: 'def',
         tag,
@@ -398,67 +398,61 @@ export class _Tokenizer {
 
   table(src: string): Tokens.Table | undefined {
     const cap = this.rules.block.table.exec(src);
-    if (cap) {
-      if (!/[:|]/.test(cap[2])) {
-        // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
-        return;
-      }
+    if (!cap) {
+      return;
+    }
 
-      const item: Tokens.Table = {
-        type: 'table',
-        raw: cap[0],
-        header: splitCells(cap[1]).map(c => {
-          return { text: c, tokens: [] };
-        }),
-        align: cap[2].replace(/^\||\| *$/g, '').split('|'),
-        rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
-      };
+    if (!/[:|]/.test(cap[2])) {
+      // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
+      return;
+    }
 
-      if (item.header.length === item.align.length) {
-        let l = item.align.length;
-        let i, j, k, row;
-        for (i = 0; i < l; i++) {
-          const align = item.align[i];
-          if (align) {
-            if (/^ *-+: *$/.test(align)) {
-              item.align[i] = 'right';
-            } else if (/^ *:-+: *$/.test(align)) {
-              item.align[i] = 'center';
-            } else if (/^ *:-+ *$/.test(align)) {
-              item.align[i] = 'left';
-            } else {
-              item.align[i] = null;
-            }
-          }
-        }
+    const headers = splitCells(cap[1]);
+    const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
+    const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];
 
-        l = item.rows.length;
-        for (i = 0; i < l; i++) {
-          item.rows[i] = splitCells(item.rows[i] as unknown as string, item.header.length).map(c => {
-            return { text: c, tokens: [] };
-          });
-        }
+    const item: Tokens.Table = {
+      type: 'table',
+      raw: cap[0],
+      header: [],
+      align: [],
+      rows: []
+    };
 
-        // parse child tokens inside headers and cells
+    if (headers.length !== aligns.length) {
+      // header and align columns must be equal, rows can be different.
+      return;
+    }
 
-        // header child tokens
-        l = item.header.length;
-        for (j = 0; j < l; j++) {
-          item.header[j].tokens = this.lexer.inline(item.header[j].text);
-        }
+    for (const align of aligns) {
+      if (/^ *-+: *$/.test(align)) {
+        item.align.push('right');
+      } else if (/^ *:-+: *$/.test(align)) {
+        item.align.push('center');
+      } else if (/^ *:-+ *$/.test(align)) {
+        item.align.push('left');
+      } else {
+        item.align.push(null);
+      }
+    }
 
-        // cell child tokens
-        l = item.rows.length;
-        for (j = 0; j < l; j++) {
-          row = item.rows[j];
-          for (k = 0; k < row.length; k++) {
-            row[k].tokens = this.lexer.inline(row[k].text);
-          }
-        }
+    for (const header of headers) {
+      item.header.push({
+        text: header,
+        tokens: this.lexer.inline(header)
+      });
+    }
 
-        return item;
-      }
+    for (const row of rows) {
+      item.rows.push(splitCells(row, item.header.length).map(cell => {
+        return {
+          text: cell,
+          tokens: this.lexer.inline(cell)
+        };
+      }));
     }
+
+    return item;
   }
 
   lheading(src: string): Tokens.Heading | undefined {
@@ -587,8 +581,8 @@ export class _Tokenizer {
       }
     }
     return outputLink(cap, {
-      href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
-      title: title ? title.replace(this.rules.inline._escapes, '$1') : title
+      href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
+      title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title
    }, cap[0], this.lexer);
   }
 }
@@ -597,8 +591,8 @@ export class _Tokenizer {
     let cap;
     if ((cap = this.rules.inline.reflink.exec(src))
       || (cap = this.rules.inline.nolink.exec(src))) {
-      let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
-      link = links[link.toLowerCase()];
+      const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
+      const link = links[linkString.toLowerCase()];
       if (!link) {
         const text = cap[0].charAt(0);
         return {
@@ -612,7 +606,7 @@ export class _Tokenizer {
   }
 
   emStrong(src: string, maskedSrc: string, prevChar = ''): Tokens.Em | Tokens.Strong | undefined {
-    let match = this.rules.inline.emStrong.lDelim.exec(src);
+    let match = this.rules.inline.emStrongLDelim.exec(src);
     if (!match) return;
 
     // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
@@ -625,7 +619,7 @@ export class _Tokenizer {
     const lLength = [...match[0]].length - 1;
     let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
 
-    const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
+    const endReg = match[0][0] === '*' ? this.rules.inline.emStrongRDelimAst : this.rules.inline.emStrongRDelimUnd;
     endReg.lastIndex = 0;
 
     // Clip maskedSrc to same section of string as src (move to lexer?)
@@ -761,7 +755,7 @@ export class _Tokenizer {
       let prevCapZero;
       do {
         prevCapZero = cap[0];
-        cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
+        cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? '';
       } while (prevCapZero !== cap[0]);
       text = escape(cap[0]);
       if (cap[1] === 'www.') {
diff --git a/src/helpers.ts b/src/helpers.ts
index 82de9135f3..32e1ae6f5f 100644
--- a/src/helpers.ts
+++ b/src/helpers.ts
@@ -1,5 +1,3 @@
-import type { Rule } from './rules.ts';
-
 /**
  * Helpers
  */
@@ -48,18 +46,18 @@ export function unescape(html: string) {
 
 const caret = /(^|[^\[])\^/g;
 
-export function edit(regex: Rule, opt?: string) {
-  regex = typeof regex === 'string' ? regex : regex.source;
+export function edit(regex: string | RegExp, opt?: string) {
+  let source = typeof regex === 'string' ? regex : regex.source;
   opt = opt || '';
   const obj = {
     replace: (name: string | RegExp, val: string | RegExp) => {
-      val = typeof val === 'object' && 'source' in val ? val.source : val;
-      val = val.replace(caret, '$1');
-      regex = (regex as string).replace(name, val);
+      let valSource = typeof val === 'string' ? val : val.source;
+      valSource = valSource.replace(caret, '$1');
+      source = source.replace(name, valSource);
       return obj;
     },
     getRegex: () => {
-      return new RegExp(regex, opt);
+      return new RegExp(source, opt);
     }
   };
   return obj;
@@ -74,7 +72,7 @@ export function cleanUrl(href: string) {
   return href;
 }
 
-export const noopTest = { exec: () => null };
+export const noopTest = { exec: () => null } as unknown as RegExp;
 
 export function splitCells(tableRow: string, count?: number) {
   // ensure that every cell-delimiting pipe has a space
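`edit()` is the template mechanism the whole rules file leans on: start from a pattern containing placeholder names, splice sub-patterns in (stripping their `^` anchors via `caret`), then compile once with `getRegex()`. Since the helper is internal, here is a standalone sketch of the same pattern:

    const caret = /(^|[^\[])\^/g;

    function edit(regex: string | RegExp, opt = '') {
      let source = typeof regex === 'string' ? regex : regex.source;
      const obj = {
        replace(name: string | RegExp, val: string | RegExp) {
          let valSource = typeof val === 'string' ? val : val.source;
          valSource = valSource.replace(caret, '$1'); // drop ^ so the sub-pattern can be embedded
          source = source.replace(name, valSource);
          return obj;
        },
        getRegex: () => new RegExp(source, opt)
      };
      return obj;
    }

    // e.g. compose a list-item opener from the shared bullet sub-pattern:
    const bullet = /(?:[*+-]|\d{1,9}[.)])/;
    const listItemStart = edit(/^( *)(bull) */).replace('bull', bullet).getRegex();
    console.log(listItemStart.test('  - item')); // true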
diff --git a/src/rules.ts b/src/rules.ts
index 4489fb0e1a..6c758961e4 100644
--- a/src/rules.ts
+++ b/src/rules.ts
@@ -1,136 +1,57 @@
 import {
-  noopTest,
-  edit
+  edit, noopTest
 } from './helpers.ts';
 
-export type Rule = RegExp | string;
-
-export interface Rules {
-  [ruleName: string]: Pick<RegExp, 'exec'> | Rule | Rules;
-}
-
-type BlockRuleNames =
-  | 'newline'
-  | 'code'
-  | 'fences'
-  | 'hr'
-  | 'heading'
-  | 'blockquote'
-  | 'list'
-  | 'html'
-  | 'def'
-  | 'lheading'
-  | '_paragraph'
-  | 'text'
-  | '_label'
-  | '_title'
-  | 'bullet'
-  | 'listItemStart'
-  | '_tag'
-  | '_comment'
-  | 'paragraph'
-  | 'uote' ;
-
-type BlockSubRuleNames = 'normal' | 'gfm' | 'pedantic';
-
-type InlineRuleNames =
-  | 'escape'
-  | 'autolink'
-  | 'tag'
-  | 'link'
-  | 'reflink'
-  | 'nolink'
-  | 'reflinkSearch'
-  | 'code'
-  | 'br'
-  | 'text'
-  | '_punctuation'
-  | 'punctuation'
-  | 'blockSkip'
-  | 'escapedEmSt'
-  | '_comment'
-  | '_escapes'
-  | '_scheme'
-  | '_email'
-  | '_attribute'
-  | '_label'
-  | '_href'
-  | '_title'
-  | 'strong'
-  | '_extended_email'
-  | '_backpedal';
-
-type InlineSubRuleNames = 'gfm' | 'emStrong' | 'normal' | 'pedantic'| 'breaks';
-
 /**
  * Block-Level Grammar
  */
-// Not all rules are defined in the object literal
-// @ts-expect-error
-export const block: Record<BlockRuleNames, Rule> & Record<BlockSubRuleNames, Rules> & Rules = {
-  newline: /^(?: *(?:\n|$))+/,
-  code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
-  fences: /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
-  hr: /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/,
-  heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
-  blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
-  list: /^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/,
-  html: '^ {0,3}(?:' // optional indentation
-    + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1)
-    + '|comment[^\\n]*(\\n+|$)' // (2)
-    + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
-    + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
-    + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
-    + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
-    + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
-    + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
-    + ')',
-  def: /^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/,
-  table: noopTest,
-  lheading: /^(?!bull )((?:.|\n(?!\s*?\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
-  // regex template, placeholders will be replaced according to different paragraph
-  // interruption rules of commonmark and the original markdown spec:
-  _paragraph: /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/,
-  text: /^[^\n]+/
-};
-
-block._label = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
-block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/;
-block.def = edit(block.def)
-  .replace('label', block._label)
-  .replace('title', block._title)
+const newline = /^(?: *(?:\n|$))+/;
+const blockCode = /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/;
+const fences = /^ {0,3}(`{3,}(?=[^`\n]*(?:\n|$))|~{3,})([^\n]*)(?:\n|$)(?:|([\s\S]*?)(?:\n|$))(?: {0,3}\1[~`]* *(?=\n|$)|$)/;
+const hr = /^ {0,3}((?:-[\t ]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})(?:\n+|$)/;
+const heading = /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/;
+const bullet = /(?:[*+-]|\d{1,9}[.)])/;
+const lheading = edit(/^(?!bull )((?:.|\n(?!\s*?\n|bull ))+?)\n {0,3}(=+|-+) *(?:\n+|$)/)
+  .replace(/bull/g, bullet) // lists can interrupt
   .getRegex();
-
-block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
-block.listItemStart = edit(/^( *)(bull) */)
-  .replace('bull', block.bullet)
+const _paragraph = /^([^\n]+(?:\n(?!hr|heading|lheading|blockquote|fences|list|html|table| +\n)[^\n]+)*)/;
+const blockText = /^[^\n]+/;
+const _blockLabel = /(?!\s*\])(?:\\.|[^\[\]\\])+/;
+const def = edit(/^ {0,3}\[(label)\]: *(?:\n *)?([^<\s][^\s]*|<.*?>)(?:(?: +(?:\n *)?| *\n *)(title))? *(?:\n+|$)/)
+  .replace('label', _blockLabel)
+  .replace('title', /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/)
   .getRegex();
-block.list = edit(block.list)
-  .replace(/bull/g, block.bullet)
-  .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))')
-  .replace('def', '\\n+(?=' + block.def.source + ')')
+const list = edit(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/)
+  .replace(/bull/g, bullet)
   .getRegex();
-block._tag = 'address|article|aside|base|basefont|blockquote|body|caption'
+const _tag = 'address|article|aside|base|basefont|blockquote|body|caption'
   + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption'
   + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe'
   + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option'
   + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr'
   + '|track|ul';
-block._comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
-block.html = edit(block.html, 'i')
-  .replace('comment', block._comment)
-  .replace('tag', block._tag)
+const _comment = /<!--(?!-?>)[\s\S]*?(?:-->|$)/;
+const html = edit(
+  '^ {0,3}(?:' // optional indentation
+  + '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:[^\\n]*\\n+|$)' // (1)
+  + '|comment[^\\n]*(\\n+|$)' // (2)
+  + '|<\\?[\\s\\S]*?(?:\\?>\\n*|$)' // (3)
+  + '|<![A-Z][\\s\\S]*?(?:>\\n*|$)' // (4)
+  + '|<!\\[CDATA\\[[\\s\\S]*?(?:\\]\\]>\\n*|$)' // (5)
+  + '|</?(tag)(?: +|\\n|/?>)[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (6)
+  + '|<(?!script|pre|style|textarea)([a-z][\\w-]*)(?:attribute)*? */?>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) open tag
+  + '|</(?!script|pre|style|textarea)[a-z][\\w-]*\\s*>(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag
+  + ')', 'i')
+  .replace('comment', _comment)
+  .replace('tag', _tag)
   .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/)
   .getRegex();
-
-block.lheading = edit(block.lheading)
-  .replace(/bull/g, block.bullet) // lists can interrupt
-  .getRegex();
-
-block.paragraph = edit(block._paragraph)
-  .replace('hr', block.hr)
+const paragraph = edit(_paragraph)
+  .replace('hr', hr)
   .replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
   .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
   .replace('|table', '')
@@ -138,63 +59,80 @@ block.paragraph = edit(block._paragraph)
   .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
   .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
   .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
-  .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
+  .replace('tag', _tag) // pars can be interrupted by type (6) html blocks
   .getRegex();
 
-block.blockquote = edit(block.blockquote)
-  .replace('paragraph', block.paragraph)
+const blockquote = edit(/^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/)
+  .replace('paragraph', paragraph)
   .getRegex();
 
 /**
  * Normal Block Grammar
  */
 
-block.normal = { ...block };
+const blockNormal = {
+  blockquote,
+  code: blockCode,
+  def,
+  fences,
+  heading,
+  hr,
+  html,
+  lheading,
+  list,
+  newline,
+  paragraph,
+  table: noopTest,
+  text: blockText
+};
+
+type BlockKeys = keyof typeof blockNormal;
 
 /**
  * GFM Block Grammar
  */
 
-block.gfm = {
-  ...block.normal,
-  table: '^ *([^\\n ].*)\\n' // Header
-    + ' {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)' // Align
-    + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells
-};
-
-block.gfm.table = edit(block.gfm.table as Rule)
-  .replace('hr', block.hr)
+const gfmTable = edit(
+  '^ *([^\\n ].*)\\n' // Header
+  + ' {0,3}((?:\\| *)?:?-+:? *(?:\\| *:?-+:? *)*(?:\\| *)?)' // Align
+  + '(?:\\n((?:(?! *\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)') // Cells
+  .replace('hr', hr)
   .replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
   .replace('blockquote', ' {0,3}>')
   .replace('code', ' {4}[^\\n]')
   .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
   .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
   .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
-  .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks
+  .replace('tag', _tag) // tables can be interrupted by type (6) html blocks
   .getRegex();
 
-block.gfm.paragraph = edit(block._paragraph)
-  .replace('hr', block.hr)
-  .replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
-  .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
-  .replace('table', block.gfm.table as RegExp) // interrupt paragraphs with table
-  .replace('blockquote', ' {0,3}>')
-  .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
-  .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
-  .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
-  .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks
-  .getRegex();
+const blockGfm: Record<BlockKeys, RegExp> = {
+  ...blockNormal,
+  table: gfmTable,
+  paragraph: edit(_paragraph)
+    .replace('hr', hr)
+    .replace('heading', ' {0,3}#{1,6}(?:\\s|$)')
+    .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs
+    .replace('table', gfmTable) // interrupt paragraphs with table
+    .replace('blockquote', ' {0,3}>')
+    .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n')
+    .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt
+    .replace('html', '</?(?:tag)(?: +|\\n|/?>)|<(?:script|pre|style|textarea|!--)')
+    .replace('tag', _tag) // pars can be interrupted by type (6) html blocks
+    .getRegex()
+};
 
 /**
  * Pedantic grammar (original John Gruber's loose markdown specification)
  */
 
-block.pedantic = {
-  ...block.normal,
+const blockPedantic: Record<BlockKeys, RegExp> = {
+  ...blockNormal,
   html: edit(
     '^ *(?:comment *(?:\\n|\\s*$)'
     + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
     + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))')
-    .replace('comment', block._comment)
+    .replace('comment', _comment)
     .replace(/tag/g, '(?!(?:'
       + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub'
      + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)'
      + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
    .getRegex(),
@@ -204,148 +142,146 @@ block.pedantic = {
   heading: /^(#{1,6})(.*)(?:\n+|$)/,
   fences: noopTest, // fences not supported
   lheading: /^(.+?)\n {0,3}(=+|-+) *(?:\n+|$)/,
-  paragraph: edit(block.normal._paragraph as Rule)
-    .replace('hr', block.hr)
+  paragraph: edit(_paragraph)
+    .replace('hr', hr)
     .replace('heading', ' *#{1,6} *[^\n]')
-    .replace('lheading', block.lheading)
+    .replace('lheading', lheading)
+    .replace('|table', '')
     .replace('blockquote', ' {0,3}>')
     .replace('|fences', '')
     .replace('|list', '')
     .replace('|html', '')
+    .replace('|tag', '')
     .getRegex()
 };
 
 /**
  * Inline-Level Grammar
  */
-// Not all rules are defined in the object literal
-// @ts-expect-error
-export const inline: Record<InlineRuleNames, Rule> & Record<InlineSubRuleNames, Rules> & Rules = {
-  escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/,
-  autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/,
-  url: noopTest,
-  tag: '^comment'
-    + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
-    + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
-    + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
-    + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
-    + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>', // CDATA section
-  link: /^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/,
-  reflink: /^!?\[(label)\]\[(ref)\]/,
-  nolink: /^!?\[(ref)\](?:\[\])?/,
-  reflinkSearch: 'reflink|nolink(?!\\()',
-  emStrong: {
-    lDelim: /^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/,
-    // (1) and (2) can only be a Right Delimiter. (3) and (4) can only be Left. (5) and (6) can be either Left or Right.
-    // | Skip orphan inside strong | Consume to delim | (1) #*** | (2) a***#, a*** | (3) #***a, ***a | (4) ***# | (5) #***# | (6) a***a
-    rDelimAst: /^[^_*]*?__[^_*]*?\*[^_*]*?(?=__)|[^*]+(?=[^*])|(?!\*)[punct](\*+)(?=[\s]|$)|[^punct\s](\*+)(?!\*)(?=[punct\s]|$)|(?!\*)[punct\s](\*+)(?=[^punct\s])|[\s](\*+)(?!\*)(?=[punct])|(?!\*)[punct](\*+)(?!\*)(?=[punct])|[^punct\s](\*+)(?=[^punct\s])/,
-    rDelimUnd: /^[^_*]*?\*\*[^_*]*?_[^_*]*?(?=\*\*)|[^_]+(?=[^_])|(?!_)[punct](_+)(?=[\s]|$)|[^punct\s](_+)(?!_)(?=[punct\s]|$)|(?!_)[punct\s](_+)(?=[^punct\s])|[\s](_+)(?!_)(?=[punct])|(?!_)[punct](_+)(?!_)(?=[punct])/ // ^- Not allowed for _
-  },
-  code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
-  br: /^( {2,}|\\)\n(?!\s*$)/,
-  del: noopTest,
-  text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/,
-  punctuation: /^((?![*_])[\spunctuation])/
-};
+const escape = /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/;
+const inlineCode = /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/;
+const br = /^( {2,}|\\)\n(?!\s*$)/;
+const inlineText = /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\<!\[`*_]|\b_|$)|[^ ](?= {2,}\n)))/;
 
-inline._punctuation = '\\p{P}$+<=>`^|~';
-inline.punctuation = edit(inline.punctuation, 'u').replace(/punctuation/g, inline._punctuation).getRegex();
+const _punctuation = '\\p{P}$+<=>`^|~';
+const punctuation = edit(/^((?![*_])[\spunctuation])/, 'u')
+  .replace(/punctuation/g, _punctuation).getRegex();
 
 // sequences em should skip over [title](link), `code`, <html>
-inline.blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g;
-inline.anyPunctuation = /\\[punct]/g;
-inline._escapes = /\\([punct])/g;
-
-inline._comment = edit(block._comment).replace('(?:-->|$)', '-->').getRegex();
-
-inline.emStrong.lDelim = edit(inline.emStrong.lDelim as Rule, 'u')
-  .replace(/punct/g, inline._punctuation)
-  .getRegex();
+const blockSkip = /\[[^[\]]*?\]\([^\(\)]*?\)|`[^`]*?`|<[^<>]*?>/g;
 
-inline.emStrong.rDelimAst = edit(inline.emStrong.rDelimAst as Rule, 'gu')
-  .replace(/punct/g, inline._punctuation)
+const emStrongLDelim = edit(/^(?:\*+(?:((?!\*)[punct])|[^\s*]))|^_+(?:((?!_)[punct])|([^\s_]))/, 'u')
+  .replace(/punct/g, _punctuation)
   .getRegex();
 
-inline.emStrong.rDelimUnd = edit(inline.emStrong.rDelimUnd as Rule, 'gu')
-  .replace(/punct/g, inline._punctuation)
+const emStrongRDelimAst = edit(
  '^[^_*]*?__[^_*]*?\\*[^_*]*?(?=__)' // Skip orphan inside strong
+  + '|[^*]+(?=[^*])' // Consume to delim
+  + '|(?!\\*)[punct](\\*+)(?=[\\s]|$)' // (1) #*** can only be a Right Delimiter
+  + '|[^punct\\s](\\*+)(?!\\*)(?=[punct\\s]|$)' // (2) a***#, a*** can only be a Right Delimiter
+  + '|(?!\\*)[punct\\s](\\*+)(?=[^punct\\s])' // (3) #***a, ***a can only be Left Delimiter
+  + '|[\\s](\\*+)(?!\\*)(?=[punct])' // (4) ***# can only be Left Delimiter
+  + '|(?!\\*)[punct](\\*+)(?!\\*)(?=[punct])' // (5) #***# can be either Left or Right Delimiter
+  + '|[^punct\\s](\\*+)(?=[^punct\\s])', 'gu') // (6) a***a can be either Left or Right Delimiter
+  .replace(/punct/g, _punctuation)
   .getRegex();
 
-inline.anyPunctuation = edit(inline.anyPunctuation as Rule, 'gu')
-  .replace(/punct/g, inline._punctuation)
+// (6) Not allowed for _
+const emStrongRDelimUnd = edit(
+  '^[^_*]*?\\*\\*[^_*]*?_[^_*]*?(?=\\*\\*)' // Skip orphan inside strong
+  + '|[^_]+(?=[^_])' // Consume to delim
+  + '|(?!_)[punct](_+)(?=[\\s]|$)' // (1) #___ can only be a Right Delimiter
+  + '|[^punct\\s](_+)(?!_)(?=[punct\\s]|$)' // (2) a___#, a___ can only be a Right Delimiter
+  + '|(?!_)[punct\\s](_+)(?=[^punct\\s])' // (3) #___a, ___a can only be Left Delimiter
+  + '|[\\s](_+)(?!_)(?=[punct])' // (4) ___# can only be Left Delimiter
+  + '|(?!_)[punct](_+)(?!_)(?=[punct])', 'gu') // (5) #___# can be either Left or Right Delimiter
+  .replace(/punct/g, _punctuation)
   .getRegex();
 
-inline._escapes = edit(inline._escapes, 'gu')
-  .replace(/punct/g, inline._punctuation)
+const anyPunctuation = edit(/\\([punct])/, 'gu')
+  .replace(/punct/g, _punctuation)
   .getRegex();
 
-inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
-inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/;
-inline.autolink = edit(inline.autolink)
-  .replace('scheme', inline._scheme)
-  .replace('email', inline._email)
+const autolink = edit(/^<(scheme:[^\s\x00-\x1f<>]*|email)>/)
+  .replace('scheme', /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/)
+  .replace('email', /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/)
   .getRegex();
 
-inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/;
-
-inline.tag = edit(inline.tag)
-  .replace('comment', inline._comment)
-  .replace('attribute', inline._attribute)
+const _inlineComment = edit(_comment).replace('(?:-->|$)', '-->').getRegex();
+
+const tag = edit(
+  '^comment'
+  + '|^</[a-zA-Z][\\w:-]*\\s*>' // self-closing tag
+  + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag
+  + '|^<\\?[\\s\\S]*?\\?>' // processing instruction, e.g. <?php ?>
+  + '|^<![a-zA-Z]+\\s[\\s\\S]*?>' // declaration, e.g. <!DOCTYPE html>
+  + '|^<!\\[CDATA\\[[\\s\\S]*?\\]\\]>') // CDATA section
+  .replace('comment', _inlineComment)
+  .replace('attribute', /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/)
   .getRegex();
 
-inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
-inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/;
-inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/;
+const _inlineLabel = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/;
 
-inline.link = edit(inline.link)
-  .replace('label', inline._label)
-  .replace('href', inline._href)
-  .replace('title', inline._title)
+const link = edit(/^!?\[(label)\]\(\s*(href)(?:\s+(title))?\s*\)/)
+  .replace('label', _inlineLabel)
+  .replace('href', /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/)
+  .replace('title', /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/)
   .getRegex();
 
-inline.reflink = edit(inline.reflink)
-  .replace('label', inline._label)
-  .replace('ref', block._label)
+const reflink = edit(/^!?\[(label)\]\[(ref)\]/)
+  .replace('label', _inlineLabel)
+  .replace('ref', _blockLabel)
   .getRegex();
 
-inline.nolink = edit(inline.nolink)
-  .replace('ref', block._label)
+const nolink = edit(/^!?\[(ref)\](?:\[\])?/)
+  .replace('ref', _blockLabel)
   .getRegex();
 
-inline.reflinkSearch = edit(inline.reflinkSearch, 'g')
-  .replace('reflink', inline.reflink)
-  .replace('nolink', inline.nolink)
+const reflinkSearch = edit('reflink|nolink(?!\\()', 'g')
+  .replace('reflink', reflink)
+  .replace('nolink', nolink)
   .getRegex();
 
 /**
  * Normal Inline Grammar
  */
 
-inline.normal = { ...inline };
+const inlineNormal = {
+  _backpedal: noopTest, // only used for GFM url
+  anyPunctuation,
+  autolink,
+  blockSkip,
+  br,
+  code: inlineCode,
+  del: noopTest,
+  emStrongLDelim,
+  emStrongRDelimAst,
+  emStrongRDelimUnd,
+  escape,
+  link,
+  nolink,
+  punctuation,
+  reflink,
+  reflinkSearch,
+  tag,
+  text: inlineText,
+  url: noopTest
+};
+
+type InlineKeys = keyof typeof inlineNormal;
 
 /**
  * Pedantic Inline Grammar
  */
 
-inline.pedantic = {
-  ...inline.normal,
-  strong: {
-    start: /^__|\*\*/,
-    middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
-    endAst: /\*\*(?!\*)/g,
-    endUnd: /__(?!_)/g
-  },
-  em: {
-    start: /^_|\*/,
-    middle: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
-    endAst: /\*(?!\*)/g,
-    endUnd: /_(?!_)/g
-  },
+const inlinePedantic: Record<InlineKeys, RegExp> = {
+  ...inlineNormal,
   link: edit(/^!?\[(label)\]\((.*?)\)/)
-    .replace('label', inline._label)
+    .replace('label', _inlineLabel)
     .getRegex(),
   reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/)
-    .replace('label', inline._label)
+    .replace('label', _inlineLabel)
     .getRegex()
 };
 
@@ -353,28 +289,48 @@ inline.pedantic = {
 /**
  * GFM Inline Grammar
  */
 
-inline.gfm = {
-  ...inline.normal,
-  escape: edit(inline.escape).replace('])', '~|])').getRegex(),
-  _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/,
-  url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/,
+const inlineGfm: Record<InlineKeys, RegExp> = {
+  ...inlineNormal,
+  escape: edit(escape).replace('])', '~|])').getRegex(),
+  url: edit(/^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, 'i')
+    .replace('email', /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/)
+    .getRegex(),
   _backpedal: /(?:[^?!.,:;*_'"~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_'"~)]+(?!$))+/,
   del: /^(~~?)(?=[^\s~])([\s\S]*?[^\s~])\1(?=[^~]|$)/,
   text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\<!\[`*~_]|\b_|https?:\/\/|ftp:\/\/|www\.|$)|[^ ](?= {2,}\n)|[^a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-](?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)))/
 };
 
 /**
  * GFM + Line Breaks Inline Grammar
  */
 
-inline.breaks = {
-  ...inline.gfm,
-  br: edit(inline.br).replace('{2,}', '*').getRegex(),
-  text: edit(inline.gfm.text as Rule)
+const inlineBreaks: Record<InlineKeys, RegExp> = {
+  ...inlineGfm,
+  br: edit(br).replace('{2,}', '*').getRegex(),
+  text: edit(inlineGfm.text)
     .replace('\\b_', '\\b_| {2,}\\n')
     .replace(/\{2,\}/g, '*')
     .getRegex()
 };
+
+/**
+ * exports
+ */
+
+export const block = {
+  normal: blockNormal,
+  gfm: blockGfm,
+  pedantic: blockPedantic
+};
+
+export const inline = {
+  normal: inlineNormal,
+  gfm: inlineGfm,
+  breaks: inlineBreaks,
+  pedantic: inlinePedantic
+};
+
+export interface Rules {
+  block: Record<BlockKeys, RegExp>
+  inline: Record<InlineKeys, RegExp>
+}
diff --git a/test/types/marked.ts b/test/types/marked.ts
index dcf9da5e3f..571edaf789 100644
--- a/test/types/marked.ts
+++ b/test/types/marked.ts
@@ -1,11 +1,10 @@
 /* eslint-disable */
 import { marked } from 'marked';
-import { expectType } from 'ts-expect';
 
 // other exports
 import { Lexer, Parser, Tokenizer, Renderer, TextRenderer } from 'marked';
-import type { Tokens, MarkedExtension, TokenizerAndRendererExtension, Token ,TokenizerExtension, MarkedOptions, TokensList, Rules, RendererExtension } from 'marked';
+import type { Tokens, MarkedExtension, TokenizerAndRendererExtension, Token ,TokenizerExtension, MarkedOptions, TokensList, RendererExtension } from 'marked';
 
 const tokenizer = new marked.Tokenizer();
 
@@ -76,7 +75,7 @@ console.log(tokens3);
 // verifying that the second param to inlineTokens can be ignored
 const tokens3a = lexer.inlineTokens(text);
 console.log(tokens3a);
-const re: Rules = marked.Lexer.rules;
+const re: Record<string, Record<string, Record<string, RegExp>>> = marked.Lexer.rules;
 const lexerOptions: MarkedOptions = lexer.options;
 
 const renderer = new marked.Renderer();
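The type test above pins down the new public shape of `marked.Lexer.rules`: two top-level groups (`block`, `inline`), each holding named rule tables whose leaves are typed as `RegExp`. A small sketch of what that buys consumers (not part of the patch; note that disabled rules use the `noopTest` stand-in, which only mimics `exec` at runtime):

    import { Lexer } from 'marked';

    const rules = Lexer.rules;

    // Typed as RegExp, no cast needed:
    const fences: RegExp = rules.block.normal.fences;
    console.log(fences.exec('```js\ncode\n```')?.[2]); // 'js'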

diff --git a/test/unit/marked.test.js b/test/unit/marked.test.js
index 5bfc80b754..a56fd1f7d6 100644
--- a/test/unit/marked.test.js
+++ b/test/unit/marked.test.js
@@ -569,7 +569,7 @@ used extension2 walked</p>
 
     assert.ok(marked.defaults.async);
   });
 
-  it.only('should be async if any extension in use is async', () => {
+  it('should be async if any extension in use is async', () => {
     marked.use({ async: true });
     marked.use({ async: false });
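Dropping the stray `.only` lets the rest of the suite run again; the behavior it guards is that async-ness is sticky across `use()` calls. A usage sketch of that rule:

    import { marked } from 'marked';

    marked.use({ async: true });
    marked.use({ async: false }); // cannot opt back out once an async extension is in use

    const result = marked.parse('# test');    // still returns a Promise
    console.log(result instanceof Promise);   // true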