fix: clean up rules so they can be typed (#3087)
BREAKING CHANGE: Lexer.rules object has been changed so it can be properly typed. Some intermediate rules have been removed.
UziTech committed Nov 29, 2023
1 parent edae309 commit 175fc0c
Showing 7 changed files with 301 additions and 346 deletions.
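What the breaking change looks like from the outside: the intermediate rule containers (e.g. inline.emStrong) and underscore-prefixed rules (e.g. inline._escapes) are gone, and the static Lexer.rules getter is typed instead of returning any. A minimal sketch, assuming the reorganized rules.ts keys the rule sets by flavor (normal/gfm/pedantic) the way the lexer selects them — the renamed rules themselves (emStrongLDelim, anyPunctuation) are taken directly from this diff:

import { Lexer } from 'marked';

const { block, inline } = Lexer.rules; // now typed, no Rules annotation needed

console.log(inline.gfm.emStrongLDelim); // was inline.emStrong.lDelim
console.log(inline.gfm.anyPunctuation); // replaces inline._escapes for unescaping
console.log(block.gfm.table.source);    // a plain RegExp, no longer any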
36 changes: 23 additions & 13 deletions src/Instance.ts
@@ -142,11 +142,14 @@ export class Marked {
if (pack.renderer) {
const renderer = this.defaults.renderer || new _Renderer(this.defaults);
for (const prop in pack.renderer) {
const rendererFunc = pack.renderer[prop as keyof MarkedExtension['renderer']] as GenericRendererFunction;
const rendererKey = prop as keyof _Renderer;
const prevRenderer = renderer[rendererKey] as GenericRendererFunction;
if (!(prop in renderer) || prop === 'options') {
throw new Error(`renderer '${prop}' does not exist`);
}
const rendererProp = prop as Exclude<keyof _Renderer, 'options'>;
const rendererFunc = pack.renderer[rendererProp] as GenericRendererFunction;
const prevRenderer = renderer[rendererProp] as GenericRendererFunction;
// Replace renderer with func to run extension, but fall back if false
renderer[rendererKey] = (...args: unknown[]) => {
renderer[rendererProp] = (...args: unknown[]) => {
let ret = rendererFunc.apply(renderer, args);
if (ret === false) {
ret = prevRenderer.apply(renderer, args);
@@ -159,11 +162,15 @@ export class Marked {
if (pack.tokenizer) {
const tokenizer = this.defaults.tokenizer || new _Tokenizer(this.defaults);
for (const prop in pack.tokenizer) {
const tokenizerFunc = pack.tokenizer[prop as keyof MarkedExtension['tokenizer']] as UnknownFunction;
const tokenizerKey = prop as keyof _Tokenizer;
const prevTokenizer = tokenizer[tokenizerKey] as UnknownFunction;
if (!(prop in tokenizer) || ['options', 'rules', 'lexer'].includes(prop)) {
throw new Error(`tokenizer '${prop}' does not exist`);
}
const tokenizerProp = prop as Exclude<keyof _Tokenizer, 'options' | 'rules' | 'lexer'>;
const tokenizerFunc = pack.tokenizer[tokenizerProp] as UnknownFunction;
const prevTokenizer = tokenizer[tokenizerProp] as UnknownFunction;
// Replace tokenizer with func to run extension, but fall back if false
tokenizer[tokenizerKey] = (...args: unknown[]) => {
// @ts-expect-error cannot type tokenizer function dynamically
tokenizer[tokenizerProp] = (...args: unknown[]) => {
let ret = tokenizerFunc.apply(tokenizer, args);
if (ret === false) {
ret = prevTokenizer.apply(tokenizer, args);
@@ -178,11 +185,14 @@ export class Marked {
if (pack.hooks) {
const hooks = this.defaults.hooks || new _Hooks();
for (const prop in pack.hooks) {
const hooksFunc = pack.hooks[prop as keyof MarkedExtension['hooks']] as UnknownFunction;
const hooksKey = prop as keyof _Hooks;
const prevHook = hooks[hooksKey] as UnknownFunction;
if (!(prop in hooks) || prop === 'options') {
throw new Error(`hook '${prop}' does not exist`);
}
const hooksProp = prop as Exclude<keyof _Hooks, 'options'>;
const hooksFunc = pack.hooks[hooksProp] as UnknownFunction;
const prevHook = hooks[hooksProp] as UnknownFunction;
if (_Hooks.passThroughHooks.has(prop)) {
hooks[hooksKey as 'preprocess' | 'postprocess'] = (arg: string | undefined) => {
hooks[hooksProp] = (arg: string | undefined) => {
if (this.defaults.async) {
return Promise.resolve(hooksFunc.call(hooks, arg)).then(ret => {
return prevHook.call(hooks, ret) as string;
@@ -193,7 +203,7 @@
return prevHook.call(hooks, ret) as string;
};
} else {
hooks[hooksKey] = (...args: unknown[]) => {
hooks[hooksProp] = (...args: unknown[]) => {
let ret = hooksFunc.apply(hooks, args);
if (ret === false) {
ret = prevHook.apply(hooks, args);
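The Instance.ts refactor above changes only the typing, not the override semantics: an extension's renderer, tokenizer, or hook still runs first, and returning false falls back to the previous implementation (prevRenderer, prevTokenizer, prevHook). A minimal usage sketch — the string-based heading(text, level) renderer signature matches this era of marked, but treat the exact signature as an assumption:

import { Marked } from 'marked';

const marked = new Marked();

marked.use({
  renderer: {
    // Only style level-1 headings; returning false hands the call back
    // to prevRenderer, which is the default renderer here.
    heading(text: string, level: number) {
      if (level !== 1) {
        return false;
      }
      return `<h1 class="title">${text}</h1>\n`;
    }
  }
});

console.log(marked.parse('# Hello'));  // <h1 class="title">Hello</h1>
console.log(marked.parse('## World')); // default renderer output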
6 changes: 2 additions & 4 deletions src/Lexer.ts
@@ -3,7 +3,6 @@ import { _defaults } from './defaults.ts';
import { block, inline } from './rules.ts';
import type { Token, TokensList, Tokens } from './Tokens.ts';
import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';
import type { Rules } from './rules.ts';

/**
* Block Lexer
@@ -22,8 +21,7 @@ export class _Lexer {

constructor(options?: MarkedOptions) {
// TokenList cannot be created in one go
// @ts-expect-error
this.tokens = [];
this.tokens = [] as unknown as TokensList;
this.tokens.links = Object.create(null);
this.options = options || _defaults;
this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
@@ -59,7 +57,7 @@ export class _Lexer {
/**
* Expose Rules
*/
static get rules(): Rules {
static get rules() {
return {
block,
inline
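The `[] as unknown as TokensList` cast replaces the bare @ts-expect-error because TokensList is an array type intersected with a links map, so a fresh empty array only satisfies it once links is attached on the following line. Roughly — an approximation of the shape in src/Tokens.ts, not the exact declaration:

import type { Token, Tokens } from 'marked';

// An array that also carries the link definitions collected while lexing.
type TokensList = Token[] & {
  links: { [key: string]: Pick<Tokens.Link, 'href' | 'title'> };
};

const tokens = [] as unknown as TokensList; // not yet a valid TokensList...
tokens.links = Object.create(null);         // ...valid after this line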
134 changes: 64 additions & 70 deletions src/Tokenizer.ts
@@ -5,6 +5,7 @@ import {
escape,
findClosingBracket
} from './helpers.ts';
import type { Rules } from './rules.ts';
import type { _Lexer } from './Lexer.ts';
import type { Links, Tokens } from './Tokens.ts';
import type { MarkedOptions } from './MarkedOptions.ts';
@@ -69,9 +70,8 @@ function indentCodeCompensation(raw: string, text: string) {
*/
export class _Tokenizer {
options: MarkedOptions;
// TODO: Fix this rules type
rules: any;
lexer!: _Lexer;
rules!: Rules; // set by the lexer
lexer!: _Lexer; // set by the lexer

constructor(options?: MarkedOptions) {
this.options = options || _defaults;
@@ -111,7 +111,7 @@ export class _Tokenizer {
return {
type: 'code',
raw,
lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
text
};
}
@@ -182,7 +182,7 @@
ordered: isordered,
start: isordered ? +bull.slice(0, -1) : '',
loose: false,
items: [] as Tokens.ListItem[]
items: []
};

bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
@@ -207,10 +207,10 @@
break;
}

raw = cap[0] as string;
raw = cap[0];
src = src.substring(raw.length);

let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length)) as string;
let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
let nextLine = src.split('\n', 1)[0];

let indent = 0;
@@ -338,7 +338,7 @@

// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
list.items[list.items.length - 1].raw = raw.trimEnd();
(list.items[list.items.length - 1] as Tokens.ListItem).text = itemContents.trimEnd();
(list.items[list.items.length - 1]).text = itemContents.trimEnd();
list.raw = list.raw.trimEnd();

// Item child tokens handled here at end because we needed to have the final item to trim it first
@@ -384,8 +384,8 @@
const cap = this.rules.block.def.exec(src);
if (cap) {
const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
return {
type: 'def',
tag,
Expand All @@ -398,67 +398,61 @@ export class _Tokenizer {

table(src: string): Tokens.Table | undefined {
const cap = this.rules.block.table.exec(src);
if (cap) {
if (!/[:|]/.test(cap[2])) {
// delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
return;
}
if (!cap) {
return;
}

const item: Tokens.Table = {
type: 'table',
raw: cap[0],
header: splitCells(cap[1]).map(c => {
return { text: c, tokens: [] };
}),
align: cap[2].replace(/^\||\| *$/g, '').split('|'),
rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
};
if (!/[:|]/.test(cap[2])) {
// delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
return;
}

if (item.header.length === item.align.length) {
let l = item.align.length;
let i, j, k, row;
for (i = 0; i < l; i++) {
const align = item.align[i];
if (align) {
if (/^ *-+: *$/.test(align)) {
item.align[i] = 'right';
} else if (/^ *:-+: *$/.test(align)) {
item.align[i] = 'center';
} else if (/^ *:-+ *$/.test(align)) {
item.align[i] = 'left';
} else {
item.align[i] = null;
}
}
}
const headers = splitCells(cap[1]);
const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];

l = item.rows.length;
for (i = 0; i < l; i++) {
item.rows[i] = splitCells(item.rows[i] as unknown as string, item.header.length).map(c => {
return { text: c, tokens: [] };
});
}
const item: Tokens.Table = {
type: 'table',
raw: cap[0],
header: [],
align: [],
rows: []
};

// parse child tokens inside headers and cells
if (headers.length !== aligns.length) {
// header and align columns must be equal, rows can be different.
return;
}

// header child tokens
l = item.header.length;
for (j = 0; j < l; j++) {
item.header[j].tokens = this.lexer.inline(item.header[j].text);
}
for (const align of aligns) {
if (/^ *-+: *$/.test(align)) {
item.align.push('right');
} else if (/^ *:-+: *$/.test(align)) {
item.align.push('center');
} else if (/^ *:-+ *$/.test(align)) {
item.align.push('left');
} else {
item.align.push(null);
}
}

// cell child tokens
l = item.rows.length;
for (j = 0; j < l; j++) {
row = item.rows[j];
for (k = 0; k < row.length; k++) {
row[k].tokens = this.lexer.inline(row[k].text);
}
}
for (const header of headers) {
item.header.push({
text: header,
tokens: this.lexer.inline(header)
});
}

return item;
}
for (const row of rows) {
item.rows.push(splitCells(row, item.header.length).map(cell => {
return {
text: cell,
tokens: this.lexer.inline(cell)
};
}));
}

return item;
}

lheading(src: string): Tokens.Heading | undefined {
@@ -587,8 +581,8 @@
}
}
return outputLink(cap, {
href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
title: title ? title.replace(this.rules.inline._escapes, '$1') : title
href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title
}, cap[0], this.lexer);
}
}
@@ -597,8 +591,8 @@
let cap;
if ((cap = this.rules.inline.reflink.exec(src))
|| (cap = this.rules.inline.nolink.exec(src))) {
let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
link = links[link.toLowerCase()];
const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
const link = links[linkString.toLowerCase()];
if (!link) {
const text = cap[0].charAt(0);
return {
@@ -612,7 +606,7 @@
}

emStrong(src: string, maskedSrc: string, prevChar = ''): Tokens.Em | Tokens.Strong | undefined {
let match = this.rules.inline.emStrong.lDelim.exec(src);
let match = this.rules.inline.emStrongLDelim.exec(src);
if (!match) return;

// _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
@@ -625,7 +619,7 @@
const lLength = [...match[0]].length - 1;
let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;

const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
const endReg = match[0][0] === '*' ? this.rules.inline.emStrongRDelimAst : this.rules.inline.emStrongRDelimUnd;
endReg.lastIndex = 0;

// Clip maskedSrc to same section of string as src (move to lexer?)
@@ -761,7 +755,7 @@
let prevCapZero;
do {
prevCapZero = cap[0];
cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? '';
} while (prevCapZero !== cap[0]);
text = escape(cap[0]);
if (cap[1] === 'www.') {
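For context on the repeated replace(this.rules.inline.anyPunctuation, '$1') calls above: the renamed rule matches backslash escapes, and the '$1' replacement strips the backslash out of captured hrefs, titles, and code-fence info strings. A simplified stand-in for the real rule — the actual pattern is assembled in src/rules.ts from the full punctuation set:

// One capture group, so '$1' keeps the punctuation and drops the backslash.
const anyPunctuation = /\\([!"#$%&'()*+,\-./:;<=>?@[\]\\^_`{|}~])/g;

const href = '<https://example.com/a\\_b>'
  .replace(/^<(.*)>$/, '$1')      // same unwrap def() does above
  .replace(anyPunctuation, '$1'); // -> https://example.com/a_b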
16 changes: 7 additions & 9 deletions src/helpers.ts
@@ -1,5 +1,3 @@
import type { Rule } from './rules.ts';

/**
* Helpers
*/
@@ -48,18 +46,18 @@ export function unescape(html: string) {

const caret = /(^|[^\[])\^/g;

export function edit(regex: Rule, opt?: string) {
regex = typeof regex === 'string' ? regex : regex.source;
export function edit(regex: string | RegExp, opt?: string) {
let source = typeof regex === 'string' ? regex : regex.source;
opt = opt || '';
const obj = {
replace: (name: string | RegExp, val: string | RegExp) => {
val = typeof val === 'object' && 'source' in val ? val.source : val;
val = val.replace(caret, '$1');
regex = (regex as string).replace(name, val);
let valSource = typeof val === 'string' ? val : val.source;
valSource = valSource.replace(caret, '$1');
source = source.replace(name, valSource);
return obj;
},
getRegex: () => {
return new RegExp(regex, opt);
return new RegExp(source, opt);
}
};
return obj;
@@ -74,7 +72,7 @@ export function cleanUrl(href: string) {
return href;
}

export const noopTest = { exec: () => null };
export const noopTest = { exec: () => null } as unknown as RegExp;

export function splitCells(tableRow: string, count?: number) {
// ensure that every cell-delimiting pipe has a space
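For the retyped edit helper above, this is how rules.ts consumes it: start from a template regex, splice sub-patterns into named placeholders with replace (which also strips ^ anchors from the spliced value via the caret regex), and compile once with getRegex. A hypothetical rule in that style — the template and bullet pattern here are illustrative, not the real list rule:

import { edit } from './helpers.ts';

const bullet = /(?:[*+-]|\d{1,9}[.)])/;
const listStart = edit(/^( {0,3}bull)([ \t][^\n]+?)?(?:\n|$)/)
  .replace(/bull/g, bullet)
  .getRegex();

console.log(listStart.test('- item'));  // true
console.log(listStart.test('1) item')); // true
console.log(listStart.test('item'));    // false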