Mirror of https://codeberg.org/yeentown/barkey.git (synced 2025-10-26 19:14:12 +00:00)
			
		
		
		
	Improve MFM bracket matching
Co-authored-by: syuilo <syuilotan@yahoo.co.jp>
This commit is contained in:
		
		parent be0cb88b6c
		commit 580191fb17
					
				
					 3 changed files with 115 additions and 28 deletions
				
			
		|  | @ -1,7 +1,7 @@ | ||||||
| import * as P from 'parsimmon'; | import * as P from 'parsimmon'; | ||||||
| import parseAcct from '../misc/acct/parse'; | import parseAcct from '../misc/acct/parse'; | ||||||
| import { toUnicode } from 'punycode'; | import { toUnicode } from 'punycode'; | ||||||
| import { takeWhile } from '../prelude/array'; | import { takeWhile, cumulativeSum } from '../prelude/array'; | ||||||
| import { Tree } from '../prelude/tree'; | import { Tree } from '../prelude/tree'; | ||||||
| import * as T from '../prelude/tree'; | import * as T from '../prelude/tree'; | ||||||
| 
 | 
 | ||||||
|  | @ -42,30 +42,18 @@ export function createTree(type: string, children: MfmForest, props: any): MfmTr | ||||||
| 	return T.createTree({ type, props }, children); | 	return T.createTree({ type, props }, children); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| function getTrailingPosition(x: string): number { | export function removeOrphanedBrackets(s: string): string { | ||||||
| 	const brackets = [ | 	const openBrackets = ['(', '「']; | ||||||
| 		['(', ')'], | 	const closeBrackets = [')', '」']; | ||||||
| 		['「', '」'], | 	const xs = cumulativeSum(s.split('').map(c => { | ||||||
| 	]; | 		if (openBrackets.includes(c)) return 1; | ||||||
| 	const pendingBrackets = [] as any; | 		if (closeBrackets.includes(c)) return -1; | ||||||
| 	const end = x.split('').findIndex(char => { | 		return 0; | ||||||
| 		const closeMatch = brackets.map(x => x[1]).indexOf(char); | 	})); | ||||||
| 		const openMatch = brackets.map(x => x[0]).indexOf(char); | 	const firstOrphanedCloseBracket = xs.findIndex(x => x < 0); | ||||||
| 		if (closeMatch != -1) { | 	if (firstOrphanedCloseBracket !== -1) return s.substr(0, firstOrphanedCloseBracket); | ||||||
| 			if (pendingBrackets[closeMatch] > 0) { | 	const lastMatched = xs.lastIndexOf(0); | ||||||
| 				pendingBrackets[closeMatch]--; | 	return s.substr(0, lastMatched + 1); | ||||||
| 				return false; |  | ||||||
| 			} else { |  | ||||||
| 				return true; |  | ||||||
| 			} |  | ||||||
| 		} else if (openMatch != -1) { |  | ||||||
| 			pendingBrackets[openMatch] = (pendingBrackets[openMatch] || 0) + 1; |  | ||||||
| 			return false; |  | ||||||
| 		} else { |  | ||||||
| 			return false; |  | ||||||
| 		} |  | ||||||
| 	}); |  | ||||||
| 	return end > 0 ? end : x.length; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const newline = P((input, i) => { | const newline = P((input, i) => { | ||||||
|  | @ -220,7 +208,7 @@ const mfm = P.createLanguage({ | ||||||
| 			const match = text.match(/^#([^\s\.,!\?#]+)/i); | 			const match = text.match(/^#([^\s\.,!\?#]+)/i); | ||||||
| 			if (!match) return P.makeFailure(i, 'not a hashtag'); | 			if (!match) return P.makeFailure(i, 'not a hashtag'); | ||||||
| 			let hashtag = match[1]; | 			let hashtag = match[1]; | ||||||
| 			hashtag = hashtag.substr(0, getTrailingPosition(hashtag)); | 			hashtag = removeOrphanedBrackets(hashtag); | ||||||
| 			if (hashtag.match(/^[0-9]+$/)) return P.makeFailure(i, 'not a hashtag'); | 			if (hashtag.match(/^[0-9]+$/)) return P.makeFailure(i, 'not a hashtag'); | ||||||
| 			if (input[i - 1] != null && input[i - 1].match(/[a-z0-9]/i)) return P.makeFailure(i, 'not a hashtag'); | 			if (input[i - 1] != null && input[i - 1].match(/[a-z0-9]/i)) return P.makeFailure(i, 'not a hashtag'); | ||||||
| 			if (hashtag.length > 50) return P.makeFailure(i, 'not a hashtag'); | 			if (hashtag.length > 50) return P.makeFailure(i, 'not a hashtag'); | ||||||
|  | @ -390,7 +378,7 @@ const mfm = P.createLanguage({ | ||||||
| 			const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/); | 			const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/); | ||||||
| 			if (!match) return P.makeFailure(i, 'not a url'); | 			if (!match) return P.makeFailure(i, 'not a url'); | ||||||
| 			let url = match[0]; | 			let url = match[0]; | ||||||
| 			url = url.substr(0, getTrailingPosition(url)); | 			url = removeOrphanedBrackets(url); | ||||||
| 			if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.')); | 			if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.')); | ||||||
| 			if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(',')); | 			if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(',')); | ||||||
| 			return P.makeSuccess(i + url.length, url); | 			return P.makeSuccess(i + url.length, url); | ||||||
|  |  | ||||||
|  | @ -109,3 +109,9 @@ export function takeWhile<T>(f: Predicate<T>, xs: T[]): T[] { | ||||||
| 	} | 	} | ||||||
| 	return ys; | 	return ys; | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | export function cumulativeSum(xs: number[]): number[] { | ||||||
|  | 	const ys = Array.from(xs); // shallow copy (sufficient for numbers) so xs is not mutated | ||||||
|  | 	for (let i = 1; i < ys.length; i++) ys[i] += ys[i - 1]; | ||||||
|  | 	return ys; | ||||||
|  | } | ||||||
|  |  | ||||||
							
								
								
									
										95
									
								
								test/mfm.ts
									
										
									
									
									
								
							
							
						
						
									
										95
									
								
								test/mfm.ts
									
										
									
									
									
								
							|  | @ -6,7 +6,7 @@ import * as assert from 'assert'; | ||||||
| 
 | 
 | ||||||
| import analyze from '../src/mfm/parse'; | import analyze from '../src/mfm/parse'; | ||||||
| import toHtml from '../src/mfm/html'; | import toHtml from '../src/mfm/html'; | ||||||
| import { createTree as tree, createLeaf as leaf, MfmTree } from '../src/mfm/parser'; | import { createTree as tree, createLeaf as leaf, MfmTree, removeOrphanedBrackets } from '../src/mfm/parser'; | ||||||
| 
 | 
 | ||||||
| function text(text: string): MfmTree { | function text(text: string): MfmTree { | ||||||
| 	return leaf('text', { text }); | 	return leaf('text', { text }); | ||||||
|  | @ -49,6 +49,99 @@ describe('createTree', () => { | ||||||
| 	}); | 	}); | ||||||
| }); | }); | ||||||
| 
 | 
 | ||||||
|  | describe('removeOrphanedBrackets', () => { | ||||||
|  | 	it('single (contained)', () => { | ||||||
|  | 		const input = '(foo)'; | ||||||
|  | 		const expected = '(foo)'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('single (head)', () => { | ||||||
|  | 		const input = '(foo)bar'; | ||||||
|  | 		const expected = '(foo)bar'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('single (tail)', () => { | ||||||
|  | 		const input = 'foo(bar)'; | ||||||
|  | 		const expected = 'foo(bar)'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('a', () => { | ||||||
|  | 		const input = '(foo'; | ||||||
|  | 		const expected = ''; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('b', () => { | ||||||
|  | 		const input = ')foo'; | ||||||
|  | 		const expected = ''; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('nested', () => { | ||||||
|  | 		const input = 'foo(「(bar)」)'; | ||||||
|  | 		const expected = 'foo(「(bar)」)'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('no brackets', () => { | ||||||
|  | 		const input = 'foo'; | ||||||
|  | 		const expected = 'foo'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('with foreign bracket (single)', () => { | ||||||
|  | 		const input = 'foo(bar))'; | ||||||
|  | 		const expected = 'foo(bar)'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('with foreign bracket (open)', () => { | ||||||
|  | 		const input = 'foo(bar'; | ||||||
|  | 		const expected = 'foo'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('with foreign bracket (close)', () => { | ||||||
|  | 		const input = 'foo)bar'; | ||||||
|  | 		const expected = 'foo'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('with foreign bracket (close and open)', () => { | ||||||
|  | 		const input = 'foo)(bar'; | ||||||
|  | 		const expected = 'foo'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('various bracket type', () => { | ||||||
|  | 		const input = 'foo「(bar)」('; | ||||||
|  | 		const expected = 'foo「(bar)」'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('intersected', () => { | ||||||
|  | 		const input = 'foo(「)」'; | ||||||
|  | 		const expected = 'foo(「)」'; | ||||||
|  | 		const actual = removeOrphanedBrackets(input); | ||||||
|  | 		assert.deepStrictEqual(actual, expected); | ||||||
|  | 	}); | ||||||
|  | }); | ||||||
|  | 
 | ||||||
| describe('MFM', () => { | describe('MFM', () => { | ||||||
| 	it('can be analyzed', () => { | 	it('can be analyzed', () => { | ||||||
| 		const tokens = analyze('@himawari @hima_sub@namori.net お腹ペコい :cat: #yryr'); | 		const tokens = analyze('@himawari @hima_sub@namori.net お腹ペコい :cat: #yryr'); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue