404 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			404 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/*---------------------------------------------------------------------------------------------
 *  Copyright (c) Microsoft Corporation. All rights reserved.
 *  Licensed under the MIT License. See License.txt in the project root for license information.
 *--------------------------------------------------------------------------------------------*/
|  | import * as l10n from '@vscode/l10n'; | ||
|  | import { TokenType, ScannerState } from '../htmlLanguageTypes'; | ||
/**
 * Forward/backward cursor over an in-memory string, addressed by UTF-16
 * code-unit offsets. All character arguments and results are char codes
 * (numbers), not one-character strings.
 */
class MultiLineStream {
    /**
     * @param {string} source   Text to read from.
     * @param {number} position Offset at which the cursor starts.
     */
    constructor(source, position) {
        this.source = source;
        this.len = source.length;
        this.position = position;
    }
    /** @returns {boolean} `true` once the cursor is at or past the end of the source. */
    eos() {
        return this.len <= this.position;
    }
    /** @returns {string} The complete source text. */
    getSource() {
        return this.source;
    }
    /** @returns {number} The current cursor offset. */
    pos() {
        return this.position;
    }
    /** Moves the cursor to the absolute offset `pos`. */
    goBackTo(pos) {
        this.position = pos;
    }
    /** Moves the cursor `n` code units backwards (no bounds check). */
    goBack(n) {
        this.position -= n;
    }
    /** Moves the cursor `n` code units forwards (no bounds check). */
    advance(n) {
        this.position += n;
    }
    /** Moves the cursor to the end of the source. */
    goToEnd() {
        this.position = this.len;
    }
    /**
     * Reads the char code under the cursor and steps past it.
     * @returns {number} The char code, or 0 when the cursor is outside the source.
     */
    nextChar() {
        // charCodeAt yields NaN outside the string; `|| 0` maps that to 0.
        return this.source.charCodeAt(this.position++) || 0;
    }
    /**
     * Reads a char code relative to the cursor without moving it.
     * @param {number} [n=0] Offset relative to the cursor (may be negative).
     * @returns {number} The char code, or 0 when that offset is outside the source.
     */
    peekChar(n = 0) {
        return this.source.charCodeAt(this.position + n) || 0;
    }
    /**
     * Steps over one character if it equals `ch`.
     * @param {number} ch Expected char code.
     * @returns {boolean} Whether the cursor moved.
     */
    advanceIfChar(ch) {
        if (ch === this.source.charCodeAt(this.position)) {
            this.position++;
            return true;
        }
        return false;
    }
    /**
     * Steps over the sequence `ch` if the source continues with exactly it.
     * @param {number[]} ch Expected char codes, in order.
     * @returns {boolean} Whether the whole sequence matched (the cursor only moves on success).
     */
    advanceIfChars(ch) {
        if (this.position + ch.length > this.len) {
            return false;
        }
        for (let i = 0; i < ch.length; i++) {
            if (this.source.charCodeAt(this.position + i) !== ch[i]) {
                return false;
            }
        }
        this.advance(ch.length);
        return true;
    }
    /**
     * Matches `regex` against the text from the cursor onwards and, on success,
     * moves the cursor to the end of the match.
     * @param {RegExp} regex Pattern to apply (anchor it with `^` to match only at the cursor).
     * @returns {string} The matched text, or '' when nothing matched.
     */
    advanceIfRegExp(regex) {
        // slice() replaces the deprecated substr(); both are identical for one argument.
        const match = this.source.slice(this.position).match(regex);
        if (match) {
            this.position = this.position + match.index + match[0].length;
            return match[0];
        }
        return '';
    }
    /**
     * Moves the cursor to the start of the first match of `regex` at or after
     * the cursor, or to the end of the source when there is no match.
     * @param {RegExp} regex Pattern to search for.
     * @returns {string} The matched text, or '' when nothing matched.
     */
    advanceUntilRegExp(regex) {
        const match = this.source.slice(this.position).match(regex);
        if (match) {
            this.position = this.position + match.index;
            return match[0];
        }
        this.goToEnd();
        return '';
    }
    /**
     * Moves the cursor forward until it sits on `ch`.
     * @param {number} ch Char code to stop at.
     * @returns {boolean} `true` if `ch` was found; otherwise `false` with the cursor at the end.
     */
    advanceUntilChar(ch) {
        while (this.position < this.len) {
            if (this.source.charCodeAt(this.position) === ch) {
                return true;
            }
            this.advance(1);
        }
        return false;
    }
    /**
     * Moves the cursor forward until the source continues with the sequence `ch`.
     * @param {number[]} ch Char codes to stop in front of.
     * @returns {boolean} `true` if the sequence was found; otherwise `false` with the cursor at the end.
     */
    advanceUntilChars(ch) {
        while (this.position + ch.length <= this.len) {
            let i = 0;
            while (i < ch.length && this.source.charCodeAt(this.position + i) === ch[i]) {
                i++;
            }
            if (i === ch.length) {
                return true;
            }
            this.advance(1);
        }
        this.goToEnd();
        return false;
    }
    /**
     * Steps over a run of space, tab, newline, form feed and carriage return.
     * @returns {boolean} Whether at least one character was skipped.
     */
    skipWhitespace() {
        const n = this.advanceWhileChar(ch => {
            return ch === _WSP || ch === _TAB || ch === _NWL || ch === _LFD || ch === _CAR;
        });
        return n > 0;
    }
    /**
     * Steps over characters for as long as `condition` accepts them.
     * @param {(ch: number) => boolean} condition Predicate called with each char code.
     * @returns {number} How many characters were skipped.
     */
    advanceWhileChar(condition) {
        const posNow = this.position;
        while (this.position < this.len && condition(this.source.charCodeAt(this.position))) {
            this.position++;
        }
        return this.position - posNow;
    }
}
// Char codes (UTF-16 code units) of the characters the scanner compares against.
const _BNG = '!'.charCodeAt(0);
const _MIN = '-'.charCodeAt(0);
const _LAN = '<'.charCodeAt(0);
const _RAN = '>'.charCodeAt(0);
const _FSL = '/'.charCodeAt(0);
const _EQS = '='.charCodeAt(0);
const _DQO = '"'.charCodeAt(0);
const _SQO = '\''.charCodeAt(0);
const _NWL = '\n'.charCodeAt(0);
const _CAR = '\r'.charCodeAt(0);
const _LFD = '\f'.charCodeAt(0); // form feed (U+000C), despite the name
const _WSP = ' '.charCodeAt(0);
const _TAB = '\t'.charCodeAt(0);
// `type` attribute values for which the body of a <script> element is scanned
// as regular HTML content instead of as script text.
// The table has no prototype, so a type value such as "constructor" or
// "toString" cannot resolve to a truthy member inherited from Object.prototype.
const htmlScriptContents = Object.assign(Object.create(null), {
    'text/x-handlebars-template': true,
    // Fix for https://github.com/microsoft/vscode/issues/77977
    'text/html': true,
});
/**
 * Creates a pull-style tokenizer over HTML text. Each call to `scan()` consumes
 * the next token and returns its `TokenType`; the remaining accessors describe
 * the token that was scanned last.
 *
 * @param {string} input The text to tokenize.
 * @param {number} [initialOffset=0] Offset in `input` at which scanning starts.
 * @param {ScannerState} [initialState=ScannerState.WithinContent] State the scanner starts in.
 * @param {boolean} [emitPseudoCloseTags=false] When true, a `<` met inside an unfinished
 *   tag yields a zero-length StartTagClose/EndTagClose token carrying the error
 *   'Closing bracket missing.' instead of an Unknown token.
 * @returns {{scan: Function, getTokenType: Function, getTokenOffset: Function,
 *   getTokenLength: Function, getTokenEnd: Function, getTokenText: Function,
 *   getScannerState: Function, getTokenError: Function}} The scanner.
 */
export function createScanner(input, initialOffset = 0, initialState = ScannerState.WithinContent, emitPseudoCloseTags = false) {
    const stream = new MultiLineStream(input, initialOffset);
    let state = initialState;
    let tokenOffset = 0;
    let tokenType = TokenType.Unknown;
    let tokenError;
    // True once whitespace has been seen since the tag name or the last attribute token.
    let hasSpaceAfterTag;
    // Lower-cased name of the start tag currently being scanned.
    let lastTag;
    // Lower-cased name of the attribute scanned last.
    let lastAttributeName;
    // Value of the `type` attribute of the current start tag, if one was seen.
    let lastTypeValue;
    /** Consumes an element name at the cursor; returns it lower-cased ('' if none). */
    function nextElementName() {
        return stream.advanceIfRegExp(/^[_:\w][_:\w-.\d]*/).toLowerCase();
    }
    /** Consumes an attribute name at the cursor; returns it lower-cased ('' if none). */
    function nextAttributeName() {
        return stream.advanceIfRegExp(/^[^\s"'></=\x00-\x0F\x7F\x80-\x9F]*/).toLowerCase();
    }
    /** Records the token that was just scanned and returns its type. */
    function finishToken(offset, type, errorMessage) {
        tokenType = type;
        tokenOffset = offset;
        tokenError = errorMessage;
        return type;
    }
    /**
     * Scans the next token. If the state machine produced a non-EOS token
     * without consuming input (other than a permitted pseudo close tag), one
     * character is skipped and reported as Unknown so callers always progress.
     */
    function scan() {
        const offset = stream.pos();
        const oldState = state;
        const token = internalScan();
        const isPseudoClose = emitPseudoCloseTags && (token === TokenType.StartTagClose || token === TokenType.EndTagClose);
        if (token !== TokenType.EOS && offset === stream.pos() && !isPseudoClose) {
            console.warn(`Scanner.scan has not advanced at offset ${offset}, state before: ${oldState} after: ${state}`);
            stream.advance(1);
            return finishToken(offset, TokenType.Unknown);
        }
        return token;
    }
    /** One step of the state machine; may call itself after a state change that consumed nothing. */
    function internalScan() {
        const offset = stream.pos();
        if (stream.eos()) {
            return finishToken(offset, TokenType.EOS);
        }
        let errorMessage;
        switch (state) {
            case ScannerState.WithinComment:
                if (stream.advanceIfChars([_MIN, _MIN, _RAN])) { // -->
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.EndCommentTag);
                }
                stream.advanceUntilChars([_MIN, _MIN, _RAN]); // -->
                return finishToken(offset, TokenType.Comment);
            case ScannerState.WithinDoctype:
                if (stream.advanceIfChar(_RAN)) {
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.EndDoctypeTag);
                }
                stream.advanceUntilChar(_RAN); // >
                return finishToken(offset, TokenType.Doctype);
            case ScannerState.WithinContent:
                if (stream.advanceIfChar(_LAN)) { // <
                    if (!stream.eos() && stream.peekChar() === _BNG) { // !
                        if (stream.advanceIfChars([_BNG, _MIN, _MIN])) { // <!--
                            state = ScannerState.WithinComment;
                            return finishToken(offset, TokenType.StartCommentTag);
                        }
                        if (stream.advanceIfRegExp(/^!doctype/i)) {
                            state = ScannerState.WithinDoctype;
                            return finishToken(offset, TokenType.StartDoctypeTag);
                        }
                    }
                    if (stream.advanceIfChar(_FSL)) { // /
                        state = ScannerState.AfterOpeningEndTag;
                        return finishToken(offset, TokenType.EndTagOpen);
                    }
                    state = ScannerState.AfterOpeningStartTag;
                    return finishToken(offset, TokenType.StartTagOpen);
                }
                stream.advanceUntilChar(_LAN);
                return finishToken(offset, TokenType.Content);
            case ScannerState.AfterOpeningEndTag: {
                const tagName = nextElementName();
                if (tagName.length > 0) {
                    state = ScannerState.WithinEndTag;
                    return finishToken(offset, TokenType.EndTag);
                }
                if (stream.skipWhitespace()) { // white space is not valid here
                    return finishToken(offset, TokenType.Whitespace, l10n.t('Tag name must directly follow the open bracket.'));
                }
                state = ScannerState.WithinEndTag;
                stream.advanceUntilChar(_RAN);
                if (offset < stream.pos()) {
                    return finishToken(offset, TokenType.Unknown, l10n.t('End tag name expected.'));
                }
                return internalScan();
            }
            case ScannerState.WithinEndTag:
                if (stream.skipWhitespace()) { // white space is valid here
                    return finishToken(offset, TokenType.Whitespace);
                }
                if (stream.advanceIfChar(_RAN)) { // >
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.EndTagClose);
                }
                if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.EndTagClose, l10n.t('Closing bracket missing.'));
                }
                errorMessage = l10n.t('Closing bracket expected.');
                break; // handled after the switch: skip one character, report Unknown
            case ScannerState.AfterOpeningStartTag:
                lastTag = nextElementName();
                lastTypeValue = void 0;
                lastAttributeName = void 0;
                if (lastTag.length > 0) {
                    hasSpaceAfterTag = false;
                    state = ScannerState.WithinTag;
                    return finishToken(offset, TokenType.StartTag);
                }
                if (stream.skipWhitespace()) { // white space is not valid here
                    return finishToken(offset, TokenType.Whitespace, l10n.t('Tag name must directly follow the open bracket.'));
                }
                state = ScannerState.WithinTag;
                stream.advanceUntilChar(_RAN);
                if (offset < stream.pos()) {
                    return finishToken(offset, TokenType.Unknown, l10n.t('Start tag name expected.'));
                }
                return internalScan();
            case ScannerState.WithinTag:
                if (stream.skipWhitespace()) {
                    hasSpaceAfterTag = true; // remember that we have seen a whitespace
                    return finishToken(offset, TokenType.Whitespace);
                }
                if (hasSpaceAfterTag) {
                    lastAttributeName = nextAttributeName();
                    if (lastAttributeName.length > 0) {
                        state = ScannerState.AfterAttributeName;
                        hasSpaceAfterTag = false;
                        return finishToken(offset, TokenType.AttributeName);
                    }
                }
                if (stream.advanceIfChars([_FSL, _RAN])) { // />
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.StartTagSelfClose);
                }
                if (stream.advanceIfChar(_RAN)) { // >
                    if (lastTag === 'script') {
                        // Own-property check: a type value such as "constructor" must not
                        // resolve to a truthy member inherited from Object.prototype.
                        const staysHtml = lastTypeValue
                            && Object.prototype.hasOwnProperty.call(htmlScriptContents, lastTypeValue)
                            && htmlScriptContents[lastTypeValue];
                        if (staysHtml) {
                            // stay in html
                            state = ScannerState.WithinContent;
                        }
                        else {
                            state = ScannerState.WithinScriptContent;
                        }
                    }
                    else if (lastTag === 'style') {
                        state = ScannerState.WithinStyleContent;
                    }
                    else {
                        state = ScannerState.WithinContent;
                    }
                    return finishToken(offset, TokenType.StartTagClose);
                }
                if (emitPseudoCloseTags && stream.peekChar() === _LAN) { // <
                    state = ScannerState.WithinContent;
                    return finishToken(offset, TokenType.StartTagClose, l10n.t('Closing bracket missing.'));
                }
                stream.advance(1);
                return finishToken(offset, TokenType.Unknown, l10n.t('Unexpected character in tag.'));
            case ScannerState.AfterAttributeName:
                if (stream.skipWhitespace()) {
                    hasSpaceAfterTag = true;
                    return finishToken(offset, TokenType.Whitespace);
                }
                if (stream.advanceIfChar(_EQS)) {
                    state = ScannerState.BeforeAttributeValue;
                    return finishToken(offset, TokenType.DelimiterAssign);
                }
                state = ScannerState.WithinTag;
                return internalScan(); // no advance yet - jump to WithinTag
            case ScannerState.BeforeAttributeValue: {
                if (stream.skipWhitespace()) {
                    return finishToken(offset, TokenType.Whitespace);
                }
                // Unquoted value.
                let attributeValue = stream.advanceIfRegExp(/^[^\s"'`=<>]+/);
                if (attributeValue.length > 0) {
                    if (stream.peekChar() === _RAN && stream.peekChar(-1) === _FSL) { // <foo bar=http://foo/>
                        // Give the trailing '/' back so it is scanned as part of '/>'.
                        stream.goBack(1);
                        attributeValue = attributeValue.substring(0, attributeValue.length - 1);
                    }
                    if (lastAttributeName === 'type') {
                        lastTypeValue = attributeValue;
                    }
                    if (attributeValue.length > 0) {
                        state = ScannerState.WithinTag;
                        hasSpaceAfterTag = false;
                        return finishToken(offset, TokenType.AttributeValue);
                    }
                }
                // Quoted value.
                const ch = stream.peekChar();
                if (ch === _SQO || ch === _DQO) {
                    stream.advance(1); // consume quote
                    if (stream.advanceUntilChar(ch)) {
                        stream.advance(1); // consume quote
                    }
                    if (lastAttributeName === 'type') {
                        // Text between the opening quote and the last consumed character.
                        lastTypeValue = stream.getSource().substring(offset + 1, stream.pos() - 1);
                    }
                    state = ScannerState.WithinTag;
                    hasSpaceAfterTag = false;
                    return finishToken(offset, TokenType.AttributeValue);
                }
                state = ScannerState.WithinTag;
                hasSpaceAfterTag = false;
                return internalScan(); // no advance yet - jump to WithinTag
            }
            case ScannerState.WithinScriptContent: {
                // see http://stackoverflow.com/questions/14574471/how-do-browsers-parse-a-script-tag-exactly
                // 1: plain script text, 2: after '<!--', 3: after '<script' seen while in 2
                let scriptState = 1;
                while (!stream.eos()) {
                    const match = stream.advanceIfRegExp(/<!--|-->|<\/?script\s*\/?>?/i);
                    if (match.length === 0) {
                        stream.goToEnd();
                        return finishToken(offset, TokenType.Script);
                    }
                    else if (match === '<!--') {
                        if (scriptState === 1) {
                            scriptState = 2;
                        }
                    }
                    else if (match === '-->') {
                        scriptState = 1;
                    }
                    else if (match[1] !== '/') { // <script
                        if (scriptState === 2) {
                            scriptState = 3;
                        }
                    }
                    else { // </script
                        if (scriptState === 3) {
                            scriptState = 2;
                        }
                        else {
                            stream.goBack(match.length); // to the beginning of the closing tag
                            break;
                        }
                    }
                }
                state = ScannerState.WithinContent;
                if (offset < stream.pos()) {
                    return finishToken(offset, TokenType.Script);
                }
                return internalScan(); // no advance yet - jump to content
            }
            case ScannerState.WithinStyleContent:
                stream.advanceUntilRegExp(/<\/style/i);
                state = ScannerState.WithinContent;
                if (offset < stream.pos()) {
                    return finishToken(offset, TokenType.Styles);
                }
                return internalScan(); // no advance yet - jump to content
        }
        // Reached from WithinEndTag's `break` and from any state value without a case.
        stream.advance(1);
        state = ScannerState.WithinContent;
        return finishToken(offset, TokenType.Unknown, errorMessage);
    }
    return {
        scan,
        getTokenType: () => tokenType,
        getTokenOffset: () => tokenOffset,
        getTokenLength: () => stream.pos() - tokenOffset,
        getTokenEnd: () => stream.pos(),
        getTokenText: () => stream.getSource().substring(tokenOffset, stream.pos()),
        getScannerState: () => state,
        getTokenError: () => tokenError
    };
}