You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
5.6 KiB
182 lines
5.6 KiB
/**
 * All the states the tokenizer can be in.
 *
 * Every member has an explicit numeric value; the values are contiguous,
 * running from 1 (`Text`) to 66 (`InHexEntity`). The section comments below
 * only group members by the prefixes/suffixes of their names.
 */
declare const enum State {
    Text = 1,

    // Opening / closing tag names
    BeforeTagName = 2,
    InTagName = 3,
    InSelfClosingTag = 4,
    BeforeClosingTagName = 5,
    InClosingTagName = 6,
    AfterClosingTagName = 7,

    // Attributes (Dq / Sq / Nq presumably: double-, single-, un-quoted values;
    // compare the `stateInAttributeValue*Quotes` members of `Tokenizer`)
    BeforeAttributeName = 8,
    InAttributeName = 9,
    AfterAttributeName = 10,
    BeforeAttributeValue = 11,
    InAttributeValueDq = 12,
    InAttributeValueSq = 13,
    InAttributeValueNq = 14,

    // Declarations and processing instructions
    BeforeDeclaration = 15,
    InDeclaration = 16,
    InProcessingInstruction = 17,

    // Comments
    BeforeComment = 18,
    InComment = 19,
    InSpecialComment = 20,
    AfterComment1 = 21,
    AfterComment2 = 22,

    // CDATA
    BeforeCdata1 = 23,
    BeforeCdata2 = 24,
    BeforeCdata3 = 25,
    BeforeCdata4 = 26,
    BeforeCdata5 = 27,
    BeforeCdata6 = 28,
    InCdata = 29,
    AfterCdata1 = 30,
    AfterCdata2 = 31,

    // "Special" states whose names start with S (script / style)
    BeforeSpecialS = 32,
    BeforeSpecialSEnd = 33,
    BeforeScript1 = 34,
    BeforeScript2 = 35,
    BeforeScript3 = 36,
    BeforeScript4 = 37,
    BeforeScript5 = 38,
    AfterScript1 = 39,
    AfterScript2 = 40,
    AfterScript3 = 41,
    AfterScript4 = 42,
    AfterScript5 = 43,
    BeforeStyle1 = 44,
    BeforeStyle2 = 45,
    BeforeStyle3 = 46,
    BeforeStyle4 = 47,
    AfterStyle1 = 48,
    AfterStyle2 = 49,
    AfterStyle3 = 50,
    AfterStyle4 = 51,

    // "Special" states whose names start with T (title)
    BeforeSpecialT = 52,
    BeforeSpecialTEnd = 53,
    BeforeTitle1 = 54,
    BeforeTitle2 = 55,
    BeforeTitle3 = 56,
    BeforeTitle4 = 57,
    AfterTitle1 = 58,
    AfterTitle2 = 59,
    AfterTitle3 = 60,
    AfterTitle4 = 61,

    // Entities
    BeforeEntity = 62,
    BeforeNumericEntity = 63,
    InNamedEntity = 64,
    InNumericEntity = 65,
    InHexEntity = 66
}
/**
 * The set of callbacks that the `Tokenizer` constructor accepts as its `cbs`
 * argument. Every callback returns `void`.
 *
 * NOTE(review): this declaration fixes only the signatures. When, and in which
 * order, each callback is invoked is decided by the tokenizer implementation,
 * which is not part of this file — confirm there before relying on it.
 */
export interface Callbacks {
    onattribdata(value: string): void;
    /** `quote` may be a string, `undefined`, or `null`. */
    onattribend(quote: string | undefined | null): void;
    onattribname(name: string): void;
    oncdata(data: string): void;
    onclosetag(name: string): void;
    oncomment(data: string): void;
    ondeclaration(content: string): void;
    onend(): void;
    /** Receives an `Error` and, optionally, a tokenizer `State` value. */
    onerror(error: Error, state?: State): void;
    onopentagend(): void;
    onopentagname(name: string): void;
    onprocessinginstruction(instruction: string): void;
    onselfclosingtag(): void;
    ontext(value: string): void;
}
/**
 * Type declaration of the tokenizer class (the module's default export).
 *
 * Input is supplied as string chunks through `write` / `end`, and an object
 * implementing `Callbacks` is supplied at construction time. Private members
 * are declared without types, so only their names are visible here.
 */
export default class Tokenizer {
    /** The current state the tokenizer is in. */
    _state: State;
    /** The read buffer. */
    private buffer;
    /** The beginning of the section that is currently being read. */
    sectionStart: number;
    /** The index within the buffer that we are currently looking at. */
    _index: number;
    /**
     * Data that has already been processed will be removed from the buffer occasionally.
     * `bufferOffset` keeps track of how many characters have been removed, to make sure position information is accurate.
     */
    private bufferOffset;
    /** Some behavior, eg. when decoding entities, is done while we are in another state. This keeps track of the other state type. */
    private baseState;
    /** For special parsing behavior inside of script and style tags. */
    private special;
    /** Indicates whether the tokenizer has been paused. */
    private running;
    /** Indicates whether the tokenizer has finished running / `.end` has been called. */
    private ended;
    private readonly cbs;
    private readonly xmlMode;
    private readonly decodeEntities;
    /**
     * @param options Optional `xmlMode` / `decodeEntities` flags; `null` is accepted in place of an options object.
     * @param cbs The callbacks object.
     */
    constructor(options: {
        xmlMode?: boolean;
        decodeEntities?: boolean;
    } | null, cbs: Callbacks);
    reset(): void;
    /** @param chunk A string chunk of input. */
    write(chunk: string): void;
    /** @param chunk An optional string chunk of input. */
    end(chunk?: string): void;
    pause(): void;
    resume(): void;
    /**
     * The current index within all of the written data.
     */
    getAbsoluteIndex(): number;
    private stateText;
    /**
     * HTML only allows ASCII alpha characters (a-z and A-Z) at the beginning of a tag name.
     *
     * XML allows a lot more characters here (@see https://www.w3.org/TR/REC-xml/#NT-NameStartChar).
     * We allow anything that wouldn't end the tag.
     */
    private isTagStartChar;
    private stateBeforeTagName;
    private stateInTagName;
    private stateBeforeClosingTagName;
    private stateInClosingTagName;
    private stateAfterClosingTagName;
    private stateBeforeAttributeName;
    private stateInSelfClosingTag;
    private stateInAttributeName;
    private stateAfterAttributeName;
    private stateBeforeAttributeValue;
    private handleInAttributeValue;
    private stateInAttributeValueDoubleQuotes;
    private stateInAttributeValueSingleQuotes;
    private stateInAttributeValueNoQuotes;
    private stateBeforeDeclaration;
    private stateInDeclaration;
    private stateInProcessingInstruction;
    private stateBeforeComment;
    private stateInComment;
    private stateInSpecialComment;
    private stateAfterComment1;
    private stateAfterComment2;
    private stateBeforeCdata6;
    private stateInCdata;
    private stateAfterCdata1;
    private stateAfterCdata2;
    private stateBeforeSpecialS;
    private stateBeforeSpecialSEnd;
    private stateBeforeSpecialLast;
    private stateAfterSpecialLast;
    private parseFixedEntity;
    private parseLegacyEntity;
    private stateInNamedEntity;
    private decodeNumericEntity;
    private stateInNumericEntity;
    private stateInHexEntity;
    private cleanup;
    /**
     * Iterates through the buffer, calling the function corresponding to the current state.
     *
     * States that are more likely to be hit are higher up, as a performance improvement.
     */
    private parse;
    private finish;
    private handleTrailingData;
    private getSection;
    private emitToken;
    private emitPartial;
}
// Empty export list: adds no names to this module's exports.
export {};
//# sourceMappingURL=Tokenizer.d.ts.map