tokenizer_mixin.js
4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
'use strict';
//Project-local module; this file reads its CODE_POINTS export
var UNICODE = require('../common/unicode');
//Aliases
//`$` is shorthand for the code point constants (this file uses `$.LINE_FEED`)
var $ = UNICODE.CODE_POINTS;
exports.assign = function (tokenizer) {
//NOTE: obtain Tokenizer proto this way to avoid module circular references
var tokenizerProto = Object.getPrototypeOf(tokenizer),
tokenStartOffset = -1,
tokenCol = -1,
tokenLine = 1,
isEol = false,
lineStartPos = 0,
col = -1,
line = 1;
function attachLocationInfo(token) {
token.location = {
line: tokenLine,
col: tokenCol,
startOffset: tokenStartOffset,
endOffset: -1
};
}
//NOTE: patch consumption method to track line/col information
tokenizer._consume = function () {
var cp = tokenizerProto._consume.call(this);
//NOTE: LF should be in the last column of the line
if (isEol) {
isEol = false;
line++;
lineStartPos = this.preprocessor.sourcePos;
}
if (cp === $.LINE_FEED)
isEol = true;
col = this.preprocessor.sourcePos - lineStartPos + 1;
return cp;
};
tokenizer._unconsume = function () {
tokenizerProto._unconsume.call(this);
isEol = false;
col = this.preprocessor.sourcePos - lineStartPos + 1;
};
//NOTE: patch token creation methods and attach location objects
tokenizer._createStartTagToken = function () {
tokenizerProto._createStartTagToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createEndTagToken = function () {
tokenizerProto._createEndTagToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createCommentToken = function () {
tokenizerProto._createCommentToken.call(this);
attachLocationInfo(this.currentToken);
};
tokenizer._createDoctypeToken = function (initialName) {
tokenizerProto._createDoctypeToken.call(this, initialName);
attachLocationInfo(this.currentToken);
};
tokenizer._createCharacterToken = function (type, ch) {
tokenizerProto._createCharacterToken.call(this, type, ch);
attachLocationInfo(this.currentCharacterToken);
};
tokenizer._createAttr = function (attrNameFirstCh) {
tokenizerProto._createAttr.call(this, attrNameFirstCh);
this.currentAttrLocation = {
line: line,
col: col,
startOffset: this.preprocessor.sourcePos,
endOffset: -1
};
};
tokenizer._leaveAttrName = function (toState) {
tokenizerProto._leaveAttrName.call(this, toState);
this._attachCurrentAttrLocationInfo();
};
tokenizer._leaveAttrValue = function (toState) {
tokenizerProto._leaveAttrValue.call(this, toState);
this._attachCurrentAttrLocationInfo();
};
tokenizer._attachCurrentAttrLocationInfo = function () {
this.currentAttrLocation.endOffset = this.preprocessor.sourcePos;
if (!this.currentToken.location.attrs)
this.currentToken.location.attrs = Object.create(null);
this.currentToken.location.attrs[this.currentAttr.name] = this.currentAttrLocation;
};
//NOTE: patch token emission methods to determine end location
tokenizer._emitCurrentToken = function () {
//NOTE: if we have pending character token make it's end location equal to the
//current token's start location.
if (this.currentCharacterToken)
this.currentCharacterToken.location.endOffset = this.currentToken.location.startOffset;
this.currentToken.location.endOffset = this.preprocessor.sourcePos + 1;
tokenizerProto._emitCurrentToken.call(this);
};
tokenizer._emitCurrentCharacterToken = function () {
//NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(),
//then set it's location at the current preprocessor position.
//We don't need to increment preprocessor position, since character token
//emission is always forced by the start of the next character token here.
//So, we already have advanced position.
if (this.currentCharacterToken && this.currentCharacterToken.location.endOffset === -1)
this.currentCharacterToken.location.endOffset = this.preprocessor.sourcePos;
tokenizerProto._emitCurrentCharacterToken.call(this);
};
//NOTE: patch initial states for each mode to obtain token start position
Object.keys(tokenizerProto.MODE)
.map(function (modeName) {
return tokenizerProto.MODE[modeName];
})
.forEach(function (state) {
tokenizer[state] = function (cp) {
tokenStartOffset = this.preprocessor.sourcePos;
tokenLine = line;
tokenCol = col;
tokenizerProto[state].call(this, cp);
};
});
};