Skip to content

Commit 8452ccd

Browse files
committed
Convert date-time parser from regexp, expand tests
None of the regexps (at least, as of when they were removed) were vulnerable to ReDoS. However, I took this opportunity to check that the RFC is being followed more closely and that the code documents more clearly where each part of the RFC is implemented. Another way to put this: "regexps are magic and hinder code analysis". Introduced some equivalence tests to ensure that certain "weird" dates do indeed parse the same as their "canonical" RFC6265 counterparts.
1 parent 8614dbf commit 8452ccd

File tree

2 files changed

+194
-58
lines changed

2 files changed

+194
-58
lines changed

lib/cookie.js

Lines changed: 119 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,7 @@ var PATH_VALUE = /[\x20-\x3A\x3C-\x7E]+/;
6262
// date-time parsing constants (RFC6265 S5.1.1)
6363

6464
var DATE_DELIM = /[\x09\x20-\x2F\x3B-\x40\x5B-\x60\x7B-\x7E]/;
65-
var DAY_OF_MONTH = /^(\d{1,2})(?:[^\d]|$)/;
6665

67-
// S5.1.1 for "hms-time" -- is one or two digits each separated by :
68-
// Cannot have non-digits beside the numbers like in other parts of the
69-
// construction.
70-
var TIME = /^(\d{1,2}):(\d{1,2}):(\d{1,2})(?:[^\d]|$)/; // only anchor at start
71-
72-
var MONTH = /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/i;
7366
var MONTH_TO_NUM = {
7467
jan:0, feb:1, mar:2, apr:3, may:4, jun:5,
7568
jul:6, aug:7, sep:8, oct:9, nov:10, dec:11
@@ -81,13 +74,80 @@ var NUM_TO_DAY = [
8174
'Sun','Mon','Tue','Wed','Thu','Fri','Sat'
8275
];
8376

84-
var YEAR = /^(\d{2}|\d{4})(?:[^\d]|$)/; // 2 or 4 digits, anchored at start
85-
8677
var MAX_TIME = 2147483647000; // 31-bit max
8778
var MIN_TIME = 0; // 31-bit min
8879

80+
/**
 * Parses a natural number (i.e., non-negative integer) from the start of
 * `token`, following either the
 *   <min>*<max>DIGIT ( non-digit *OCTET )
 * or the
 *   <min>*<max>DIGIT
 * grammar (RFC6265 S5.1.1).
 *
 * @param {string} token - the date-token to examine.
 * @param {number} minDigits - fewest leading digits that are acceptable.
 * @param {number} maxDigits - most leading digits that are acceptable.
 * @param {boolean} trailingOK - when true, the "( non-digit *OCTET )" trailer
 *   is accepted after the digits; when false, the token must consist of the
 *   digits and nothing else.
 * @returns {?number} the value of the leading digits, or null when the token
 *   does not match. NaN is never returned.
 */
function parseDigits(token, minDigits, maxDigits, trailingOK) {
  var count = 0;
  while (count < token.length) {
    var c = token.charCodeAt(count);
    // "non-digit = %x00-2F / %x3A-FF"
    if (c <= 0x2F || c >= 0x3A) {
      break;
    }
    count++;
  }

  // Without at least one digit there is no number to denote; report "no
  // match" rather than letting parseInt("") leak NaN to the caller (which
  // could otherwise happen when minDigits is 0).
  if (count === 0) {
    return null;
  }

  // constrain to a minimum and maximum number of digits.
  if (count < minDigits || count > maxDigits) {
    return null;
  }

  // in the strict form nothing may follow the digits
  if (!trailingOK && count !== token.length) {
    return null;
  }

  return parseInt(token.slice(0, count), 10);
}
112+
113+
/**
 * Parses a date-token against the "time" production of RFC6265 S5.1.1:
 *   time       = hms-time ( non-digit *OCTET )
 *   hms-time   = time-field ":" time-field ":" time-field
 *   time-field = 1*2DIGIT
 *
 * @param {string} token - the date-token to examine.
 * @returns {?Array<number>} [hour, minute, second] when the token matches,
 *   otherwise null. The values are not range-checked here.
 */
function parseTime(token) {
  var parts = token.split(':');
  var result = [0,0,0];

  // Fewer than three fields can never form an hms-time. MORE than three is
  // fine: ":" is itself a non-digit, so everything from a fourth field
  // onwards belongs to the "( non-digit *OCTET )" trailer and is ignored.
  if (parts.length < 3) {
    return null;
  }

  for (var i = 0; i < 3; i++) {
    // "time-field" must be strictly "1*2DIGIT", HOWEVER, "hms-time" can be
    // followed by "( non-digit *OCTET )" so therefore the last time-field can
    // have a trailer
    var trailingOK = (i === 2);
    var num = parseDigits(parts[i], 1, 2, trailingOK);
    if (num === null) {
      return null;
    }
    result[i] = num;
  }

  return result;
}
141+
142+
/**
 * Matches a date-token against the "month" production of RFC6265 S5.1.1
 * ( "jan" / "feb" / ... / "dec" ) *OCTET, i.e. a case-insensitive match on
 * the first three characters only.
 *
 * @param {*} token - the date-token to examine (coerced with String()).
 * @returns {?number} the value stored for the month in MONTH_TO_NUM
 *   (jan is 0 ... dec is 11), or null when the token does not start with a
 *   month name.
 */
function parseMonth(token) {
  var key = String(token).slice(0, 3).toLowerCase();
  // Only consult the table's own entries so that a property inherited
  // through the prototype chain can never be mistaken for a month.
  if (!Object.prototype.hasOwnProperty.call(MONTH_TO_NUM, key)) {
    return null;
  }
  return MONTH_TO_NUM[key];
}
147+
148+
/*
149+
* RFC6265 S5.1.1 date parser (see RFC for full grammar)
150+
*/
91151
function parseDate(str) {
92152
if (!str) {
93153
return;
@@ -103,9 +163,9 @@ function parseDate(str) {
103163
}
104164

105165
var hour = null;
106-
var minutes = null;
107-
var seconds = null;
108-
var day = null;
166+
var minute = null;
167+
var second = null;
168+
var dayOfMonth = null;
109169
var month = null;
110170
var year = null;
111171

@@ -123,22 +183,12 @@ function parseDate(str) {
123183
* the date-token, respectively. Skip the remaining sub-steps and continue
124184
* to the next date-token.
125185
*/
126-
if (seconds === null) {
127-
result = TIME.exec(token);
186+
if (second === null) {
187+
result = parseTime(token);
128188
if (result) {
129-
hour = parseInt(result[1], 10);
130-
minutes = parseInt(result[2], 10);
131-
seconds = parseInt(result[3], 10);
132-
/* RFC6265 S5.1.1.5:
133-
* [fail if]
134-
* * the hour-value is greater than 23,
135-
* * the minute-value is greater than 59, or
136-
* * the second-value is greater than 59.
137-
*/
138-
if(hour > 23 || minutes > 59 || seconds > 59) {
139-
return;
140-
}
141-
189+
hour = result[0];
190+
minute = result[1];
191+
second = result[2];
142192
continue;
143193
}
144194
}
@@ -148,16 +198,11 @@ function parseDate(str) {
148198
* the day-of-month-value to the number denoted by the date-token. Skip
149199
* the remaining sub-steps and continue to the next date-token.
150200
*/
151-
if (day === null) {
152-
result = DAY_OF_MONTH.exec(token);
153-
if (result) {
154-
day = parseInt(result[1], 10);
155-
/* RFC6265 S5.1.1.5:
156-
* [fail if] the day-of-month-value is less than 1 or greater than 31
157-
*/
158-
if(day < 1 || day > 31) {
159-
return;
160-
}
201+
if (dayOfMonth === null) {
202+
// "day-of-month = 1*2DIGIT ( non-digit *OCTET )"
203+
result = parseDigits(token, 1, 2, true);
204+
if (result !== null) {
205+
dayOfMonth = result;
161206
continue;
162207
}
163208
}
@@ -168,47 +213,63 @@ function parseDate(str) {
168213
* continue to the next date-token.
169214
*/
170215
if (month === null) {
171-
result = MONTH.exec(token);
172-
if (result) {
173-
month = MONTH_TO_NUM[result[1].toLowerCase()];
216+
result = parseMonth(token);
217+
if (result !== null) {
218+
month = result;
174219
continue;
175220
}
176221
}
177222

178-
/* 2.4. If the found-year flag is not set and the date-token matches the year
179-
* production, set the found-year flag and set the year-value to the number
180-
* denoted by the date-token. Skip the remaining sub-steps and continue to
181-
* the next date-token.
223+
/* 2.4. If the found-year flag is not set and the date-token matches the
224+
* year production, set the found-year flag and set the year-value to the
225+
* number denoted by the date-token. Skip the remaining sub-steps and
226+
* continue to the next date-token.
182227
*/
183228
if (year === null) {
184-
result = YEAR.exec(token);
185-
if (result) {
186-
year = parseInt(result[0], 10);
229+
// "year = 2*4DIGIT ( non-digit *OCTET )"
230+
result = parseDigits(token, 2, 4, true);
231+
if (result !== null) {
232+
year = result;
187233
/* From S5.1.1:
188234
* 3. If the year-value is greater than or equal to 70 and less
189235
* than or equal to 99, increment the year-value by 1900.
190236
* 4. If the year-value is greater than or equal to 0 and less
191237
* than or equal to 69, increment the year-value by 2000.
192238
*/
193-
if (70 <= year && year <= 99) {
239+
if (year >= 70 && year <= 99) {
194240
year += 1900;
195-
} else if (0 <= year && year <= 69) {
241+
} else if (year >= 0 && year <= 69) {
196242
year += 2000;
197243
}
198-
199-
if (year < 1601) {
200-
return; // 5. ... the year-value is less than 1601
201-
}
202244
}
203245
}
204246
}
205247

206-
if (seconds === null || day === null || month === null || year === null) {
207-
return; // 5. ... at least one of the found-day-of-month, found-month, found-
208-
// year, or found-time flags is not set,
248+
/* RFC 6265 S5.1.1
249+
* "5. Abort these steps and fail to parse the cookie-date if:
250+
* * at least one of the found-day-of-month, found-month, found-
251+
* year, or found-time flags is not set,
252+
* * the day-of-month-value is less than 1 or greater than 31,
253+
* * the year-value is less than 1601,
254+
* * the hour-value is greater than 23,
255+
* * the minute-value is greater than 59, or
256+
* * the second-value is greater than 59.
257+
* (Note that leap seconds cannot be represented in this syntax.)"
258+
*
259+
* So, in order as above:
260+
*/
261+
if (
262+
dayOfMonth === null || month === null || year === null || second === null ||
263+
dayOfMonth < 1 || dayOfMonth > 31 ||
264+
year < 1601 ||
265+
hour > 23 ||
266+
minute > 59 ||
267+
second > 59
268+
) {
269+
return;
209270
}
210271

211-
return new Date(Date.UTC(year, month, day, hour, minutes, seconds));
272+
return new Date(Date.UTC(year, month, dayOfMonth, hour, minute, second));
212273
}
213274

214275
function formatDate(date) {

test/date_test.js

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,30 @@ function dateVows(table) {
5353
return {"date parsing": theVows};
5454
}
5555

56+
/**
 * Builds a vows batch from a table of { thisDate: sameAs } string pairs.
 * Each generated vow parses both strings with tough.parseDate and asserts
 * that both parsed successfully and that the results have the same string
 * form.
 *
 * @param {Object<string, string>} table - maps a date string to the date
 *   string it is expected to be equivalent to.
 * @returns {Object} a batch object with the vows under the
 *   "equivalence parsing" key.
 */
function equivalenceVows(table) {
  var theVows = {};
  Object.keys(table).forEach(function (thisDate) {
    var sameAs = table[thisDate];
    var label = "'"+thisDate+"' parses the same as '"+sameAs+"'";
    theVows[label] = function () {
      var expected = tough.parseDate(sameAs);
      var actual = tough.parseDate(thisDate);
      // Check each side on its own: if only one of the two fails to parse,
      // the vow must fail with a readable assertion rather than a TypeError
      // from calling toString() on undefined.
      assert.ok(expected, "'"+sameAs+"' failed to parse!");
      assert.ok(actual, "'"+thisDate+"' failed to parse!");
      assert.equal(actual.toString(), expected.toString());
    };
  });
  return {"equivalence parsing": theVows};
}
72+
5673
var TOO_MANY_XS = String("x").repeat(65535);
5774

5875
vows
5976
.describe('Date')
6077
.addBatch(dateVows({
6178
"Wed, 09 Jun 2021 10:18:14 GMT": true,
79+
"Wed, 09 JUN 2021 10:18:14 GMT": true,
6280
"Wed, 09 Jun 2021 22:18:14 GMT": true,
6381
"Tue, 18 Oct 2011 07:42:42.123 GMT": true,
6482
"18 Oct 2011 07:42:42 GMT": true,
@@ -90,6 +108,19 @@ vows
90108
"Thu, 01 Jan 1970 00:000:01 GMT": false,
91109
"Thu, 01 Jan 1970 00:00:010 GMT": false,
92110

111+
// hex in time
112+
"Wed, 09 Jun 2021 1a:33:44 GMT": false,
113+
"Wed, 09 Jun 2021 a1:33:44 GMT": false,
114+
"Wed, 09 Jun 2021 11:f3:44 GMT": false,
115+
"Wed, 09 Jun 2021 11:3f:44 GMT": false,
116+
"Wed, 09 Jun 2021 11:33:e4 GMT": false,
117+
"Wed, 09 Jun 2021 11:33:4e GMT": true, // garbage after seconds is OK
118+
119+
// negatives in time
120+
"Wed, 09 Jun 2021 -1:33:44 GMT": true, // parses as 1:33; - is a delimiter
121+
"Wed, 09 Jun 2021 11:-3:44 GMT": false,
122+
"Wed, 09 Jun 2021 11:33:-4 GMT": false,
123+
93124
"": false
94125
}))
95126
.addBatch({
@@ -121,4 +152,48 @@ vows
121152
}
122153
}
123154
})
155+
.addBatch(equivalenceVows({
156+
// milliseconds ignored
157+
"Tue, 18 Oct 2011 07:42:42.123 GMT": "Tue, 18 Oct 2011 07:42:42 GMT",
158+
159+
// shorter HH:MM:SS works how you'd expect:
160+
"8 Oct 2011 7:32:42 GMT": "8 Oct 2011 07:32:42 GMT",
161+
"8 Oct 2011 7:2:42 GMT": "8 Oct 2011 07:02:42 GMT",
162+
"8 Oct 2011 7:2:2 GMT": "8 Oct 2011 07:02:02 GMT",
163+
164+
// MDY versus DMY:
165+
"Oct 18 2011 07:42:42 GMT": "18 Oct 2011 07:42:42 GMT",
166+
167+
// some other messy auto format
168+
"Tue Oct 18 2011 07:05:03 GMT+0000 (GMT)": "Tue, 18 Oct 2011 07:05:03 GMT",
169+
170+
// short year
171+
'10 Feb 81 13:00:00 GMT': '10 Feb 1981 13:00:00 GMT',
172+
'10 Feb 17 13:00:00 GMT': '10 Feb 2017 13:00:00 GMT',
173+
174+
// dashes
175+
'Thu, 17-Apr-2014 02:12:29 GMT': 'Thu, 17 Apr 2014 02:12:29 GMT',
176+
// dashes and "UTC" (timezone is always ignored)
177+
'Thu, 17-Apr-2014 02:12:29 UTC': 'Thu, 17 Apr 2014 02:12:29 GMT',
178+
179+
// no weekday
180+
"09 Jun 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT",
181+
182+
// garbage after seconds is OK
183+
"Wed, 09 Jun 2021 11:33:4e GMT": "Wed, 09 Jun 2021 11:33:04 GMT",
184+
185+
// - is delimiter in this position
186+
"Wed, 09 Jun 2021 -1:33:44 GMT": "Wed, 09 Jun 2021 01:33:44 GMT",
187+
188+
// prefix match on month
189+
"Wed, 09 Junxxx 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT",
190+
"09 November 2021 10:18:14 GMT": "09 Nov 2021 10:18:14 GMT",
191+
192+
// case of Month
193+
"Wed, 09 JUN 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT",
194+
"Wed, 09 jUN 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT",
195+
196+
// test the framework :wink:
197+
"Wed, 09 Jun 2021 10:18:14 GMT": "Wed, 09 Jun 2021 10:18:14 GMT"
198+
}))
124199
.export(module);

0 commit comments

Comments
 (0)