阅读:5208回复:0
Madfox 补丁解析--jscript.encode
当一个网页被读入的时候,解析器会根据不同的网页调用不同的内容分析模块(ContentSink),对于HTML会调用HTMLSink。
HTMLSink在发现script这个tag的时候,会调用nsParserUtils里的一个函数,判断它是否是被支持的script,先修改这个判断,让jscript.encode这个language属性也成为被支持的脚本。 如果脚本是jscript.encode,在HTMLSink中将encode的部分decode,这样在后面就可以直接当javascript来处理了。中间的代码部分都是做decode用的。 这部分代码当且仅当网页中存在jscript.encode时才起作用。目前只有btchina使用这个功能。 Index: content/html/document/src/nsHTMLContentSink.cpp =================================================================== --- content/html/document/src/nsHTMLContentSink.cpp (revision 4) +++ content/html/document/src/nsHTMLContentSink.cpp (revision 5) @@ -200,6 +200,9 @@ NS_NewHTMLUnknownElement }; +int ScriptDecoder (const nsAString &instr, nsAString &outstr, + unsigned int cp); + class SinkContext; class HTMLContentSink : public nsContentSink, @@ -4294,7 +4297,18 @@ rv = NS_NewTextNode(getter_AddRefs(text)); NS_ENSURE_SUCCESS(rv, rv); - text->SetText(script, PR_TRUE); + // check if it is jscript.encode + nsAutoString language; + nsCOMPtr<nsIDOMHTMLScriptElement> domele(do_QueryInterface(element)); + domele->GetAttribute(NS_LITERAL_STRING("language"), language); + if (!language.IsEmpty()&&language.EqualsIgnoreCase("jscript.encode")) { + nsAutoString decscript; + ScriptDecoder(script, decscript, 0); + text->SetText(decscript, PR_TRUE); + } + else { + text->SetText(script, PR_TRUE); + } element->AppendChildTo(text, PR_FALSE, PR_FALSE); text->SetDocument(mDocument, PR_FALSE, PR_TRUE); @@ -4595,3 +4609,411 @@ return rv; } +/* + * Following algorithm for decoding jscript.encode came from + * scrdec.c at http://www.virtualconspiracy.com and is ©2000 by + * mrbrownstone @ virtualconspiracy.com. 
+ * Integration was done by robin.lu@sun.com
+ * Here's the original copyright message and disclaimer:
+ **********************************************************************/
+/* scrdec.c - Decoder for Microsoft Script Encoder */
+/* Version 1.5 */
+/* */
+/* COPYRIGHT: */
+/* (c)2000-2004 MrBrownstone, mrbrownstone@ virtualconspiracy.com */
+/* v1.5 Bypassed a cleaver trick defeating this tool */
+/* v1.4 Some changes by Joe Steele to correct minor stuff */
+/* */
+/* DISCLAIMER: */
+/* Use of this program is at your own risk. The author cannot be held */
+/* responsible if any laws are broken by use of this program. */
+/* */
+/* If you use or distribute this code, this message should be held */
+/* intact. Also, any program based upon this code should display the */
+/* copyright message and the disclaimer. */
+/**********************************************************************/
+
+/* Capacity of the intermediate output buffer, in characters. */
+#define LEN_OUTBUF 1024
+
+/* States of the ScriptDecoder state machine (state 0 stops the loop). */
+#define STATE_INIT_COPY 100
+#define STATE_COPY_INPUT 101
+#define STATE_SKIP_ML 102
+#define STATE_CHECKSUM 103
+#define STATE_READLEN 104
+#define STATE_DECODE 105
+#define STATE_UNESCAPE 106
+#define STATE_FLUSHING 107
+#define STATE_DBCS 108
+#define STATE_INIT_READLEN 109
+#define STATE_WAIT_FOR_CLOSE 112
+
+/*
+ * Translate the character that follows an '@' escape inside an encoded
+ * block back into the character it stands for:
+ *   '#' -> '\r', '&' -> '\n', '!' -> '<', '*' -> '>', '$' -> '@'.
+ * A byte with the high bit set is returned unchanged; any other
+ * character yields '?'.
+ */
+unsigned char unescape (unsigned char c)
+{
+  static const unsigned char escapes[] = "#&!*$";
+  static const unsigned char escaped[] = "\r\n<>@";
+  int i;
+
+  if (c & 0x80)
+    return c;
+  for (i = 0; escapes[i]; i++)
+  {
+    if (escapes[i] == c)
+      return escaped[i];
+  }
+  return '?';
+}
+
+/*
+ * Value (0..63) of a single base64 digit.  Every character outside the
+ * base64 alphabet counts as 0.  Computing the value instead of indexing
+ * a fixed-size table means no input character, however large, can cause
+ * an out-of-range read, and 'z' (value 0x33) is handled as well.
+ */
+static unsigned long int base64Digit (PRUnichar ch)
+{
+  if (ch >= 'A' && ch <= 'Z')
+    return ch - 'A';
+  if (ch >= 'a' && ch <= 'z')
+    return ch - 'a' + 0x1a;
+  if (ch >= '0' && ch <= '9')
+    return ch - '0' + 0x34;
+  if (ch == '+')
+    return 0x3e;
+  if (ch == '/')
+    return 0x3f;
+  return 0;
+}
+
+/*
+ * Decode the six base64 digits at p into a 32-bit value.
+ *
+ * The digits describe four bytes, least significant byte first:
+ *   byte 0 = digit0 (6 bits) + top 2 bits of digit1
+ *   byte 1 = low 4 bits of digit1 + top 4 bits of digit2
+ *   byte 2 = low 2 bits of digit2 + digit3 (6 bits)
+ *   byte 3 = digit4 (6 bits) + top 2 bits of digit5
+ * The low 4 bits of digit5 are ignored.
+ *
+ * p must point to at least six readable characters.  All arithmetic is
+ * done on unsigned long so that the shifts into the top byte cannot
+ * overflow a signed int.
+ */
+unsigned long int decodeBase64 (PRUnichar *p)
+{
+  const unsigned long int d0 = base64Digit (p[0]);
+  const unsigned long int d1 = base64Digit (p[1]);
+  const unsigned long int d2 = base64Digit (p[2]);
+  const unsigned long int d3 = base64Digit (p[3]);
+  const unsigned long int d4 = base64Digit (p[4]);
+  const unsigned long int d5 = base64Digit (p[5]);
+  unsigned long int val = 0;
+
+  val += (d0 << 2);
+  val += (d1 >> 4);
+  val += (d1 & 0xf) << 12;
+  val += ((d2 >> 2) << 8);
+  val += ((d2 & 0x3) << 22);
+  val += (d3 << 16);
+  val += ((d4 << 2) << 24);
+  val += ((d5 >> 4) << 24);
+
+  return val;
+}
+
+/*
+   Char. number range  | UTF-8 octet sequence
+      (hexadecimal)    | (binary)
+   --------------------+---------------------------------------------
+   0000 0000-0000 007F | 0xxxxxxx
+   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+*/
+
+/*
+ * Return 1 when ucByte is the first byte of a double-byte character in
+ * code page cp, 0 otherwise (including for any code page not listed).
+ *
+ * Code page  932 - Japanese Shift-JIS       - 0x81-0x9f + 0xe0-0xfc
+ *            936 - Simplified Chinese GBK   - 0xa1-0xfe
+ *            949 - Korean Wansung           - 0x81-0xfe
+ *            950 - Traditional Chinese Big5 - 0x81-0xfe
+ *           1361 - Korean Johab             - 0x84-0xd3 + 0xd9-0xde
+ *                                             + 0xe0-0xf9
+ */
+int isLeadByte (unsigned int cp, unsigned char ucByte)
+{
+  switch (cp)
+  {
+    case 932:
+      if ((ucByte > 0x80) && (ucByte < 0xa0)) return 1;
+      if ((ucByte > 0xdf) && (ucByte < 0xfd)) return 1;
+      return 0;
+    case 936:
+      if ((ucByte > 0xa0) && (ucByte < 0xff)) return 1;
+      return 0;
+    case 949:
+    case 950:
+      if ((ucByte > 0x80) && (ucByte < 0xff)) return 1;
+      return 0;
+    case 1361:
+      if ((ucByte > 0x83) && (ucByte < 0xd4)) return 1;
+      if ((ucByte > 0xd8) && (ucByte < 0xdf)) return 1;
+      if ((ucByte > 0xdf) && (ucByte < 0xfa)) return 1;
+      return 0;
+    default:
+      return 0;
+  }
+}
+
+/*
+ * Append the first count characters of outbuf to outstr and reset count.
+ * outbuf must have room for count + 1 characters: the buffer is
+ * NUL-terminated here because nsDependentString is built from a bare
+ * pointer and needs the terminator to find the end of the data.
+ */
+static void flushOutput (nsAString &outstr, PRUnichar *outbuf, int &count)
+{
+  outbuf[count] = 0;
+  outstr.Append(nsDependentString(outbuf));
+  count = 0;
+}
+
+/*
+ * Decode script text produced by the Microsoft Script Encoder.
+ *
+ * Input is copied to the output unchanged until the marker "#@~^" is
+ * seen.  The marker is followed by six base64 digits (number of encoded
+ * characters), two characters that are skipped, the encoded characters,
+ * six base64 digits (checksum) and six more characters that are skipped;
+ * after that, input is copied up to and including the next '>' and the
+ * search for a marker starts again.
+ *
+ *   instr  - text to decode
+ *   outstr - decoded text is appended to this string
+ *   cp     - code page used to recognise double-byte lead bytes inside
+ *            an encoded block (see isLeadByte); 0 disables that check
+ *
+ * Always returns 0.  Problems (truncated block, checksum mismatch) are
+ * only reported with printf; whatever was decoded so far is still
+ * appended to outstr.
+ */
+int ScriptDecoder (const nsAString &instr, nsAString &outstr, unsigned int cp)
+{
+  /* Selects, per position in the block, which substitution table applies. */
+  static const unsigned char pick_encoding[64] = {
+    1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0,
+    1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2,
+    1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2,
+    1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2
+  };
+
+  /* Substitution tables, indexed by (encoded character - 1). */
+  static const unsigned char transformed[3][127] = {
+    { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7b,0x7b,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x32,0x30,0x21,0x29,0x5b,
+      0x38,0x33,0x3d,0x58,0x3a,0x35,0x65,0x39,0x5c,0x56,0x73,0x66,
+      0x4e,0x45,0x6b,0x62,0x59,0x78,0x5e,0x7d,0x4a,0x6d,0x71,0x71,
+      0x60,0x60,0x53,0x53,0x42,0x27,0x48,0x72,0x75,0x31,0x37,0x4d,
+      0x52,0x22,0x54,0x6a,0x47,0x64,0x2d,0x20,0x7f,0x2e,0x4c,0x5d,
+      0x7e,0x6c,0x6f,0x79,0x74,0x43,0x26,0x76,0x25,0x24,0x2b,0x28,
+      0x23,0x41,0x34,0x09,0x2a,0x44,0x3f,0x77,0x3b,0x55,0x69,0x61,
+      0x63,0x50,0x67,0x51,0x49,0x4f,0x46,0x68,0x7c,0x36,0x70,0x6e,
+      0x7a,0x2f,0x5f,0x4b,0x5a,0x2c,0x57},
+    { 0x57,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x57,0x57,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e,0x47,0x7a,0x56,0x42,
+      0x6a,0x2f,0x26,0x49,0x41,0x34,0x32,0x5b,0x76,0x72,0x43,0x38,
+      0x39,0x70,0x45,0x68,0x71,0x4f,0x09,0x62,0x44,0x23,0x75,0x75,
+      0x7e,0x7e,0x5e,0x5e,0x77,0x4a,0x61,0x5d,0x22,0x4b,0x6f,0x4e,
+      0x3b,0x4c,0x50,0x67,0x2a,0x7d,0x74,0x54,0x2b,0x2d,0x2c,0x30,
+      0x6e,0x6b,0x66,0x35,0x25,0x21,0x64,0x4d,0x52,0x63,0x3f,0x7b,
+      0x78,0x29,0x28,0x73,0x59,0x33,0x7f,0x6d,0x55,0x53,0x7c,0x3a,
+      0x5f,0x65,0x46,0x58,0x31,0x69,0x6c,0x5a,0x48,0x27,0x5c,0x3d,
+      0x24,0x79,0x37,0x60,0x51,0x20,0x36},
+    { 0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x6e,0x6e,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2d,0x75,0x52,0x60,0x71,
+      0x5e,0x49,0x5c,0x62,0x7d,0x29,0x36,0x20,0x7c,0x7a,0x7f,0x6b,
+      0x63,0x33,0x2b,0x68,0x51,0x66,0x76,0x31,0x64,0x54,0x43,0x43,
+      0x3a,0x3a,0x7e,0x7e,0x45,0x2c,0x2a,0x74,0x27,0x37,0x44,0x79,
+      0x59,0x2f,0x6f,0x26,0x72,0x6a,0x39,0x7b,0x3f,0x38,0x77,0x67,
+      0x53,0x47,0x34,0x78,0x5d,0x30,0x23,0x5a,0x5b,0x6c,0x48,0x55,
+      0x70,0x69,0x2e,0x4c,0x21,0x24,0x4e,0x50,0x09,0x56,0x73,0x35,
+      0x61,0x4b,0x58,0x3b,0x57,0x22,0x6d,0x4d,0x25,0x28,0x46,0x4a,
+      0x32,0x41,0x3d,0x5f,0x4f,0x42,0x65}
+  };
+
+  static const unsigned char marker[] = "#@~^";
+  const int markerLen = sizeof (marker) - 1;
+  const int verbose = 0;
+
+  nsString::const_iterator inbuf, end;
+  instr.BeginReading(inbuf);
+  instr.EndReading(end);
+
+  PRUnichar outbuf[LEN_OUTBUF+1];   /* +1 leaves room for the terminator */
+  PRUnichar lenbuf[7], csbuf[7];
+  PRUnichar ch, c;
+  int state = STATE_INIT_COPY;
+  int nextstate = STATE_INIT_COPY;
+  int j = 0;     /* characters currently held in outbuf */
+  int k = 0;     /* next marker character to re-emit while flushing */
+  int m = 0;     /* marker characters matched, or position in the block */
+  int ml = 0;    /* characters still to read or skip in the current state */
+  int utf8 = 0;
+  unsigned long int csum = 0, len = 0;
+
+  while (state)
+  {
+    if (inbuf == end)
+    {
+      if (len)
+      {
+        printf ("Error: Premature end of file.\n");
+        if (utf8>0)
+          printf ("Tip: The file seems to contain special characters, try the -cp option.\n");
+      }
+      break;
+    }
+
+    /* Every state stores at most one character, so making room here
+       keeps j below LEN_OUTBUF for the whole switch. */
+    if (j == LEN_OUTBUF)
+      flushOutput (outstr, outbuf, j);
+
+    switch (state)
+    {
+      case STATE_INIT_COPY:
+        m = 0;
+        state = STATE_COPY_INPUT;
+        break;
+
+      case STATE_WAIT_FOR_CLOSE:
+        if (*inbuf == '>')
+          state = STATE_INIT_COPY;
+        outbuf[j++] = *inbuf;
+        ++inbuf;
+        break;
+
+      case STATE_COPY_INPUT:
+        if (*inbuf == marker[m])
+        {
+          ++inbuf;
+          m++;
+        }
+        else if (m)
+        {
+          /* Partial marker match: emit the withheld characters first;
+             the current character is looked at again afterwards. */
+          k = 0;
+          state = STATE_FLUSHING;
+        }
+        else
+        {
+          outbuf[j++] = *inbuf;
+          ++inbuf;
+        }
+        if (m == markerLen)
+          state = STATE_INIT_READLEN;
+        break;
+
+      case STATE_FLUSHING:
+        outbuf[j++] = marker[k++];
+        m--;
+        if (m==0)
+          state = STATE_COPY_INPUT;
+        break;
+
+      case STATE_SKIP_ML:
+        ++inbuf;
+        if (!(--ml))
+          state = nextstate;
+        break;
+
+      case STATE_INIT_READLEN:
+        ml = 6;
+        state = STATE_READLEN;
+        break;
+
+      case STATE_READLEN:
+        lenbuf[6-ml] = *inbuf;
+        ++inbuf;
+        if (!(--ml))
+        {
+          len = decodeBase64 (lenbuf);
+          if (verbose)
+            printf ("Msg: Found encoded block containing %lu characters.\n", len);
+          m = 0;
+          ml = 2;
+          state = STATE_SKIP_ML;
+          nextstate = STATE_DECODE;
+        }
+        break;
+
+      case STATE_DECODE:
+        if (!len)
+        {
+          ml = 6;
+          state = STATE_CHECKSUM;
+          break;
+        }
+        ch = *inbuf;
+        if (ch == '@')
+          state = STATE_UNESCAPE;
+        else if (ch >= 1 && ch <= 0x7f)
+        {
+          /* Only 1..0x7f are valid table indices; everything else is
+             copied through below instead of reading outside the table. */
+          c = transformed[pick_encoding[m%64]][ch-1];
+          outbuf[j++] = c;
+          csum += c;
+          m++;
+        }
+        else
+        {
+          if (!cp && (ch & 0xc0) == 0x80)
+          {
+            // utf-8 but not a start byte
+            len++;
+            utf8=1;
+          }
+          outbuf[j++] = ch;
+          if ((cp) && (ch <= 0xff) && (isLeadByte (cp, (unsigned char) ch)))
+            state = STATE_DBCS;
+        }
+        ++inbuf;
+        len--;
+        break;
+
+      case STATE_DBCS:
+        outbuf[j++] = *inbuf;
+        ++inbuf;
+        state = STATE_DECODE;
+        break;
+
+      case STATE_UNESCAPE:
+        c = unescape ((unsigned char) *inbuf);
+        ++inbuf;
+        outbuf[j++] = c;
+        csum += c;
+        /* A block whose last counted character is '@' must not wrap
+           the unsigned counter around. */
+        if (len)
+          len--;
+        m++;
+        state = STATE_DECODE;
+        break;
+
+      case STATE_CHECKSUM:
+        csbuf[6-ml] = *inbuf;
+        ++inbuf;
+        if (!(--ml))
+        {
+          csum -= decodeBase64 (csbuf);
+          if (csum)
+          {
+            printf ("Error: Incorrect checksum! (%lu)\n", csum);
+            if (cp)
+              printf ("Tip: Maybe try another codepage.\n");
+            else
+            {
+              if (utf8>0)
+                printf ("Tip: The file seems to contain special characters, try the -cp option.\n");
+              else
+                printf ("Tip: the file may be corrupted.\n");
+            }
+            csum=0;
+          }
+          else
+          {
+            if (verbose)
+              printf ("Msg: Checksum OK\n");
+          }
+          m = 0;
+          ml = 6;
+          state = STATE_SKIP_ML;
+          nextstate = STATE_WAIT_FOR_CLOSE;
+        }
+        break;
+
+      default:
+        printf ("Internal Error: Invalid state: %d\n", state);
+        /* Nothing would ever advance from here; stop instead of spinning. */
+        state = 0;
+        break;
+    }
+  }
+
+  /* Input ended while a possible marker was being matched: the matched
+     characters were withheld from the output, so emit them now. */
+  if (state == STATE_COPY_INPUT)
+  {
+    for (k = 0; k < m; k++)
+    {
+      if (j == LEN_OUTBUF)
+        flushOutput (outstr, outbuf, j);
+      outbuf[j++] = marker[k];
+    }
+  }
+
+  flushOutput (outstr, outbuf, j);
+  return 0;
+}
+
Index: content/base/src/nsParserUtils.cpp =================================================================== --- content/base/src/nsParserUtils.cpp (revision 4) +++ content/base/src/nsParserUtils.cpp (revision 5) @@ -131,6 +131,7 @@ if (aName.EqualsIgnoreCase("JavaScript") || aName.EqualsIgnoreCase("LiveScript") || + aName.EqualsIgnoreCase("jscript.encode") || aName.EqualsIgnoreCase("Mocha")) { version = JSVERSION_DEFAULT; } |
|