Madfox 补丁解析--jscript.encode

楼主^#
更多发布于：2005-04-01 10:26
当一个网页被读入的时候，解析器会根据不同的网页调用不同的内容分析模块(ContentSink)，对于HTML会调用HTMLSink。
HTMLSink在发现script这个tag的时候，会调用nsParserUtils里的一个函数，判断它是否是被支持的script，先修改这个判断，让jscript.encode这个language属性也成为被支持的脚本。
如果脚本是jscript.encode，在HTMLSink中将encode的部分decode，这样在后面就可以直接当javascript来处理了。中间的代码部分都是做decode用的。
这部分代码当且仅当网页中存在jscript.encode时才起作用。目前只有btchina使用这个功能。
Index: content/html/document/src/nsHTMLContentSink.cpp
===================================================================
--- content/html/document/src/nsHTMLContentSink.cpp     (revision 4)
+++ content/html/document/src/nsHTMLContentSink.cpp     (revision 5)
@@ -200,6 +200,9 @@
   NS_NewHTMLUnknownElement
 };
 
+int ScriptDecoder (const nsAString &instr, nsAString &outstr, 
+    unsigned int cp);  // forward declaration; the definition is appended at the end of this file
+
 class SinkContext;
 
 class HTMLContentSink : public nsContentSink,
@@ -4294,7 +4297,18 @@
     rv = NS_NewTextNode(getter_AddRefs(text));
     NS_ENSURE_SUCCESS(rv, rv);
 
-    text->SetText(script, PR_TRUE);
+    // check if it is jscript.encode
+    nsAutoString language;
+    nsCOMPtr<nsIDOMHTMLScriptElement> domele(do_QueryInterface(element));
+    domele->GetAttribute(NS_LITERAL_STRING("language"), language);
+    if (!language.IsEmpty()&&language.EqualsIgnoreCase("jscript.encode")) {
+      nsAutoString decscript;
+      ScriptDecoder(script, decscript, 0);
+      text->SetText(decscript, PR_TRUE);
+    }
+    else {
+      text->SetText(script, PR_TRUE);
+    }
 
     element->AppendChildTo(text, PR_FALSE, PR_FALSE);
     text->SetDocument(mDocument, PR_FALSE, PR_TRUE);
@@ -4595,3 +4609,411 @@
   return rv;
 }
 
+/*
+ * Following algorithm for decoding jscript.encode came from
+ * scrdec.c at http://www.virtualconspiracy.com and is ©2000 by
+ * mrbrownstone @ virtualconspiracy.com. 
+ * Integration was done by robin.lu@sun.com
+ * Here's the original copyright message and disclaimer:
+ **********************************************************************/
+/* scrdec.c - Decoder for Microsoft Script Encoder                    */
+/* Version 1.5                                                        */
+/*                                                                    */
+/* COPYRIGHT:                                                         */
+/* (c)2000-2004 MrBrownstone, mrbrownstone@ virtualconspiracy.com     */
+/* v1.5 Bypassed a cleaver trick defeating this tool                  */
+/* v1.4 Some changes by Joe Steele to correct minor stuff             */
+/*                                                                    */
+/* DISCLAIMER:                                                        */
+/* Use of this program is at your own risk. The author cannot be held */
+/* responsible if any laws are broken by use of this program.         */
+/*                                                                    */
+/* If you use or distribute this code, this message should be held    */
+/* intact. Also, any program based upon this code should display the  */
+/* copyright message and the disclaimer.                              */
+/**********************************************************************/
+#define LEN_OUTBUF 1024                /* ScriptDecoder flushes its output buffer every LEN_OUTBUF characters */
+
+#define STATE_INIT_COPY        100     /* reset marker matching, then go to STATE_COPY_INPUT */
+#define STATE_COPY_INPUT       101     /* copy input through while matching the "#@~^" marker */
+#define STATE_SKIP_ML          102     /* skip ml input characters, then switch to nextstate */
+#define STATE_CHECKSUM         103     /* read the 6-character encoded checksum and compare it */
+#define STATE_READLEN          104     /* read the 6-character encoded block length */
+#define STATE_DECODE           105     /* decode block characters until len reaches 0 */
+#define STATE_UNESCAPE         106     /* translate the character that follows an '@' escape */
+#define STATE_FLUSHING         107     /* emit marker characters that were matched only partially */
+#define STATE_DBCS             108     /* copy the character following a DBCS lead byte unchanged */
+#define STATE_INIT_READLEN     109     /* marker fully matched: prepare to read the length */
+#define STATE_URLENCODE_1      110     /* first hex digit after '%' (only when urlencoded is set) */
+#define STATE_URLENCODE_2      111     /* second hex digit after '%', then return to the saved state */
+#define STATE_WAIT_FOR_CLOSE   112     /* copy input through until '>' is seen */
+
+
+
+unsigned char unescape (unsigned char c)  /* Translate the character that follows an '@' escape: */
+{                                         /* '#'->CR, '&'->LF, '!'->'<', '*'->'>', '$'->'@'.     */
+    static unsigned char escapes[] = "#&!*$";    /* escape codes ...                         */
+    static unsigned char escaped[] = "\r\n<>@";  /* ... and their replacements, same order   */
+    int i=0;
+
+    if (c & 0x80)  /* characters with the high bit set are returned unchanged */
+        return c;
+    while (escapes[i])  /* linear search; stops at the terminating NUL of escapes */
+    {
+        if (escapes[i] == c)
+            return escaped[i];
+        i++;
+    }    
+    return '?';  /* not a known escape code */
+}
+
+unsigned long int decodeBase64 (PRUnichar *p)  /* Decode the six base64 characters at p[0..5] into a 32-bit value. */
+{                                              /* Byte order is little-endian: the first characters form the LSB (see diagram below). */
+    unsigned long int val = 0;
+
+    static const int digits[0x7b]= {  /* sextet value per character code; 0x7b entries so that 'z' (0x7a) is inside the table */
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+        0x00,0x3e,0x00,0x00,0x00,0x3f,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,
+        0x3c,0x3d,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x02,0x03,0x04,
+        0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,0x11,0x12,
+        0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x00,0x00,0x00,0x00,0x00,0x00,0x1a,
+        0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
+        0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33
+    };
+    unsigned long int d[6];  /* unsigned sextets: keeps the "<< 24" terms below free of signed overflow */
+    for (int n = 0; n < 6; n++) d[n] = (p[n] < 0x7b) ? digits[p[n]] : 0;  /* a PRUnichar beyond the table decodes as 0 instead of reading out of bounds */
+    val +=  (d[0] << 2);
+    val +=  (d[1] >> 4);
+    val +=  (d[1] & 0xf) << 12;
+    val += ((d[2] >> 2) << 8); 
+    val += ((d[2] & 0x3) << 22);
+    val +=  (d[3] << 16);
+    val += ((d[4] << 2) << 24);
+    val += ((d[5] >> 4) << 24);
+
+    /* 543210 543210 543210 543210 543210 543210
+
+       765432 
+              10
+                     ba98
+                fedc
+                         76
+                            543210
+                                   fedcba 98----
+       |- LSB -||-     -||-     -| |- MSB -|
+    */
+    return val;
+}
+
+/*
+ Char. number range  |        UTF-8 octet sequence
+      (hexadecimal)    |              (binary)
+   --------------------+---------------------------------------------
+   0000 0000-0000 007F | 0xxxxxxx
+   0000 0080-0000 07FF | 110xxxxx 10xxxxxx
+   0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
+   0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+*/
+
+int isLeadByte (unsigned int cp, unsigned char ucByte)  /* Return 1 if ucByte lies in a lead-byte range of code page cp, else 0. */
+{                                                       /* Only the code pages listed below are recognised. */
+    /* Code page 932 - Japanese Shift-JIS       - 0x81-0x9f 
+                                                  0xe0-0xfc 
+                 936 - Simplified Chinese GBK   - 0xa1-0xfe
+                 949 - Korean Wansung           - 0x81-0xfe
+                 950 - Traditional Chinese Big5 - 0x81-0xfe 
+                1361 - Korean Johab             - 0x84-0xd3 
+                                                  0xd9-0xde
+                                                  0xe0-0xf9 */
+    switch (cp)
+    {
+        case 932:
+            if ((ucByte > 0x80) && (ucByte < 0xa0))    return 1;
+            if ((ucByte > 0xdf) && (ucByte < 0xfd))    return 1;
+            else return 0;
+        case 936:
+            if ((ucByte > 0xa0) && (ucByte < 0xff)) return 1;
+            else return 0;
+        case 949:  /* 949 and 950 share the same lead-byte range */
+        case 950:
+            if ((ucByte > 0x80) && (ucByte < 0xff)) return 1;
+            else return 0;
+        case 1361:
+            if ((ucByte > 0x83) && (ucByte < 0xd4)) return 1;
+            if ((ucByte > 0xd8) && (ucByte < 0xdf)) return 1;
+            if ((ucByte > 0xdf) && (ucByte < 0xfa)) return 1;
+            else return 0;
+        default:  /* any other code page (including 0): never a lead byte */
+            return 0;
+    }
+
+}
+
+int ScriptDecoder (const nsAString &instr, nsAString &outstr, unsigned int cp)  /* Decode "#@~^" script-encoder blocks found in instr and append the result to outstr. */
+{   /* cp: DBCS code page handed to isLeadByte(), 0 for none. Always returns 0; problems are only reported through printf. */
+    static const unsigned char pick_encoding[64] = {  // which of the three tables decodes the m-th character (index m%64)
+    1, 2, 0, 1, 2, 0, 2, 0, 0, 2, 0, 2, 1, 0, 2, 0, 
+    1, 0, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 0, 0, 2, 
+    1, 1, 0, 2, 0, 2, 0, 1, 0, 1, 1, 2, 0, 1, 0, 2, 
+    1, 0, 2, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 0, 2
+    };
+
+    static const unsigned char transformed[3][127] = {  // decoded value for encoded characters 1..127 (indexed by character-1)
+        { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x7b,0x7b,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x32,0x30,0x21,0x29,0x5b,
+          0x38,0x33,0x3d,0x58,0x3a,0x35,0x65,0x39,0x5c,0x56,0x73,0x66,
+          0x4e,0x45,0x6b,0x62,0x59,0x78,0x5e,0x7d,0x4a,0x6d,0x71,0x71,
+          0x60,0x60,0x53,0x53,0x42,0x27,0x48,0x72,0x75,0x31,0x37,0x4d,
+          0x52,0x22,0x54,0x6a,0x47,0x64,0x2d,0x20,0x7f,0x2e,0x4c,0x5d,
+          0x7e,0x6c,0x6f,0x79,0x74,0x43,0x26,0x76,0x25,0x24,0x2b,0x28,
+          0x23,0x41,0x34,0x09,0x2a,0x44,0x3f,0x77,0x3b,0x55,0x69,0x61,
+          0x63,0x50,0x67,0x51,0x49,0x4f,0x46,0x68,0x7c,0x36,0x70,0x6e,
+          0x7a,0x2f,0x5f,0x4b,0x5a,0x2c,0x57},
+        { 0x57,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x57,0x57,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2e,0x47,0x7a,0x56,0x42,
+          0x6a,0x2f,0x26,0x49,0x41,0x34,0x32,0x5b,0x76,0x72,0x43,0x38,
+          0x39,0x70,0x45,0x68,0x71,0x4f,0x09,0x62,0x44,0x23,0x75,0x75,
+          0x7e,0x7e,0x5e,0x5e,0x77,0x4a,0x61,0x5d,0x22,0x4b,0x6f,0x4e,
+          0x3b,0x4c,0x50,0x67,0x2a,0x7d,0x74,0x54,0x2b,0x2d,0x2c,0x30,
+          0x6e,0x6b,0x66,0x35,0x25,0x21,0x64,0x4d,0x52,0x63,0x3f,0x7b,
+          0x78,0x29,0x28,0x73,0x59,0x33,0x7f,0x6d,0x55,0x53,0x7c,0x3a,
+          0x5f,0x65,0x46,0x58,0x31,0x69,0x6c,0x5a,0x48,0x27,0x5c,0x3d,
+          0x24,0x79,0x37,0x60,0x51,0x20,0x36},
+        { 0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x6e,0x6e,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+          0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x2d,0x75,0x52,0x60,0x71,
+          0x5e,0x49,0x5c,0x62,0x7d,0x29,0x36,0x20,0x7c,0x7a,0x7f,0x6b,
+          0x63,0x33,0x2b,0x68,0x51,0x66,0x76,0x31,0x64,0x54,0x43,0x43,
+          0x3a,0x3a,0x7e,0x7e,0x45,0x2c,0x2a,0x74,0x27,0x37,0x44,0x79,
+          0x59,0x2f,0x6f,0x26,0x72,0x6a,0x39,0x7b,0x3f,0x38,0x77,0x67,
+          0x53,0x47,0x34,0x78,0x5d,0x30,0x23,0x5a,0x5b,0x6c,0x48,0x55,
+          0x70,0x69,0x2e,0x4c,0x21,0x24,0x4e,0x50,0x09,0x56,0x73,0x35,
+          0x61,0x4b,0x58,0x3b,0x57,0x22,0x6d,0x4d,0x25,0x28,0x46,0x4a,
+          0x32,0x41,0x3d,0x5f,0x4f,0x42,0x65}
+    };
+    // State machine: copy input through until the marker is seen, read the 6-character encoded length, skip 2 characters,
+    // decode that many characters, read the 6-character checksum, skip 6 characters, then copy through up to the next '>'.
+    int urlencoded = 0;  // never set to non-zero here, so the '%' handling below stays inactive
+    int verbose = 0;     // never set to non-zero here, so the "Msg:" printfs stay silent
+    nsString::const_iterator inbuf, end;
+    instr.BeginReading(inbuf);
+    instr.EndReading(end);
+    PRUnichar outbuf[LEN_OUTBUF+1];  // +1 leaves room for the terminating NUL written before each flush
+    PRUnichar c, c1, c2, lenbuf[7], csbuf[7];
+    unsigned char marker[] = "#@~^";  // start-of-encoded-block marker
+    int ustate, nextstate, state = 0;
+    int i, j, k, m, ml = 0;  // j: fill level of outbuf; m: marker match length, later the decode position; ml: countdown
+    int utf8 = 0;
+    unsigned long int csum = 0, len = 0;  // running checksum of decoded characters; characters left in the block
+
+    state = STATE_INIT_COPY;
+    i = 0;
+    j = 0;
+
+    while (state)
+    {
+        if (inbuf == end)  // checked before every step, so each state may consume at most one character
+        {
+              if (len) 
+              {
+                  printf ("Error: Premature end of file.\n");
+                  if (utf8>0)
+                      printf ("Tip: The file seems to contain special characters, try the -cp option.\n");
+              }
+              break;
+        }
+
+        if (j == LEN_OUTBUF)  // buffer full: flush it
+        {
+            outbuf[j] = 0; outstr.Append(nsDependentString(outbuf));  // terminate first: nsDependentString scans for the NUL
+            j = 0;
+        }
+
+        if ((urlencoded==1) && (*inbuf=='%'))
+        {
+            ustate = state;  // remember where to resume after the two hex digits
+            state = STATE_URLENCODE_1;
+            inbuf++;
+            continue;
+        }
+
+        if (urlencoded==2) 
+            urlencoded=1;
+
+        switch (state)
+        {
+            case STATE_INIT_COPY: 
+                ml = strlen ((const char*)marker);
+                m = 0;
+                state = STATE_COPY_INPUT;
+                break;
+
+            case STATE_WAIT_FOR_CLOSE:
+                if (*inbuf == '>')
+                    state = STATE_INIT_COPY;
+                outbuf[j++] = *(inbuf++);
+                break;
+
+            case STATE_COPY_INPUT:
+                if (*inbuf == marker[m])
+                {
+                    inbuf++;
+                    m++;
+                }
+                else
+                {
+                    if (m)  // partial marker match failed: emit the swallowed marker characters first
+                    {
+                        k = 0;
+                        state = STATE_FLUSHING;
+                    }
+                    else
+                        outbuf[j++] = *(inbuf++);
+
+                }
+                if (m == ml)
+                    state = STATE_INIT_READLEN;
+                break;
+
+            case STATE_FLUSHING:
+                outbuf[j++] = marker[k++];
+                m--;
+                if (m==0)
+                    state = STATE_COPY_INPUT;
+                break;
+
+            case STATE_SKIP_ML: 
+                inbuf++;
+                if (!(--ml))
+                    state = nextstate;
+                break;
+
+
+            case STATE_INIT_READLEN: 
+                ml = 6;
+                state = STATE_READLEN;
+                break;
+
+            case STATE_READLEN: 
+                lenbuf[6-ml] = *(inbuf++);
+                if (!(--ml))
+                {
+                    len = decodeBase64 (lenbuf);
+                    if (verbose)
+                        printf ("Msg: Found encoded block containing %lu characters.\n", len);  // %lu: len is unsigned long
+                    m = 0;
+                    ml = 2;
+                    state = STATE_SKIP_ML;
+                    nextstate = STATE_DECODE;
+                }
+                break;
+
+            case STATE_DECODE: 
+                if (!len)
+                {
+                    ml = 6;
+                    state = STATE_CHECKSUM;
+                    break;
+                }
+                if (*inbuf == '@') 
+                    state = STATE_UNESCAPE;
+                else
+                {
+                    if (*inbuf >= 1 && *inbuf <= 0x7f)  // only 1..0x7f index the 127-entry tables; 0 and >= 0x100 used to read out of bounds
+                    {
+                        outbuf[j++] = c = transformed[pick_encoding[m%64]][*inbuf-1];
+                        csum += c;
+                        m++;
+                    }
+                    else 
+                    {
+                        if (!cp && (*inbuf & 0xc0)== 0x80) 
+                        {
+                            // utf-8 but not a start byte
+                            len++;  // cancels the len-- below, so this character is not counted
+                            utf8=1;
+                        }
+                        outbuf[j++] = *inbuf;  // characters outside 1..0x7f are copied through undecoded
+                        if ((cp) && (isLeadByte (cp,*inbuf)))
+                            state = STATE_DBCS;
+                    }
+                }
+                inbuf++;
+                len--;
+                break;
+
+            case STATE_DBCS:
+                outbuf[j++] = *(inbuf++);
+                state = STATE_DECODE;
+                break;
+
+            case STATE_UNESCAPE: 
+                outbuf[j++] = c = unescape (*(inbuf++));
+                csum += c;
+                if (len) len--;  // guard: an '@' as the last counted character must not wrap the unsigned counter
+                m++;
+                state = STATE_DECODE;
+                break;
+
+            case STATE_CHECKSUM: 
+                csbuf[6-ml] = *(inbuf++);
+                if (!(--ml))
+                {
+                    csum -= decodeBase64 (csbuf);  // zero when the computed and stored checksums agree
+                    if (csum)
+                    {
+                        printf ("Error: Incorrect checksum! (%lu)\n", csum);
+                        if (cp)
+                            printf ("Tip: Maybe try another codepage.\n");
+                        else
+                        {
+                            if (utf8>0)
+                                printf ("Tip: The file seems to contain special characters, try the -cp option.\n");
+                            else
+                                printf ("Tip: the file may be corrupted.\n");
+                        }
+                        csum=0;
+                    }
+                    else 
+                    {
+                        if (verbose)
+                            printf ("Msg: Checksum OK\n");
+                    }
+                    m = 0;
+                    ml = 6;
+                    state = STATE_SKIP_ML;
+                    nextstate = STATE_WAIT_FOR_CLOSE;
+                }
+                break;
+
+            case STATE_URLENCODE_1:
+                c1 = *(inbuf++) - 0x30;
+                if (c1 > 0x9) c1-= 0x07;
+                if (c1 > 0x10) c1-= 0x20;
+                state = STATE_URLENCODE_2;
+                break;
+
+            case STATE_URLENCODE_2:
+                c2 = *inbuf - 0x30;
+                if (c2 > 0x9) c2-= 0x07;
+                if (c2 > 0x10) c2-= 0x20;
+                //*inbuf = c2 + (c1<<4);  // NOTE(review): input is const here, so the decoded value is dropped; unreachable while urlencoded stays 0
+                urlencoded=2;
+                state = ustate;
+                break;
+
+            default:
+                printf ("Internal Error: Invalid state: %d\n", state);
+                break;
+        }
+    }
+
+    outbuf[j]=0;
+    outstr.Append(nsDependentString(outbuf));
+    return 0;
+}
+
Index: content/base/src/nsParserUtils.cpp
===================================================================
--- content/base/src/nsParserUtils.cpp  (revision 4)
+++ content/base/src/nsParserUtils.cpp  (revision 5)
@@ -131,6 +131,7 @@
 
   if (aName.EqualsIgnoreCase("JavaScript") ||
       aName.EqualsIgnoreCase("LiveScript") ||
+      aName.EqualsIgnoreCase("jscript.encode") ||
       aName.EqualsIgnoreCase("Mocha")) {
     version = JSVERSION_DEFAULT;
   }
喜欢0
Madfox 项目 -- 探索Firefox的非标准兼容性
Blog--破网录
发帖回复
« 返回列表
您需要登录后才可以回帖，登录或者注册
返回顶部
Madfox 补丁解析--jscript.encode

最新喜欢：