後記:2011-12-12
---------------------------
試了很久才發現,必須對 tidy-html 作修改,不然無法產生 xhtml1.1 以及 xhtml5 的 DOCTYPE
但是這個問題解決了,卻跑出一個 multi-thread 的 memory leak
繼續努力中
後記:2011-12-08
---------------------------
找到 W3C 後續發展的 HTML5-tidy ,正在試著裝上去
希望成功,那就徹底解決這個問題了
---------------------------
先寫步驟吧,以 audio 做例子:
1. tags.h
/* Convenience test for the new AUDIO tag: expands to TagIsId( node, TidyTag_AUDIO ). */
#define nodeIsAUDIO( node ) TagIsId( node, TidyTag_AUDIO )
2. tags.c
/* Version bitmask for the audio element: the OR of the version flags listed.
 * The first two positions are filled with xxxx instead of a version flag
 * (presumably a "not in this version" placeholder -- confirm against the
 * definition of xxxx). */
#define VERS_ELEM_AUDIO (xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10)
static const Dict tag_defs[] =
{ /**增加下面這行 */
{ TidyTag_AUDIO, "audio", VERS_ELEM_AUDIO, &TY_(W3CAttrsFor_AUDIO)[0], (CM_BLOCK|CM_EMPTY), TY_(ParseEmpty), NULL },
3. tidyenum.h
enum { /** 增加下面這行 */
TidyTag_AUDIO, /**< AUDIO */
} TidyTagId;
4. attrdict.h
extern const AttrVersion TY_(W3CAttrsFor_AUDIO)[];
5. attrdict.c
/* Attribute table for the audio element.
 * Each entry pairs a TidyAttr_* id with a bitmask built by OR-ing thirteen
 * version positions; a position holding xxxx instead of a version flag
 * presumably means the attribute is not accepted for that version (confirm
 * against the definition of xxxx).
 * The list ends with the { TidyAttr_UNKNOWN, 0 } entry, which looks like the
 * end-of-table sentinel -- verify against the code that walks this array. */
const AttrVersion TY_(W3CAttrsFor_AUDIO)[] =
{
{ TidyAttr_ACCESSKEY, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_AUTOPLAY, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_CLASS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_CONTROLS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_DIR, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|xxxx|xxxx },
{ TidyAttr_ID, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_LANG, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|xxxx|xxxx },
{ TidyAttr_LOOP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_OnCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnDBLCLICK, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnKEYDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnKEYPRESS, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnKEYUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnMOUSEDOWN, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnMOUSEMOVE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnMOUSEOUT, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnMOUSEOVER, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_OnMOUSEUP, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_PRELOAD, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_SDAPREF, HT20|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx },
{ TidyAttr_SRC, HT20|HT32|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_STYLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_TABINDEX, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|xxxx },
{ TidyAttr_TITLE, xxxx|xxxx|H40T|H41T|X10T|H40F|H41F|X10F|H40S|H41S|X10S|XH11|XB10 },
{ TidyAttr_XML_LANG, xxxx|xxxx|xxxx|xxxx|X10T|xxxx|xxxx|X10F|xxxx|xxxx|X10S|XH11|XB10 },
{ TidyAttr_XMLNS, xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|xxxx|XH11|XB10 },
{ TidyAttr_UNKNOWN, 0 },
};
6. 凡是沒有定義過的 attributes ,一律回到 tidyenum.h 去增加
TidyAttr_PRELOAD, /**< PRELOAD= */
大概就這樣~有夠麻煩
不好意思,斗膽插個嘴。以前聽一位前輩說過:「若一件事做了第二遍,我就會把它自動化!」我個人之前也遇過類似的事情,要加一個 Element 的 Parser,得改一堆程式碼。後來嫌麻煩,覺得 Perl 處理字串很強,就用 Perl 寫了一個小程式,把事情自動化了一下。沒有自動化到「很徹底」,不過比原來好太多了。當然前提是,將來要加的 Element 會多到有這麼做的價值。野人獻曝,得罪莫怪~ m(-__-)m
回覆刪除
感謝您的意見,我同意應該這麼做,不過這支軟體的這部分算是應急措施,原作者採用了 2008 完成的 HTMLTidy 這個 OSS 作為稽核用,其範圍僅包含了 XHTML1.1(HTML4),所以將來要包含 XHTML5 的部分勢必要找新的方案。
回覆刪除