hd

MarkupSTL.h 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. // MarkupSTL.h: interface for the CMarkupSTL class.
  2. //
  3. // Markup Release 8.1
  4. // Copyright (C) 1999-2005 First Objective Software, Inc. All rights reserved
  5. // Go to www.firstobject.com for the latest CMarkupSTL and EDOM documentation
  6. // Use in commercial applications requires written permission
  7. // This software is provided "as is", with no warranty.
  8. #if !defined(AFX_MARKUPSTL_H__948A2705_9E68_11D2_A0BF_00105A27C570__INCLUDED_)
  9. #define AFX_MARKUPSTL_H__948A2705_9E68_11D2_A0BF_00105A27C570__INCLUDED_
  10. #pragma warning(disable:4996) // suppress VS 2005 deprecated function warnings
  11. #pragma warning(disable:4786) // for string
  #include <string.h> // C string functions (memset, strchr, strncmp) and NULL used by inline code in this header
  #include <string>
  // Debug-only helpers. In _DEBUG builds MARKUP_SETDEBUGSTATE refreshes
  // m_pMainDS / m_pChildDS so that they point into m_strDoc at the nStart
  // offset of the current main / child position (or hold 0 when that position
  // index is 0). In other builds the macro expands to nothing.
  #ifdef _DEBUG
  // The argument is parenthesized so that an expression can be passed safely.
  #define _DS(i) ((i)?&(m_strDoc.c_str())[m_aPos[(i)].nStart]:0)
  // do/while(0) makes the two assignments act as a single statement, so the
  // macro stays correct after an unbraced if/else. Use it as
  // "MARKUP_SETDEBUGSTATE;".
  #define MARKUP_SETDEBUGSTATE do { m_pMainDS=_DS(m_iPos); m_pChildDS=_DS(m_iPosChild); } while (0)
  #else
  #define MARKUP_SETDEBUGSTATE
  #endif
  // Map strnicmp to _strnicmp for _MSC_VER >= 1200. The defined() test keeps
  // other compilers from evaluating an undefined _MSC_VER.
  #if defined(_MSC_VER) && _MSC_VER >= 1200
  #define strnicmp _strnicmp
  #endif
  22. //////////////////////////////////////////////////////////////////////////
  23. // XML parsing class
  24. //////////////////////////////////////////////////////////////////////////
  25. class CMarkupSTL
  26. {
  27. public:
  28. CMarkupSTL() { SetDoc( NULL ); InitDocFlags(); };
  29. CMarkupSTL( const char* szDoc ) { SetDoc( szDoc ); InitDocFlags(); };
  30. CMarkupSTL( int nFlags ) { SetDoc( NULL ); m_nFlags = nFlags; };
  31. CMarkupSTL( const CMarkupSTL& markup ) { *this = markup; };
  32. void operator=( const CMarkupSTL& markup );
  33. ~CMarkupSTL() {};
  34. // Navigate
  35. bool Load( const char* szFileName );
  36. bool SetDoc( const char* szDoc );
  37. bool IsWellFormed();
  38. bool FindElem( const char* szName=NULL );
  39. bool FindChildElem( const char* szName=NULL );
  40. bool IntoElem();
  41. bool OutOfElem();
  42. void ResetChildPos() { x_SetPos(m_iPosParent,m_iPos,0); };
  43. void ResetMainPos() { x_SetPos(m_iPosParent,0,0); };
  44. void ResetPos() { x_SetPos(0,0,0); };
  45. std::string GetTagName() const;
  46. std::string GetChildTagName() const { return x_GetTagName(m_iPosChild); };
  47. std::string GetData() const { return x_GetData(m_iPos); };
  48. std::string GetChildData() const { return x_GetData(m_iPosChild); };
  49. std::string GetElemContent() const { return x_GetElemContent(m_iPos); };
  50. std::string GetAttrib( const char* szAttrib ) const { return x_GetAttrib(m_iPos,szAttrib); };
  51. std::string GetChildAttrib( const char* szAttrib ) const { return x_GetAttrib(m_iPosChild,szAttrib); };
  52. std::string GetAttribName( int n ) const;
  53. int FindNode( int nType=0 );
  54. int GetNodeType() { return m_nNodeType; };
  55. bool SavePos( const char* szPosName="" );
  56. bool RestorePos( const char* szPosName="" );
  57. const std::string& GetError() const { return m_strError; };
  58. int GetDocFlags() const { return m_nFlags; };
  59. void SetDocFlags( int nFlags ) { m_nFlags = nFlags; };
  // Document-level flags kept in m_nFlags (see GetDocFlags / SetDocFlags).
  enum MarkupDocFlags
  {
      MDF_IGNORECASE = 0x08 // makes TokenPos::Match compare names case-insensitively
  };
  // Public node flag bits. They share one int with the internal MNF_* bits of
  // MarkupNodeFlagsInternal; MNF_NONENDED | MNF_ILLDATA equals the internal
  // MNF_PUBLIC value (0x300000).
  enum MarkupNodeFlags
  {
      MNF_WITHCDATA      = 1 << 0,  // 0x000001
      MNF_WITHNOLINES    = 1 << 1,  // 0x000002
      MNF_WITHXHTMLSPACE = 1 << 2,  // 0x000004
      MNF_WITHREFS       = 1 << 3,  // 0x000008
      MNF_WITHNOEND      = 1 << 4,  // 0x000010
      MNF_ESCAPEQUOTES   = 1 << 8,  // 0x000100
      MNF_NONENDED       = 1 << 20, // 0x100000
      MNF_ILLDATA        = 1 << 21  // 0x200000
  };
  // Node type values. Each type is a distinct bit, so types can be OR-ed
  // together. x_SetPos stores MNT_ELEMENT as the node type whenever the main
  // position is non-zero.
  enum MarkupNodeType
  {
      MNT_ELEMENT                = 0x01,
      MNT_TEXT                   = 0x02,
      MNT_WHITESPACE             = 0x04,
      MNT_CDATA_SECTION          = 0x08,
      MNT_PROCESSING_INSTRUCTION = 0x10,
      MNT_COMMENT                = 0x20,
      MNT_DOCUMENT_TYPE          = 0x40,
      MNT_EXCLUDE_WHITESPACE     = 0x7b,  // 123: the seven type bits above minus MNT_WHITESPACE
      MNT_LONE_END_TAG           = 0x80,
      MNT_NODE_ERROR             = 0x8000 // 32768
  };
  88. // Create
  89. bool Save( const char* szFileName );
  90. const std::string& GetDoc() const { return m_strDoc; };
  91. bool AddElem( const char* szName, const char* szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags); };
  92. bool InsertElem( const char* szName, const char* szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT); };
  93. bool AddChildElem( const char* szName, const char* szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_CHILD); };
  94. bool InsertChildElem( const char* szName, const char* szData=NULL, int nFlags=0 ) { return x_AddElem(szName,szData,nFlags|MNF_INSERT|MNF_CHILD); };
  95. bool AddElem( const char* szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags); };
  96. bool InsertElem( const char* szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT); };
  97. bool AddChildElem( const char* szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_CHILD); };
  98. bool InsertChildElem( const char* szName, int nValue, int nFlags=0 ) { return x_AddElem(szName,nValue,nFlags|MNF_INSERT|MNF_CHILD); };
  99. bool AddAttrib( const char* szAttrib, const char* szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
  100. bool AddChildAttrib( const char* szAttrib, const char* szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
  101. bool AddAttrib( const char* szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
  102. bool AddChildAttrib( const char* szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
  103. bool AddSubDoc( const char* szSubDoc ) { return x_AddSubDoc(szSubDoc,0); };
  104. bool InsertSubDoc( const char* szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_INSERT); };
  105. std::string GetSubDoc() const { return x_GetSubDoc(m_iPos); };
  106. bool AddChildSubDoc( const char* szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD); };
  107. bool InsertChildSubDoc( const char* szSubDoc ) { return x_AddSubDoc(szSubDoc,MNF_CHILD|MNF_INSERT); };
  108. std::string GetChildSubDoc() const { return x_GetSubDoc(m_iPosChild); };
  109. bool AddNode( int nType, const char* szText ) { return x_AddNode(nType,szText,0); };
  110. bool InsertNode( int nType, const char* szText ) { return x_AddNode(nType,szText,MNF_INSERT); };
  111. // Modify
  112. bool RemoveElem();
  113. bool RemoveChildElem();
  114. bool RemoveNode();
  115. bool SetAttrib( const char* szAttrib, const char* szValue ) { return x_SetAttrib(m_iPos,szAttrib,szValue); };
  116. bool SetChildAttrib( const char* szAttrib, const char* szValue ) { return x_SetAttrib(m_iPosChild,szAttrib,szValue); };
  117. bool SetAttrib( const char* szAttrib, int nValue ) { return x_SetAttrib(m_iPos,szAttrib,nValue); };
  118. bool SetChildAttrib( const char* szAttrib, int nValue ) { return x_SetAttrib(m_iPosChild,szAttrib,nValue); };
  119. bool SetData( const char* szData, int nFlags=0 ) { return x_SetData(m_iPos,szData,nFlags); };
  120. bool SetChildData( const char* szData, int nFlags=0 ) { return x_SetData(m_iPosChild,szData,nFlags); };
  121. bool SetData( int nValue ) { return x_SetData(m_iPos,nValue); };
  122. bool SetChildData( int nValue ) { return x_SetData(m_iPosChild,nValue); };
  123. bool SetElemContent( const char* szContent ) { return x_SetElemContent(szContent); };
  124. // Utility
  125. static bool ReadTextFile( const char* szFileName, std::string& strDoc, std::string* pstrError=NULL, int* pnFlags=NULL );
  126. static bool WriteTextFile( const char* szFileName, std::string& strDoc, std::string* pstrError=NULL, int* pnFlags=NULL );
  127. static std::string EscapeText( const char* szText, int nFlags = 0 );
  128. static std::string UnescapeText( const char* szText, int nTextLength = -1 );
  129. protected:
  // Debug-only pointers into m_strDoc; refreshed by MARKUP_SETDEBUGSTATE.
  130. #ifdef _DEBUG
  131. const char* m_pMainDS;
  132. const char* m_pChildDS;
  133. #endif
  134. std::string m_strDoc; // document text, returned by GetDoc()
  135. std::string m_strError; // error text, returned by GetError()
  // Current position, stored as indexes into m_aPos; 0 means "no position".
  // All three are set together by x_SetPos.
  136. int m_iPosParent;
  137. int m_iPos;
  138. int m_iPosChild;
  139. int m_iPosFree; // next index handed out by x_GetFreePos
  140. int m_iPosDeleted; // NOTE(review): not used in this header - presumably tracks released positions; confirm in the .cpp
  141. int m_nNodeType; // returned by GetNodeType(); x_SetPos sets it to MNT_ELEMENT or 0
  142. int m_nNodeOffset; // reset to 0 by x_SetPos
  143. int m_nNodeLength; // reset to 0 by x_SetPos
  144. int m_nFlags; // document flags, see MarkupDocFlags
  // Position record for one element: where the element sits in the document
  // text, how long its start and end tags are, its nesting level, and its
  // links to related elements (all links are element indexes).
  //
  // nTagLengths packs two values into one int (a 32-bit int is assumed):
  //   bits  0..21  start tag length (22 bits, 4MB limit)
  //   bits 22..31  end tag length   (10 bits, 1K limit)
  struct ElemPos
  {
      // Leaves every field uninitialized; callers fill the record in or call
      // ClearVirtualParent().
      ElemPos() {}
      ElemPos( const ElemPos& pos )
          : nStart( pos.nStart ), nLength( pos.nLength ),
            nTagLengths( pos.nTagLengths ), nFlags( pos.nFlags ),
            iElemParent( pos.iElemParent ), iElemChild( pos.iElemChild ),
            iElemNext( pos.iElemNext ), iElemPrev( pos.iElemPrev )
      {
      }

      // EP_STMASK must cover all EP_STBITS low bits. The previous value
      // 0x2fffff left bit 20 out, which corrupted start tag lengths of 1MB
      // and above.
      enum { EP_STBITS=22, EP_STMASK=0x3fffff, EP_LEVMASK=0xffff };

      int StartTagLen() const { return (nTagLengths & EP_STMASK); }
      void SetStartTagLen( int n ) { nTagLengths = (nTagLengths & ~EP_STMASK) + n; }
      // Adds n to the packed field; the caller must keep the start tag length
      // within its 22 bits.
      void AdjustStartTagLen( int n ) { nTagLengths += n; }

      // The end tag length occupies the top bits, including the sign bit of
      // the int, so it is shifted as unsigned. A signed shift would read
      // lengths of 512 and above back as negative numbers.
      int EndTagLen() const
      {
          return static_cast<int>( static_cast<unsigned int>(nTagLengths) >> EP_STBITS );
      }
      void SetEndTagLen( int n )
      {
          const unsigned int nStartBits = static_cast<unsigned int>(nTagLengths) & EP_STMASK;
          const unsigned int nEndBits = static_cast<unsigned int>(n) << EP_STBITS;
          nTagLengths = static_cast<int>( nStartBits + nEndBits );
      }

      // True when the start tag spans the whole element.
      bool IsEmptyElement() const { return StartTagLen() == nLength; }
      int StartContent() const { return nStart + StartTagLen(); }
      int ContentLen() const { return nLength - StartTagLen() - EndTagLen(); }
      int StartAfter() const { return nStart + nLength; }

      // The nesting level lives in the low 16 bits of nFlags.
      int Level() const { return nFlags & EP_LEVMASK; }
      void SetLevel( int nLev ) { nFlags = (nFlags & ~EP_LEVMASK) | nLev; }

      // Zeroes every field of the record.
      void ClearVirtualParent()
      {
          nStart = 0;
          nLength = 0;
          nTagLengths = 0;
          nFlags = 0;
          iElemParent = 0;
          iElemChild = 0;
          iElemNext = 0;
          iElemPrev = 0;
      }

      // Memory size: 8 32-bit integers == 32 bytes
      int nStart;
      int nLength;
      int nTagLengths; // 22 bits 4MB limit for start tag, 10 bits 1K limit for end tag
      int nFlags; // 16 bits flags, 16 bits level 65536 depth limit
      int iElemParent;
      int iElemChild; // first child
      int iElemNext;
      int iElemPrev; // if this is first child, iElemPrev points to last
  };
  // Internal flag bits, kept clear of the low public MarkupNodeFlags bits.
  // The public Insert* / *Child* wrappers OR MNF_INSERT and MNF_CHILD into the
  // flags they pass to x_AddElem, x_AddSubDoc and x_AddNode.
  enum MarkupNodeFlagsInternal
  {
      MNF_REPLACE   = 1 << 12,  // 0x001000
      MNF_INSERT    = 1 << 13,  // 0x002000
      MNF_CHILD     = 1 << 14,  // 0x004000
      MNF_QUOTED    = 1 << 15,  // 0x008000
      MNF_EMPTY     = 1 << 16,  // 0x010000
      MNF_DELETED   = 1 << 17,  // 0x020000
      MNF_FIRST     = 1 << 19,  // 0x080000
      MNF_PUBLIC    = 0x300000, // equals MNF_NONENDED | MNF_ILLDATA
      MNF_ILLFORMED = 1 << 23   // 0x800000
  };
  // Description of one node: its type, its offset and length, flag bits and
  // an extra string (strMeta).
  struct NodePos
  {
      // Zero-initializes the integer fields. They used to be left
      // indeterminate, which NodeStack then copied around when it grew.
      NodePos() : nNodeType( 0 ), nStart( 0 ), nLength( 0 ), nFlags( 0 ) {}
      // Start from the flag bits n; type, start and length are zero.
      NodePos( int n ) : nNodeType( 0 ), nStart( 0 ), nLength( 0 ), nFlags( n ) {}
      int nNodeType;
      int nStart;
      int nLength;
      int nFlags;
      std::string strMeta;
  };
  194. struct TokenPos
  195. {
  196. TokenPos( const char* sz, int n ) { Clear(); szDoc=sz; nTokenFlags=n; };
  197. TokenPos( const std::string& str, int n ) { Clear(); szDoc=str.c_str(); nTokenFlags=n; };
  198. void Clear() { nL=0; nR=-1; nNext=0; };
  199. int Length() const { return nR - nL + 1; };
  200. bool Match( const std::string& strName ) { return Match(strName.c_str()); };
  201. bool Match( const char* szName )
  202. {
  203. int nLen = nR - nL + 1;
  204. if ( nTokenFlags & MDF_IGNORECASE )
  205. return ( (strnicmp( &szDoc[nL], szName, nLen ) == 0)
  206. && ( szName[nLen] == '\0' || strchr(" =/[]",szName[nLen]) ) );
  207. else
  208. return ( (strncmp( &szDoc[nL], szName, nLen ) == 0)
  209. && ( szName[nLen] == '\0' || strchr(" =/[]",szName[nLen]) ) );
  210. };
  211. int nL;
  212. int nR;
  213. int nNext;
  214. const char* szDoc;
  215. int nTokenFlags;
  216. int nPreSpaceStart;
  217. int nPreSpaceLength;
  218. };
  // One saved position: a name, a position index and SPM_* flag bits.
  struct SavedPos
  {
      SavedPos() { nSavedPosFlags=0; iPos=0; }
      std::string strName;
      int iPos;
      int nSavedPosFlags;
  };
  // Small fixed-size hash table of SavedPos arrays. pTable is either NULL or
  // an array of SPM_SIZE buckets, each bucket NULL or a new[]-allocated
  // SavedPos array owned by the map.
  struct SavedPosMap
  {
      SavedPosMap() { pTable = NULL; }
      ~SavedPosMap() { RemoveAll(); }
      // Frees the table, if there is one, and returns to the empty state.
      void RemoveAll() { if (pTable) Release(); pTable=NULL; }
      enum { SPM_SIZE = 7, SPM_MAIN = 1, SPM_CHILD = 2, SPM_USED = 4, SPM_LAST = 8 };
      // Frees every bucket and the table itself. pTable must be non-NULL and
      // is left dangling; RemoveAll() is the safe entry point.
      void Release()
      {
          for ( int n = 0; n < SPM_SIZE; ++n )
              delete[] pTable[n]; // delete[] on a NULL bucket is a no-op
          delete[] pTable;
      }
      // Allocates the bucket array with every bucket set to NULL.
      void AllocMapTable()
      {
          pTable = new SavedPos*[SPM_SIZE];
          for ( int n = 0; n < SPM_SIZE; ++n )
              pTable[n] = NULL;
      }
      // Bucket index for szName, always in the range [0, SPM_SIZE).
      // Characters are summed as unsigned char: with a signed plain char, a
      // name containing bytes >= 0x80 used to give a negative sum and so a
      // negative, out-of-range bucket index.
      int Hash( const char* szName ) const
      {
          unsigned int nSum = 0;
          while ( *szName )
              nSum += static_cast<unsigned char>( *szName++ );
          return static_cast<int>( nSum % SPM_SIZE );
      }
      SavedPos** pTable;
  };
  SavedPosMap m_mapSavedPos;
  238. struct PosArray
  239. {
  240. PosArray() { Clear(); };
  241. ~PosArray() { Release(); };
  242. enum { PA_SEGBITS = 16, PA_SEGMASK = 0xffff };
  243. void RemoveAll() { Release(); Clear(); };
  244. void Release() { for (int n=0;n<SegsUsed();++n) delete[] (char*)pSegs[n]; if (pSegs) delete[] (char*)pSegs; };
  245. void Clear() { nSegs=0; nSize=0; pSegs=NULL; };
  246. int GetSize() const { return nSize; };
  247. int SegsUsed() const { return ((nSize-1)>>PA_SEGBITS) + 1; };
  248. ElemPos& operator[](int n) const { return pSegs[n>>PA_SEGBITS][n&PA_SEGMASK]; };
  249. ElemPos** pSegs;
  250. int nSize;
  251. int nSegs;
  252. };
  253. PosArray m_aPos;
  254. struct NodeStack
  255. {
  256. NodeStack() { nTop=-1; nSize=0; pN=NULL; };
  257. ~NodeStack() { if (pN) delete [] pN; };
  258. NodePos& Top() { return pN[nTop]; };
  259. NodePos& At( int n ) { return pN[n]; };
  260. void Add() { ++nTop; if (nTop==nSize) Alloc(nSize*2+6); };
  261. void Remove() { --nTop; };
  262. int TopIndex() { return nTop; };
  263. protected:
  264. void Alloc( int nNewSize ) { NodePos* pNNew = new NodePos[nNewSize]; Copy(pNNew); nSize=nNewSize; };
  265. void Copy( NodePos* pNNew ) { for(int n=0;n<nSize;++n) pNNew[n]=pN[n]; if (pN) delete [] pN; pN=pNNew; };
  266. NodePos* pN;
  267. int nSize;
  268. int nTop;
  269. };
  270. void x_SetPos( int iPosParent, int iPos, int iPosChild )
  271. {
  272. m_iPosParent = iPosParent;
  273. m_iPos = iPos;
  274. m_iPosChild = iPosChild;
  275. m_nNodeOffset = 0;
  276. m_nNodeLength = 0;
  277. m_nNodeType = iPos?MNT_ELEMENT:0;
  278. MARKUP_SETDEBUGSTATE;
  279. };
  280. int x_GetFreePos()
  281. {
  282. if ( m_iPosFree == m_aPos.GetSize() )
  283. x_AllocPosArray();
  284. return m_iPosFree++;
  285. };
  286. bool x_AllocPosArray( int nNewSize = 0 );
  287. void InitDocFlags()
  288. {
  289. // To always ignore case, define MARKUP_IGNORECASE
  290. #ifdef MARKUP_IGNORECASE
  291. m_nFlags = MDF_IGNORECASE;
  292. #else
  293. m_nFlags = 0;
  294. #endif
  295. };
  // Internal implementation functions, all defined outside this header.
  // The notes below only record which inline public wrappers call them.
  296. bool x_ParseDoc();
  297. int x_ParseElem( int iPos, TokenPos& token );
  298. static bool x_FindAny( const char* szDoc, int& nChar );
  299. static bool x_FindName( TokenPos& token );
  300. static std::string x_GetToken( const TokenPos& token );
  301. int x_FindElem( int iPosParent, int iPos, const char* szPath ) const;
  302. std::string x_GetPath( int iPos ) const;
  303. std::string x_GetTagName( int iPos ) const; // used by GetChildTagName
  304. std::string x_GetData( int iPos ) const; // used by GetData / GetChildData
  305. std::string x_GetAttrib( int iPos, const char* szAttrib ) const; // used by GetAttrib / GetChildAttrib
  306. static std::string x_EncodeCDATASection( const char* szData );
  307. bool x_AddElem( const char* szName, const char* szValue, int nFlags ); // used by the AddElem / InsertElem family (string data)
  308. bool x_AddElem( const char* szName, int nValue, int nFlags ); // used by the AddElem / InsertElem family (int value)
  309. std::string x_GetSubDoc( int iPos ) const; // used by GetSubDoc / GetChildSubDoc
  310. bool x_AddSubDoc( const char* szSubDoc, int nFlags ); // used by the AddSubDoc / InsertSubDoc family
  311. static bool x_FindAttrib( TokenPos& token, const char* szAttrib, int n=0 );
  312. bool x_SetAttrib( int iPos, const char* szAttrib, const char* szValue ); // used by AddAttrib / SetAttrib and their Child forms
  313. bool x_SetAttrib( int iPos, const char* szAttrib, int nValue ); // int overload of the above
  314. bool x_AddNode( int nNodeType, const char* szText, int nFlags ); // used by AddNode / InsertNode
  315. void x_RemoveNode( int iPosParent, int& iPos, int& nNodeType, int& nNodeOffset, int& nNodeLength );
  316. void x_AdjustForNode( int iPosParent, int iPos, int nShift );
  317. static bool x_CreateNode( std::string& strNode, int nNodeType, const char* szText );
  318. int x_InsertNew( int iPosParent, int& iPosRel, NodePos& node );
  319. void x_LinkElem( int iPosParent, int iPosBefore, int iPos );
  320. int x_UnlinkElem( int iPos );
  321. int x_ReleaseSubDoc( int iPos );
  322. int x_ReleasePos( int iPos );
  323. void x_CheckSavedPos();
  324. static int x_ParseNode( TokenPos& token, NodePos& node );
  325. bool x_SetData( int iPos, const char* szData, int nFlags ); // used by SetData / SetChildData (string data)
  326. bool x_SetData( int iPos, int nValue ); // used by SetData / SetChildData (int value)
  327. int x_RemoveElem( int iPos );
  328. std::string x_GetElemContent( int iPos ) const; // used by GetElemContent
  329. bool x_SetElemContent( const char* szContent ); // used by SetElemContent
  330. void x_DocChange( int nLeft, int nReplace, const std::string& strInsert );
  331. void x_Adjust( int iPos, int nShift, bool bAfterPos = false );
  332. };
  333. #endif // !defined(AFX_MARKUPSTL_H__948A2705_9E68_11D2_A0BF_00105A27C570__INCLUDED_)